diff --git a/0094-runtime-don-t-take-allglock-in-tracebackothers.patch b/0094-runtime-don-t-take-allglock-in-tracebackothers.patch
new file mode 100644
index 0000000000000000000000000000000000000000..a63552fedb0b7d5f78203450feb7afaf14182297
--- /dev/null
+++ b/0094-runtime-don-t-take-allglock-in-tracebackothers.patch
@@ -0,0 +1,199 @@
+From 2113c53920d02e7805372b543d2b658394fcd636 Mon Sep 17 00:00:00 2001
+From: Michael Pratt
+Date: Tue, 17 Nov 2020 11:55:53 -0500
+Subject: [PATCH 1/2] runtime: don't take allglock in tracebackothers
+
+tracebackothers is called from fatal throw/panic.
+
+A fatal throw may be taken with allglock held (notably in the allocator
+when allglock is held), which would cause a deadlock in tracebackothers
+when we try to take allglock again. Locking allglock here is also often
+a lock order violation w.r.t. the locks held when throw was called.
+
+Avoid the deadlock and ordering issues by skipping locking altogether.
+It is OK to miss concurrently created Gs (which are generally avoided by
+freezetheworld(), and which were possible previously anyways if created
+after the loop).
+
+Fatal throw/panic freezetheworld(), which should freeze other threads
+that may be racing to modify allgs. However, freezetheworld() does _not_
+guarantee that it stops all other threads, so we can't simply drop the
+lock.
+
+Fixes #42669
+Updates #43175
+
+Change-Id: I657aec46ed35fd5d1b3f1ba25b500128ab26b088
+Reviewed-on: https://go-review.googlesource.com/c/go/+/270861
+Reviewed-by: Michael Knyszek
+Trust: Michael Pratt
+
+Conflict: NA
+Reference: https://go-review.googlesource.com/c/go/+/270861
+---
+ src/runtime/mgcmark.go | 1 -
+ src/runtime/proc.go | 43 ++++++++++++++++++++++++++++++++++++----
+ src/runtime/runtime2.go | 1 -
+ src/runtime/traceback.go | 27 +++++++++++++++----------
+ 4 files changed, 56 insertions(+), 16 deletions(-)
+
+diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
+index b2fbcd4105..6ecc467475 100644
+--- a/src/runtime/mgcmark.go
++++ b/src/runtime/mgcmark.go
+@@ -143,7 +143,6 @@ fail:
+ println("gp", gp, "goid", gp.goid,
+ "status", readgstatus(gp),
+ "gcscandone", gp.gcscandone)
+- unlock(&allglock) // Avoid self-deadlock with traceback.
+ throw("scan missed a g")
+ }
+
+diff --git a/src/runtime/proc.go b/src/runtime/proc.go
+index 7fa19d867b..db49fbf385 100644
+--- a/src/runtime/proc.go
++++ b/src/runtime/proc.go
+@@ -460,8 +460,29 @@ func lockedOSThread() bool {
+ }
+
+ var (
+- allgs []*g
++ // allgs contains all Gs ever created (including dead Gs), and thus
++ // never shrinks.
++ //
++ // Access via the slice is protected by allglock or stop-the-world.
++ // Readers that cannot take the lock may (carefully!) use the atomic
++ // variables below.
+ allglock mutex
++ allgs []*g
++
++ // allglen and allgptr are atomic variables that contain len(allg) and
++ // &allg[0] respectively. Proper ordering depends on totally-ordered
++ // loads and stores. Writes are protected by allglock.
++ //
++ // allgptr is updated before allglen. Readers should read allglen
++ // before allgptr to ensure that allglen is always <= len(allgptr). New
++ // Gs appended during the race can be missed. For a consistent view of
++ // all Gs, allglock must be held.
++ //
++ // allgptr copies should always be stored as a concrete type or
++ // unsafe.Pointer, not uintptr, to ensure that GC can still reach it
++ // even if it points to a stale array.
++ allglen uintptr
++ allgptr **g
+ )
+
+ func allgadd(gp *g) {
+@@ -471,10 +492,25 @@ func allgadd(gp *g) {
+
+ lock(&allglock)
+ allgs = append(allgs, gp)
+- allglen = uintptr(len(allgs))
++ if &allgs[0] != allgptr {
++ atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0]))
++ }
++ atomic.Storeuintptr(&allglen, uintptr(len(allgs)))
+ unlock(&allglock)
+ }
+
++// atomicAllG returns &allgs[0] and len(allgs) for use with atomicAllGIndex.
++func atomicAllG() (**g, uintptr) {
++ length := atomic.Loaduintptr(&allglen)
++ ptr := (**g)(atomic.Loadp(unsafe.Pointer(&allgptr)))
++ return ptr, length
++}
++
++// atomicAllGIndex returns ptr[i] with the allgptr returned from atomicAllG.
++func atomicAllGIndex(ptr **g, i uintptr) *g {
++ return *(**g)(add(unsafe.Pointer(ptr), i*sys.PtrSize))
++}
++
+ const (
+ // Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
+ // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
+@@ -3913,7 +3949,7 @@ func badunlockosthread() {
+ }
+
+ func gcount() int32 {
+- n := int32(allglen) - sched.gFree.n - int32(atomic.Load(&sched.ngsys))
++ n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.n - int32(atomic.Load(&sched.ngsys))
+ for _, _p_ := range allp {
+ n -= _p_.gFree.n
+ }
+@@ -4583,7 +4619,6 @@ func checkdead() {
+ case _Grunnable,
+ _Grunning,
+ _Gsyscall:
+- unlock(&allglock)
+ print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
+ throw("checkdead: runnable g")
+ }
+diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
+index 814364aa42..c82678e0ed 100644
+--- a/src/runtime/runtime2.go
++++ b/src/runtime/runtime2.go
+@@ -1031,7 +1031,6 @@ func (w waitReason) String() string {
+ }
+
+ var (
+- allglen uintptr
+ allm *m
+ allp []*p // len(allp) == gomaxprocs; may change at safe points, otherwise immutable
+ allpLock mutex // Protects P-less reads of allp and all writes
+diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
+index 944c8473d2..bf4e02da0e 100644
+--- a/src/runtime/traceback.go
++++ b/src/runtime/traceback.go
+@@ -918,17 +918,25 @@ func tracebackothers(me *g) {
+ level, _, _ := gotraceback()
+
+ // Show the current goroutine first, if we haven't already.
+- g := getg()
+- gp := g.m.curg
+- if gp != nil && gp != me {
++ curgp := getg().m.curg
++ if curgp != nil && curgp != me {
+ print("\n")
+- goroutineheader(gp)
+- traceback(^uintptr(0), ^uintptr(0), 0, gp)
++ goroutineheader(curgp)
++ traceback(^uintptr(0), ^uintptr(0), 0, curgp)
+ }
+
+- lock(&allglock)
+- for _, gp := range allgs {
+- if gp == me || gp == g.m.curg || readgstatus(gp) == _Gdead || isSystemGoroutine(gp, false) && level < 2 {
++ // We can't take allglock here because this may be during fatal
++ // throw/panic, where locking allglock could be out-of-order or a
++ // direct deadlock.
++ //
++ // Instead, use atomic access to allgs which requires no locking. We
++ // don't lock against concurrent creation of new Gs, but even with
++ // allglock we may miss Gs created after this loop.
++ ptr, length := atomicAllG()
++ for i := uintptr(0); i < length; i++ {
++ gp := atomicAllGIndex(ptr, i)
++
++ if gp == me || gp == curgp || readgstatus(gp) == _Gdead || isSystemGoroutine(gp, false) && level < 2 {
+ continue
+ }
+ print("\n")
+@@ -937,14 +945,13 @@ func tracebackothers(me *g) {
+ // called from a signal handler initiated during a
+ // systemstack call. The original G is still in the
+ // running state, and we want to print its stack.
+- if gp.m != g.m && readgstatus(gp)&^_Gscan == _Grunning {
++ if gp.m != getg().m && readgstatus(gp)&^_Gscan == _Grunning {
+ print("\tgoroutine running on other thread; stack unavailable\n")
+ printcreatedby(gp)
+ } else {
+ traceback(^uintptr(0), ^uintptr(0), 0, gp)
+ }
+ }
+- unlock(&allglock)
+ }
+
+ // tracebackHexdump hexdumps part of stk around frame.sp and frame.fp
+--
+2.33.0
+
diff --git a/0095-runtime-encapsulate-access-to-allgs.patch b/0095-runtime-encapsulate-access-to-allgs.patch
new file mode 100644
index 0000000000000000000000000000000000000000..11f3020dfac5d15b3ad6500713eccd6a1b69c4d2
--- /dev/null
+++ b/0095-runtime-encapsulate-access-to-allgs.patch
@@ -0,0 +1,339 @@
+From e1c2b3b3da33448e41a39aabd0cc8cbcea0e1450 Mon Sep 17 00:00:00 2001
+From: Michael Pratt
+Date: Wed, 23 Dec 2020 15:05:37 -0500
+Subject: [PATCH 2/2] runtime: encapsulate access to allgs
+
+Correctly accessing allgs is a bit hairy. Some paths need to lock
+allglock, some don't. Those that don't are safest using atomicAllG, but
+usage is not consistent.
+
+Rather than doing this ad-hoc, move all access* through forEachG /
+forEachGRace, the locking and atomic versions, respectively. This will
+make it easier to ensure safe access.
+
+* markroot is the only exception, as it has a far-removed guarantee of
+safe access via an atomic load of allglen far before actual use.
+
+Change-Id: Ie1c7a8243e155ae2b4bc3143577380c695680e89
+Reviewed-on: https://go-review.googlesource.com/c/go/+/279994
+Trust: Michael Pratt
+Run-TryBot: Michael Pratt
+TryBot-Result: Go Bot
+Reviewed-by: Michael Knyszek
+
+Conflict: NA
+Reference: https://go-review.googlesource.com/c/go/+/279994
+---
+ src/runtime/heapdump.go | 5 ++---
+ src/runtime/mgc.go | 8 +++-----
+ src/runtime/mgcmark.go | 32 +++++++++++++++++++-------------
+ src/runtime/mprof.go | 35 +++++++++++++++++++----------------
+ src/runtime/proc.go | 40 +++++++++++++++++++++++++++-----------
+ src/runtime/trace.go | 5 +++--
+ src/runtime/traceback.go | 21 +++++++++------------
+ 7 files changed, 84 insertions(+), 62 deletions(-)
+
+diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
+index cfd5c251b4..93add0ad5a 100644
+--- a/src/runtime/heapdump.go
++++ b/src/runtime/heapdump.go
+@@ -393,8 +393,7 @@ func dumpgoroutine(gp *g) {
+
+ func dumpgs() {
+ // goroutines & stacks
+- for i := 0; uintptr(i) < allglen; i++ {
+- gp := allgs[i]
++ forEachG(func(gp *g) {
+ status := readgstatus(gp) // The world is stopped so gp will not be in a scan state.
+ switch status {
+ default:
+@@ -407,7 +406,7 @@
+ _Gwaiting:
+ dumpgoroutine(gp)
+ }
+- }
++ })
+ }
+
+ func finq_callback(fn *funcval, obj unsafe.Pointer, nret uintptr, fint *_type, ot *ptrtype) {
+diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
+index b3499516f6..f7198822aa 100644
+--- a/src/runtime/mgc.go
++++ b/src/runtime/mgc.go
+@@ -2207,14 +2207,12 @@ func gcSweep(mode gcMode) {
+ //
+ //go:systemstack
+ func gcResetMarkState() {
+- // This may be called during a concurrent phase, so make sure
++ // This may be called during a concurrent phase, so lock to make sure
+ // allgs doesn't change.
+- lock(&allglock)
+- for _, gp := range allgs {
++ forEachG(func(gp *g) {
+ gp.gcscandone = false // set to true in gcphasework
+ gp.gcAssistBytes = 0
+- }
+- unlock(&allglock)
++ })
+
+ // Clear page marks. This is just 1MB per 64GB of heap, so the
+ // time here is pretty trivial.
+diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
+index 6ecc467475..233c4976eb 100644
+--- a/src/runtime/mgcmark.go
++++ b/src/runtime/mgcmark.go
+@@ -127,23 +127,26 @@ func gcMarkRootCheck() {
+ throw("left over markroot jobs")
+ }
+
+- lock(&allglock)
+ // Check that stacks have been scanned.
+- var gp *g
+- for i := 0; i < work.nStackRoots; i++ {
+- gp = allgs[i]
++ //
++ // We only check the first nStackRoots Gs that we should have scanned.
++ // Since we don't care about newer Gs (see comment in
++ // gcMarkRootPrepare), no locking is required.
++ i := 0
++ forEachGRace(func(gp *g) {
++ if i >= work.nStackRoots {
++ return
++ }
++
+ if !gp.gcscandone {
+- goto fail
++ println("gp", gp, "goid", gp.goid,
++ "status", readgstatus(gp),
++ "gcscandone", gp.gcscandone)
++ throw("scan missed a g")
+ }
+- }
+- unlock(&allglock)
+- return
+
+-fail:
+- println("gp", gp, "goid", gp.goid,
+- "status", readgstatus(gp),
+- "gcscandone", gp.gcscandone)
+- throw("scan missed a g")
++ i++
++ })
+ }
+
+ // ptrmask for an allocation containing a single pointer.
+@@ -200,6 +203,9 @@ func markroot(gcw *gcWork, i uint32) {
+ // the rest is scanning goroutine stacks
+ var gp *g
+ if baseStacks <= i && i < end {
++ // N.B. Atomic read of allglen in gcMarkRootPrepare
++ // acts as a barrier to ensure that allgs must be large
++ // enough to contain all relevant Gs.
+ gp = allgs[i-baseStacks]
+ } else {
+ throw("markroot: bad index")
+diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
+index 128498d69b..c94b8f7cae 100644
+--- a/src/runtime/mprof.go
++++ b/src/runtime/mprof.go
+@@ -731,12 +731,13 @@ func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int
+
+ stopTheWorld("profile")
+
++ // World is stopped, no locking required.
+ n = 1
+- for _, gp1 := range allgs {
++ forEachGRace(func(gp1 *g) {
+ if isOK(gp1) {
+ n++
+ }
+- }
++ })
+
+ if n <= len(p) {
+ ok = true
+@@ -757,21 +758,23 @@ func goroutineProfileWithLabels(p []StackRecord, labels []unsafe.Pointer) (n int
+ }
+
+ // Save other goroutines.
+- for _, gp1 := range allgs {
+- if isOK(gp1) {
+- if len(r) == 0 {
+- // Should be impossible, but better to return a
+- // truncated profile than to crash the entire process.
+- break
+- }
+- saveg(^uintptr(0), ^uintptr(0), gp1, &r[0])
+- if labels != nil {
+- lbl[0] = gp1.labels
+- lbl = lbl[1:]
+- }
+- r = r[1:]
++ forEachGRace(func(gp1 *g) {
++ if !isOK(gp1) {
++ return
+ }
+- }
++
++ if len(r) == 0 {
++ // Should be impossible, but better to return a
++ // truncated profile than to crash the entire process.
++ return
++ }
++ saveg(^uintptr(0), ^uintptr(0), gp1, &r[0])
++ if labels != nil {
++ lbl[0] = gp1.labels
++ lbl = lbl[1:]
++ }
++ r = r[1:]
++ })
+ }
+
+ startTheWorld()
+diff --git a/src/runtime/proc.go b/src/runtime/proc.go
+index db49fbf385..e1aafffc93 100644
+--- a/src/runtime/proc.go
++++ b/src/runtime/proc.go
+@@ -511,6 +511,30 @@ func atomicAllGIndex(ptr **g, i uintptr) *g {
+ return *(**g)(add(unsafe.Pointer(ptr), i*sys.PtrSize))
+ }
+
++// forEachG calls fn on every G from allgs.
++//
++// forEachG takes a lock to exclude concurrent addition of new Gs.
++func forEachG(fn func(gp *g)) {
++ lock(&allglock)
++ for _, gp := range allgs {
++ fn(gp)
++ }
++ unlock(&allglock)
++}
++
++// forEachGRace calls fn on every G from allgs.
++//
++// forEachGRace avoids locking, but does not exclude addition of new Gs during
++// execution, which may be missed.
++func forEachGRace(fn func(gp *g)) {
++ ptr, length := atomicAllG()
++ for i := uintptr(0); i < length; i++ {
++ gp := atomicAllGIndex(ptr, i)
++ fn(gp)
++ }
++ return
++}
++
+ const (
+ // Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
+ // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
+@@ -4605,11 +4629,9 @@ func checkdead() {
+ }
+
+ grunning := 0
+- lock(&allglock)
+- for i := 0; i < len(allgs); i++ {
+- gp := allgs[i]
++ forEachG(func(gp *g) {
+ if isSystemGoroutine(gp, false) {
+- continue
++ return
+ }
+ s := readgstatus(gp)
+ switch s &^ _Gscan {
+@@ -4622,8 +4644,7 @@ func checkdead() {
+ print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
+ throw("checkdead: runnable g")
+ }
+- }
+- unlock(&allglock)
++ })
+ if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
+ unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
+ throw("no goroutines (main called runtime.Goexit) - deadlock!")
+@@ -4993,9 +5014,7 @@ func schedtrace(detailed bool) {
+ print(" M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, ""+" locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=", id3, "\n")
+ }
+
+- lock(&allglock)
+- for gi := 0; gi < len(allgs); gi++ {
+- gp := allgs[gi]
++ forEachG(func(gp *g) {
+ mp := gp.m
+ lockedm := gp.lockedm.ptr()
+ id1 := int64(-1)
+@@ -5007,8 +5026,7 @@ func schedtrace(detailed bool) {
+ id2 = lockedm.id
+ }
+ print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=", id1, " lockedm=", id2, "\n")
+- }
+- unlock(&allglock)
++ })
+ unlock(&sched.lock)
+ }
+
+diff --git a/src/runtime/trace.go b/src/runtime/trace.go
+index 169b650eb4..b8794d383a 100644
+--- a/src/runtime/trace.go
++++ b/src/runtime/trace.go
+@@ -220,7 +220,8 @@ func StartTrace() error {
+ stackID := traceStackID(mp, stkBuf, 2)
+ releasem(mp)
+
+- for _, gp := range allgs {
++ // World is stopped, no need to lock.
++ forEachGRace(func(gp *g) {
+ status := readgstatus(gp)
+ if status != _Gdead {
+ gp.traceseq = 0
+@@ -240,7 +241,7 @@ func StartTrace() error {
+ } else {
+ gp.sysblocktraced = false
+ }
+- }
++ })
+ traceProcStart()
+ traceGoStart()
+ // Note: ticksStart needs to be set after we emit traceEvGoInSyscall events.
+diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
+index bf4e02da0e..b57e61712c 100644
+--- a/src/runtime/traceback.go
++++ b/src/runtime/traceback.go
+@@ -925,19 +925,16 @@ func tracebackothers(me *g) {
+ traceback(^uintptr(0), ^uintptr(0), 0, curgp)
+ }
+
+- // We can't take allglock here because this may be during fatal
+- // throw/panic, where locking allglock could be out-of-order or a
+- // direct deadlock.
++ // We can't call locking forEachG here because this may be during fatal
++ // throw/panic, where locking could be out-of-order or a direct
++ // deadlock.
+ //
+- // Instead, use atomic access to allgs which requires no locking. We
+- // don't lock against concurrent creation of new Gs, but even with
+- // allglock we may miss Gs created after this loop.
+- ptr, length := atomicAllG()
+- for i := uintptr(0); i < length; i++ {
+- gp := atomicAllGIndex(ptr, i)
+-
++ // Instead, use forEachGRace, which requires no locking. We don't lock
++ // against concurrent creation of new Gs, but even with allglock we may
++ // miss Gs created after this loop.
++ forEachGRace(func(gp *g) {
+ if gp == me || gp == curgp || readgstatus(gp) == _Gdead || isSystemGoroutine(gp, false) && level < 2 {
+- continue
++ return
+ }
+ print("\n")
+ goroutineheader(gp)
+@@ -951,7 +948,7 @@
+ } else {
+ traceback(^uintptr(0), ^uintptr(0), 0, gp)
+ }
+- }
++ })
+ }
+
+ // tracebackHexdump hexdumps part of stk around frame.sp and frame.fp
+--
+2.33.0
+
diff --git a/golang.spec b/golang.spec
index 3a29b2603191686a621afef8af74f6b61e4f03b7..eb15ccd9c5166b07ba317cbb186895d3fedd4e8a 100644
--- a/golang.spec
+++ b/golang.spec
@@ -58,7 +58,7 @@
 Name: golang
 Version: 1.15.7
-Release: 25
+Release: 26
 Summary: The Go Programming Language
 License: BSD and Public Domain
 URL: https://golang.org/
@@ -235,6 +235,8 @@
 Patch6090: 0090-release-branch.go1.19-html-template-disallow-actions.patch
 Patch6091: 0091-release-branch.go1.19-mime-multipart-avoid-excessive.patch
 Patch6092: 0092-release-branch.go1.19-net-textproto-mime-multipart-i.patch
 Patch6093: 0093-release-branch.go1.19-mime-multipart-limit-parsed-mi.patch
+Patch6094: 0094-runtime-don-t-take-allglock-in-tracebackothers.patch
+Patch6095: 0095-runtime-encapsulate-access-to-allgs.patch
 Patch9001: 0001-drop-hard-code-cert.patch
 Patch9002: 0002-fix-patch-cmd-go-internal-modfetch-do-not-sho.patch
@@ -474,6 +476,12 @@ fi
 %files devel -f go-tests.list -f go-misc.list -f go-src.list
 
 %changelog
+* Fri Apr 14 2023 hanchao - 1.15.7-26
+- Type:bugfix
+- CVE:
+- SUG:NA
+- DESC:fix a deadlock issue when a signal is received.
+
 * Thu Apr 13 2023 hanchao - 1.15.7-25
 - Type:CVE
 - CVE:CVE-2023-24534,CVE-2023-24536,CVE-2023-24537,CVE-2023-24538