From 4a1d8da417382ac6e4c05891db427368d0f6e422 Mon Sep 17 00:00:00 2001 From: zhongjiawei Date: Fri, 8 Sep 2023 11:27:38 +0800 Subject: [PATCH] containerd:add patch for 1.6.22 Signed-off-by: zhongjiawei --- containerd.spec | 10 +- git-commit | 2 +- patch/0001-containerd-add-check-in-spec.patch | 24 - ...t-event-when-detect-containerd-resta.patch | 103 ++++ ...up-container-when-containerd-dockerd.patch | 475 ++++++++++++++++++ ...leanup-residual-runc-and-files-force.patch | 49 ++ ...Dump-log-to-file-when-docker-receive.patch | 57 +++ ...-shim-alive-when-containerd-is-resta.patch | 63 +++ patch/0006-containerd-Makefile-modify.patch | 101 ++++ ...reate-and-exec-timeout-to-avild-bloc.patch | 195 +++++++ ...load-task-in-creating-and-optimize-i.patch | 107 ++++ ...inerd-support-kill-D-state-container.patch | 68 +++ ...him-exit-when-bundle-dir-does-not-ex.patch | 39 ++ ...d-change-tmpfile-directory-when-exec.patch | 44 ++ ...12-containerd-stw-gc-sweep-for-arm64.patch | 53 ++ ...y-shim-initiative-exit-time-for-post.patch | 86 ++++ ...inerd-wrap-and-process-return-errors.patch | 36 ++ ...0015-containerd-add-timeout-for-shim.patch | 140 ++++++ ...-up-residual-container-after-shim-ab.patch | 88 ++++ ...LT-for-containerd-shim-timeout-requi.patch | 113 +++++ ...18-containerd-save-dumpstack-to-file.patch | 40 ++ ...inerd-add-timeout-for-delete-command.patch | 135 +++++ ...-if-bundle-exists-before-create-bund.patch | 66 +++ ...container-init-process-if-runc-start.patch | 104 ++++ ...ontainerd-shim-residual-when-kill-co.patch | 45 ++ ...-exec-event-missing-due-to-pid-reuse.patch | 156 ++++++ ...m-left-when-pause-contaienr-and-kill.patch | 35 ++ patch/0025-containerd-drop-opt-package.patch | 25 + ...rd-fix-race-access-for-mobySubcribed.patch | 62 +++ series.conf | 27 +- 30 files changed, 2520 insertions(+), 28 deletions(-) delete mode 100644 patch/0001-containerd-add-check-in-spec.patch create mode 100644 
patch/0001-containerd-event-resend-exit-event-when-detect-containerd-resta.patch create mode 100644 patch/0002-containerd-cleanup-container-when-containerd-dockerd.patch create mode 100644 patch/0003-containerd-cleanup-residual-runc-and-files-force.patch create mode 100644 patch/0004-containerd-shim-Dump-log-to-file-when-docker-receive.patch create mode 100644 patch/0005-containerd-check-shim-alive-when-containerd-is-resta.patch create mode 100644 patch/0006-containerd-Makefile-modify.patch create mode 100644 patch/0007-containerd-set-create-and-exec-timeout-to-avild-bloc.patch create mode 100644 patch/0008-containerd-skip-load-task-in-creating-and-optimize-i.patch create mode 100644 patch/0009-containerd-support-kill-D-state-container.patch create mode 100644 patch/0010-containerd-add-shim-exit-when-bundle-dir-does-not-ex.patch create mode 100644 patch/0011-containerd-change-tmpfile-directory-when-exec.patch create mode 100644 patch/0012-containerd-stw-gc-sweep-for-arm64.patch create mode 100644 patch/0013-containerd-modify-shim-initiative-exit-time-for-post.patch create mode 100644 patch/0014-containerd-wrap-and-process-return-errors.patch create mode 100644 patch/0015-containerd-add-timeout-for-shim.patch create mode 100644 patch/0016-containerd-clean-up-residual-container-after-shim-ab.patch create mode 100644 patch/0017-containerd-add-LLT-for-containerd-shim-timeout-requi.patch create mode 100644 patch/0018-containerd-save-dumpstack-to-file.patch create mode 100644 patch/0019-containerd-add-timeout-for-delete-command.patch create mode 100644 patch/0020-containerd-check-if-bundle-exists-before-create-bund.patch create mode 100644 patch/0021-containerd-kill-container-init-process-if-runc-start.patch create mode 100644 patch/0022-containerd-fix-containerd-shim-residual-when-kill-co.patch create mode 100644 patch/0023-containerd-fix-exec-event-missing-due-to-pid-reuse.patch create mode 100644 patch/0024-containerd-fix-dm-left-when-pause-contaienr-and-kill.patch 
create mode 100644 patch/0025-containerd-drop-opt-package.patch create mode 100644 patch/0026-containerd-fix-race-access-for-mobySubcribed.patch diff --git a/containerd.spec b/containerd.spec index 78b1a4d..c09634e 100644 --- a/containerd.spec +++ b/containerd.spec @@ -2,7 +2,7 @@ %global debug_package %{nil} Version: 1.6.22 Name: containerd -Release: 1 +Release: 2 Summary: An industry-standard container runtime License: ASL 2.0 URL: https://containerd.io @@ -54,7 +54,7 @@ install -D -p -m 0644 %{S:7} %{buildroot}%{_sysconfdir}/containerd/config.toml %systemd_post containerd.service %preun -%systemd_prerun containerd.service +%systemd_preun containerd.service %postun %systemd_postun_with_restart containerd.service @@ -67,6 +67,12 @@ install -D -p -m 0644 %{S:7} %{buildroot}%{_sysconfdir}/containerd/config.toml %exclude %{_bindir}/containerd-stress %changelog +* Fri Sep 8 2023 zhongjiawei - 1.6.22-2 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:add patch for 1.6.22 + * Wed Aug 2 2023 vegbir - 1.6.22-1 - Type:bugfix - ID:NA diff --git a/git-commit b/git-commit index afea3a5..abe4d74 100644 --- a/git-commit +++ b/git-commit @@ -1 +1 @@ -e27e755bdb8bdd9b5f6499be09e544e228b1b2de +ecdc685acc9021b4af0f0996eccea19f8dc500fe diff --git a/patch/0001-containerd-add-check-in-spec.patch b/patch/0001-containerd-add-check-in-spec.patch deleted file mode 100644 index d601da5..0000000 --- a/patch/0001-containerd-add-check-in-spec.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 1ee9382e8af3ff3c6f46361366bad3e4f38e0ba9 Mon Sep 17 00:00:00 2001 -From: xulei -Date: Fri, 21 Apr 2023 14:49:54 +0800 -Subject: [PATCH] containerd: add check in spec - ---- - Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/Makefile b/Makefile -index 7441eea..31e96f5 100644 ---- a/Makefile -+++ b/Makefile -@@ -203,7 +203,7 @@ build: ## build the go packages - - test: ## run tests, except integration tests and tests that require root - @echo "$(WHALE) $@" -- @$(GOTEST) ${TESTFLAGS} 
${PACKAGES} -+ @$(GOTEST) ${TESTFLAGS} ./gc - - root-test: ## run tests, except integration tests - @echo "$(WHALE) $@" --- -2.33.0 diff --git a/patch/0001-containerd-event-resend-exit-event-when-detect-containerd-resta.patch b/patch/0001-containerd-event-resend-exit-event-when-detect-containerd-resta.patch new file mode 100644 index 0000000..14faad2 --- /dev/null +++ b/patch/0001-containerd-event-resend-exit-event-when-detect-containerd-resta.patch @@ -0,0 +1,103 @@ +From 53122406aaf85c29cf70ca3b76a62580874ae9b5 Mon Sep 17 00:00:00 2001 +From: jingrui +Date: Sun, 10 Feb 2019 18:40:59 +0800 +Subject: [PATCH] event: resend exit event when detect containerd restarted + +reason: fix docker stop no effect. And add init pid to start event log +because DFX support start event with init pid + +Change-Id: I024b2f6a03d74fcbb5623c696212dcbfb624b285 +Signed-off-by: jingrui +--- + cmd/containerd-shim/main_unix.go | 38 +++++++++++++++++++++++++++++++- + runtime/v1/linux/task.go | 2 ++ + 2 files changed, 39 insertions(+), 1 deletion(-) + +diff --git a/cmd/containerd-shim/main_unix.go b/cmd/containerd-shim/main_unix.go +index 024611b..942f354 100644 +--- a/cmd/containerd-shim/main_unix.go ++++ b/cmd/containerd-shim/main_unix.go +@@ -25,11 +25,13 @@ import ( + "flag" + "fmt" + "io" ++ "io/ioutil" + "net" + "os" + "os/signal" + "runtime" + "runtime/debug" ++ "strconv" + "strings" + "sync" + "syscall" +@@ -284,7 +286,7 @@ type remoteEventsPublisher struct { + address string + } + +-func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { ++func (l *remoteEventsPublisher) doPublish(ctx context.Context, topic string, event events.Event) error { + ns, _ := namespaces.Namespace(ctx) + encoded, err := typeurl.MarshalAny(event) + if err != nil { +@@ -316,3 +318,37 @@ func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event + } + return nil + } ++ ++func getContainerdPid() int { ++ pidFile := 
"/var/run/docker/containerd/containerd.pid" ++ data, err := ioutil.ReadFile(pidFile) ++ if err != nil { ++ return -1 ++ } ++ pid, err := strconv.Atoi(string(data)) ++ if err != nil { ++ return -1 ++ } ++ return pid ++} ++ ++func (l *remoteEventsPublisher) Publish(ctx context.Context, topic string, event events.Event) error { ++ old := getContainerdPid() ++ for i := 1; i <= 10; i++ { ++ err := l.doPublish(ctx, topic, event) ++ logrus.Infof("try publish event(%d) %s %v %v", i, topic, event, err) ++ if err == nil { ++ new := getContainerdPid() ++ if old == new { ++ return nil ++ } ++ logrus.Warnf("containerd pid %d changed to %d", old, new) ++ old = new ++ } ++ if i == 10 { ++ return err ++ } ++ time.Sleep(time.Duration(i) * time.Second) ++ } ++ return nil ++} +diff --git a/runtime/v1/linux/task.go b/runtime/v1/linux/task.go +index 3ac7839..5a8dab1 100644 +--- a/runtime/v1/linux/task.go ++++ b/runtime/v1/linux/task.go +@@ -38,6 +38,7 @@ import ( + "github.com/containerd/ttrpc" + "github.com/containerd/typeurl" + "github.com/gogo/protobuf/types" ++ "github.com/sirupsen/logrus" + ) + + // Task on a linux based system +@@ -148,6 +149,7 @@ func (t *Task) Start(ctx context.Context) error { + } + t.mu.Unlock() + } ++ logrus.Infof("publish event %s for container %s with pid %d", runtime.TaskStartEventTopic, t.id, t.pid) + t.events.Publish(ctx, runtime.TaskStartEventTopic, &eventstypes.TaskStart{ + ContainerID: t.id, + Pid: uint32(t.pid), +-- +2.33.0 + diff --git a/patch/0002-containerd-cleanup-container-when-containerd-dockerd.patch b/patch/0002-containerd-cleanup-container-when-containerd-dockerd.patch new file mode 100644 index 0000000..1da12b5 --- /dev/null +++ b/patch/0002-containerd-cleanup-container-when-containerd-dockerd.patch @@ -0,0 +1,475 @@ +From 49e88aa61dd8a99e17edf020faae2307b63858da Mon Sep 17 00:00:00 2001 +From: jingrui +Date: Sun, 10 Feb 2019 15:40:52 +0800 +Subject: [PATCH] containerd:cleanup container when containerd/dockerd is + killed + +when 
containerd killed during task create, see Runtime.Create(). the +defer function will not execute, so shim residual. cleanup shim for +container pid=-1 + +And kill dockerd during docker stop in post-stophook, containerd will load +task and treat as ok when shim response client. add init.exit to forbid +load exiting task. also exit event may lost, fix it + +Signed-off-by: jingrui +--- + events/events.go | 14 +++ + events/exchange/exchange.go | 12 +++ + events/exit.go | 108 ++++++++++++++++++++ + pkg/process/utils.go | 2 + + runtime/v1/linux/runtime.go | 63 ++++++++++-- + runtime/v1/linux/task.go | 27 ++++- + runtime/v1/shim/service.go | 4 + + vendor/github.com/docker/go-events/queue.go | 18 +++- + 8 files changed, 232 insertions(+), 16 deletions(-) + create mode 100644 events/exit.go + +diff --git a/events/events.go b/events/events.go +index b7eb86f..70ef315 100644 +--- a/events/events.go ++++ b/events/events.go +@@ -20,6 +20,7 @@ import ( + "context" + "time" + ++ apievents "github.com/containerd/containerd/api/events" + "github.com/containerd/typeurl" + "github.com/gogo/protobuf/types" + ) +@@ -32,6 +33,19 @@ type Envelope struct { + Event *types.Any + } + ++func (e *Envelope) ExitFile() string { ++ decoded, err := typeurl.UnmarshalAny(e.Event) ++ if err != nil { ++ return "" ++ } ++ ++ if e, ok := decoded.(*apievents.TaskExit); ok { ++ return ExitFile(e.ContainerID, e.Pid, e.ExitStatus) ++ } ++ ++ return "" ++} ++ + // Field returns the value for the given fieldpath as a string, if defined. + // If the value is not defined, the second value will be false. 
+ func (e *Envelope) Field(fieldpath []string) (string, bool) { +diff --git a/events/exchange/exchange.go b/events/exchange/exchange.go +index a1f385d..162e7be 100644 +--- a/events/exchange/exchange.go ++++ b/events/exchange/exchange.go +@@ -49,6 +49,11 @@ func NewExchange() *Exchange { + var _ events.Publisher = &Exchange{} + var _ events.Forwarder = &Exchange{} + var _ events.Subscriber = &Exchange{} ++var mobySubcribed = false ++ ++func MobySubscribed() bool { ++ return mobySubcribed ++} + + // Forward accepts an envelope to be directly distributed on the exchange. + // +@@ -161,6 +166,13 @@ func (e *Exchange) Subscribe(ctx context.Context, fs ...string) (ch <-chan *even + } + + e.broadcaster.Add(dst) ++ logrus.Infof("subscribe ctx=%v fs=%v", ctx, fs) ++ for _, s := range fs { ++ if !MobySubscribed() && s == "namespace==moby,topic~=|^/tasks/|" { ++ queue.Namespace = "moby" ++ mobySubcribed = true ++ } ++ } + + go func() { + defer closeAll() +diff --git a/events/exit.go b/events/exit.go +new file mode 100644 +index 0000000..ee9d5a9 +--- /dev/null ++++ b/events/exit.go +@@ -0,0 +1,108 @@ ++/* ++Use of this source code is governed by Apache-2.0 ++license that can be found in the LICENSE file ++Description: common functions ++Author: jingrui ++Create: 2019-02-12 ++*/ ++ ++package events ++ ++import ( ++ "fmt" ++ "io/ioutil" ++ "os" ++ "path/filepath" ++ "strconv" ++ "strings" ++ ++ "github.com/sirupsen/logrus" ++) ++ ++const ExitDir = "/var/run/docker/containerd/exit" ++const ExitStatusDefault = 137 ++const InitExit = "init.exit" ++ ++func ExitFile(cid string, pid uint32, status uint32) string { ++ return fmt.Sprintf("%s.%d.%d", cid, pid, status) ++} ++ ++func ExitInfo(ef string) (string, uint32, uint32) { ++ s := strings.Split(ef, ".") ++ if len(s) != 3 { ++ return "", 0, 0 ++ } ++ ++ cid := s[0] ++ pid, err := strconv.ParseUint(s[1], 10, 32) ++ if err != nil { ++ return "", 0, 0 ++ } ++ status, err := strconv.ParseUint(s[2], 10, 32) ++ if err != nil { ++ return 
"", 0, 0 ++ } ++ ++ return cid, uint32(pid), uint32(status) ++} ++ ++func ExitAddFile(ns string, ef string, reason string) { ++ logrus.Devour(os.MkdirAll(filepath.Join(ExitDir, ns), 0700)) ++ err := ioutil.WriteFile(filepath.Join(ExitDir, ns, ef), []byte{}, 0600) ++ logrus.Infof("exit-add %s/%s [reason: %s] error=%v", ns, ef, reason, err) ++} ++ ++func ExitDelFile(ns string, ef string) { ++ err := os.RemoveAll(filepath.Join(ExitDir, ns, ef)) ++ logrus.Devour(err) ++ logrus.Infof("exit-del %s/%s error=%v", ns, ef, err) ++} ++ ++func ExitGetFile(ns string, cid string, pid uint32, status uint32) string { ++ ef := ExitFile(cid, pid, status) ++ if _, err := os.Stat(filepath.Join(ExitDir, ns, ef)); err == nil { ++ return ef ++ } ++ return "" ++} ++ ++func ExitGetFiles(ns string) []string { ++ files, err := ioutil.ReadDir(filepath.Join(ExitDir, ns)) ++ if err != nil { ++ return []string{} ++ } ++ ++ names := []string{} ++ for _, f := range files { ++ names = append(names, f.Name()) ++ } ++ ++ return names ++} ++ ++func ExitPending(ns string, cid string, pid uint32) bool { ++ for _, ef := range ExitGetFiles(ns) { ++ if strings.Contains(ef, fmt.Sprintf("%s.%d", cid, pid)) { ++ return true ++ } ++ } ++ return false ++} ++ ++func InitExitWrite(bundle string, pid int) { ++ if _, err := os.Stat(bundle); err != nil { ++ logrus.Infof("skip write init.exit %s error=%v", bundle, err) ++ return ++ } ++ err := ioutil.WriteFile(filepath.Join(bundle, InitExit), []byte(fmt.Sprintf("%d", pid)), 0600) ++ if err != nil { ++ logrus.Infof("failed write init.exit error=%s", bundle, err) ++ } ++} ++ ++func InitExitExist(bundle string) bool { ++ if _, err := os.Stat(filepath.Join(bundle, InitExit)); err == nil { ++ return true ++ } ++ return false ++} +diff --git a/pkg/process/utils.go b/pkg/process/utils.go +index afada02..5ff04ed 100644 +--- a/pkg/process/utils.go ++++ b/pkg/process/utils.go +@@ -41,6 +41,8 @@ const ( + RuncRoot = "/run/containerd/runc" + // InitPidFile name of the file that 
contains the init pid + InitPidFile = "init.pid" ++ ++ InitExit = "init.exit" + ) + + // safePid is a thread safe wrapper for pid. +diff --git a/runtime/v1/linux/runtime.go b/runtime/v1/linux/runtime.go +index b6d5382..a6efd81 100644 +--- a/runtime/v1/linux/runtime.go ++++ b/runtime/v1/linux/runtime.go +@@ -32,6 +32,7 @@ import ( + "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/containers" + "github.com/containerd/containerd/errdefs" ++ "github.com/containerd/containerd/events" + "github.com/containerd/containerd/events/exchange" + "github.com/containerd/containerd/identifiers" + "github.com/containerd/containerd/log" +@@ -138,6 +139,7 @@ func New(ic *plugin.InitContext) (interface{}, error) { + return nil, err + } + } ++ go r.resendExitEvents(ic.Context, "moby") + return r, nil + } + +@@ -184,7 +186,8 @@ func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts + } + defer func() { + if err != nil { +- bundle.Delete() ++ errd := bundle.Delete() ++ log.G(ctx).WithError(err).Errorf("revert: delete bundle error=%v", errd) + } + }() + +@@ -225,9 +228,8 @@ func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts + deferCtx, deferCancel := context.WithTimeout( + namespaces.WithNamespace(context.TODO(), namespace), cleanupTimeout) + defer deferCancel() +- if kerr := s.KillShim(deferCtx); kerr != nil { +- log.G(ctx).WithError(kerr).Error("failed to kill shim") +- } ++ kerr := s.KillShim(deferCtx) ++ log.G(ctx).WithError(err).Errorf("revert: kill shim error=%v", kerr) + } + }() + +@@ -338,6 +340,41 @@ func (r *Runtime) Delete(ctx context.Context, id string) (*runtime.Exit, error) + return exit, nil + } + ++func (r *Runtime) resendExitEvents(ctx context.Context, ns string) { ++ for { ++ time.Sleep(time.Second) ++ efs := events.ExitGetFiles(ns) ++ if len(efs) == 0 { ++ break ++ } ++ ++ if !exchange.MobySubscribed() { ++ logrus.Infof("waiting moby event stream ...") ++ continue ++ } ++ 
time.Sleep(time.Second) ++ ++ for _, ef := range efs { ++ cid, pid, status := events.ExitInfo(ef) ++ if cid == "" { ++ continue ++ } ++ ++ e := &eventstypes.TaskExit{ ++ ContainerID: cid, ++ ID: cid, ++ ExitStatus: status, ++ ExitedAt: time.Now().UTC(), ++ Pid: uint32(pid), ++ } ++ ++ ctx := namespaces.WithNamespace(context.Background(), ns) ++ err := r.events.Publish(ctx, runtime.TaskExitEventTopic, e) ++ logrus.Infof("resend exit event %v error=%v", e, err) ++ } ++ } ++} ++ + func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) { + dir, err := os.ReadDir(filepath.Join(r.state, ns)) + if err != nil { +@@ -349,6 +386,7 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) { + continue + } + id := path.Name() ++ log.G(ctx).Infof("load-task %s", id) + // skip hidden directories + if len(id) > 0 && id[0] == '.' { + continue +@@ -435,6 +473,20 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) { + log.G(ctx).WithError(err).Error("loading task type") + continue + } ++ if pid <= 0 { ++ _, err := t.DeleteForce(ctx, 0) ++ log.G(ctx).Warnf("delete force %s Pid=%d error=%v", id, pid, err) ++ continue ++ } ++ if _, err := os.Stat(filepath.Join(bundle.path, process.InitExit)); err == nil { ++ if !events.ExitPending(ns, t.id, uint32(pid)) { ++ events.ExitAddFile(ns, events.ExitFile(t.id, uint32(pid), uint32(events.ExitStatusDefault)), "cleanup dirty task") ++ } ++ _, err := t.DeleteForce(ctx, uint32(pid)) ++ log.G(ctx).Warnf("delete force %s Pid=%d(exiting) error=%v", id, pid, err) ++ continue ++ } ++ log.G(ctx).Infof("load-task %s Pid=%d done", id, pid) + o = append(o, t) + } + return o, nil +@@ -449,9 +501,6 @@ func (r *Runtime) cleanupAfterDeadShim(ctx context.Context, bundle *bundle, ns, + pid, _ := runc.ReadPidFile(filepath.Join(bundle.path, process.InitPidFile)) + ctx = namespaces.WithNamespace(ctx, ns) + if err := r.terminate(ctx, bundle, ns, id); err != nil { +- if r.config.ShimDebug { +- 
return fmt.Errorf("failed to terminate task, leaving bundle for debugging: %w", err) +- } + log.G(ctx).WithError(err).Warn("failed to terminate task") + } + +diff --git a/runtime/v1/linux/task.go b/runtime/v1/linux/task.go +index 5a8dab1..70908ae 100644 +--- a/runtime/v1/linux/task.go ++++ b/runtime/v1/linux/task.go +@@ -24,6 +24,7 @@ import ( + "errors" + "fmt" + "sync" ++ "time" + + "github.com/containerd/cgroups" + eventstypes "github.com/containerd/containerd/api/events" +@@ -39,6 +40,7 @@ import ( + "github.com/containerd/typeurl" + "github.com/gogo/protobuf/types" + "github.com/sirupsen/logrus" ++ "golang.org/x/sys/unix" + ) + + // Task on a linux based system +@@ -93,12 +95,12 @@ func (t *Task) PID(_ context.Context) (uint32, error) { + } + + // Delete the task and return the exit status +-func (t *Task) Delete(ctx context.Context) (*runtime.Exit, error) { ++func (t *Task) delete(ctx context.Context, force bool, pid uint32) (*runtime.Exit, error) { + rsp, shimErr := t.shim.Delete(ctx, empty) + if shimErr != nil { +- shimErr = errdefs.FromGRPC(shimErr) +- if !errdefs.IsNotFound(shimErr) { +- return nil, shimErr ++ log.G(ctx).WithError(shimErr).Error("failed to delete container, force=%t", force) ++ if !force { ++ return nil, errdefs.FromGRPC(shimErr) + } + } + t.tasks.Delete(ctx, t.id) +@@ -108,6 +110,14 @@ func (t *Task) Delete(ctx context.Context) (*runtime.Exit, error) { + if err := t.bundle.Delete(); err != nil { + log.G(ctx).WithError(err).Error("failed to delete bundle") + } ++ ++ if rsp == nil { ++ rsp = &shim.DeleteResponse{} ++ rsp.ExitStatus = 128 + uint32(unix.SIGKILL) ++ rsp.ExitedAt = time.Now().UTC() ++ rsp.Pid = pid ++ } ++ + if shimErr != nil { + return nil, shimErr + } +@@ -124,6 +134,15 @@ func (t *Task) Delete(ctx context.Context) (*runtime.Exit, error) { + }, nil + } + ++// Delete the task and return the exit status ++func (t *Task) Delete(ctx context.Context) (*runtime.Exit, error) { ++ return t.delete(ctx, false, 0) ++} ++ ++func (t 
*Task) DeleteForce(ctx context.Context, pid uint32) (*runtime.Exit, error) { ++ return t.delete(ctx, true, pid) ++} ++ + // Start the task + func (t *Task) Start(ctx context.Context) error { + t.mu.Lock() +diff --git a/runtime/v1/shim/service.go b/runtime/v1/shim/service.go +index a08757d..b00ed9c 100644 +--- a/runtime/v1/shim/service.go ++++ b/runtime/v1/shim/service.go +@@ -23,6 +23,7 @@ import ( + "context" + "encoding/json" + "fmt" ++ "io/ioutil" + "os" + "path/filepath" + "sync" +@@ -520,6 +521,9 @@ func (s *Service) checkProcesses(e runc.Exit) { + return + } + if ip, ok := p.(*process.Init); ok { ++ ns := filepath.Base(filepath.Dir(ip.Bundle)) ++ events.ExitAddFile(ns, events.ExitFile(s.id, uint32(e.Pid), uint32(e.Status)), "init exited") ++ ioutil.WriteFile(filepath.Join(ip.Bundle, process.InitExit), []byte(fmt.Sprintf("%d", e.Pid)), 0600) + // Ensure all children are killed + if shouldKillAllOnExit(s.context, s.bundle) { + if err := ip.KillAll(s.context); err != nil { +diff --git a/vendor/github.com/docker/go-events/queue.go b/vendor/github.com/docker/go-events/queue.go +index 4bb770a..5e83b40 100644 +--- a/vendor/github.com/docker/go-events/queue.go ++++ b/vendor/github.com/docker/go-events/queue.go +@@ -4,6 +4,7 @@ import ( + "container/list" + "sync" + ++ topevents "github.com/containerd/containerd/events" + "github.com/sirupsen/logrus" + ) + +@@ -11,11 +12,12 @@ import ( + // by a sink. It is unbounded and thread safe but the sink must be reliable or + // events will be dropped. + type Queue struct { +- dst Sink +- events *list.List +- cond *sync.Cond +- mu sync.Mutex +- closed bool ++ Namespace string ++ dst Sink ++ events *list.List ++ cond *sync.Cond ++ mu sync.Mutex ++ closed bool + } + + // NewQueue returns a queue to the provided Sink dst. 
+@@ -83,6 +85,12 @@ func (eq *Queue) run() { + "event": event, + "sink": eq.dst, + }).WithError(err).Debug("eventqueue: dropped event") ++ } else { ++ if e, ok := event.(*topevents.Envelope); ok { ++ if ef := e.ExitFile(); ef != "" { ++ topevents.ExitDelFile(eq.Namespace, ef) ++ } ++ } + } + } + } +-- +2.33.0 + diff --git a/patch/0003-containerd-cleanup-residual-runc-and-files-force.patch b/patch/0003-containerd-cleanup-residual-runc-and-files-force.patch new file mode 100644 index 0000000..adec89b --- /dev/null +++ b/patch/0003-containerd-cleanup-residual-runc-and-files-force.patch @@ -0,0 +1,49 @@ +From 05a237b82a23c5750d0b463f60504fea7a227493 Mon Sep 17 00:00:00 2001 +From: jingrui +Date: Mon, 11 Feb 2019 17:40:31 +0800 +Subject: [PATCH] containerd: cleanup residual runc and files force + +reason:kill -9 shim will generate residual runc files, cleanup runc files using +runc delete before create. And if container root path already exists +when call runtime.Create, we try to call runtime.Delete to cleanup it. +But in case runtime.Delete failed, root path will still exists +which causes Create failed with error "container with id exists". +So remove path directly if Delete failed. 
+ +Signed-off-by: jingrui +Signed-off-by: xiadanni +--- + vendor/github.com/containerd/go-runc/runc.go | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/vendor/github.com/containerd/go-runc/runc.go b/vendor/github.com/containerd/go-runc/runc.go +index f5f03ae..0feedeb 100644 +--- a/vendor/github.com/containerd/go-runc/runc.go ++++ b/vendor/github.com/containerd/go-runc/runc.go +@@ -31,6 +31,8 @@ import ( + "strings" + "time" + ++ "github.com/sirupsen/logrus" ++ + specs "github.com/opencontainers/runtime-spec/specs-go" + ) + +@@ -126,6 +128,14 @@ func (o *CreateOpts) args() (out []string, err error) { + + // Create creates a new container and returns its pid if it was created successfully + func (r *Runc) Create(context context.Context, id, bundle string, opts *CreateOpts) error { ++ if _, err := os.Stat(filepath.Join(r.Root, id)); err == nil { ++ logrus.Warnf("cleanup residue runtime with bundle %s root=%s", bundle, r.Root) ++ if dErr := r.Delete(context, id, &DeleteOpts{Force: true}); dErr != nil { ++ logrus.Errorf("runtime force delete return err: %v, remove container root err: %v", ++ dErr, os.RemoveAll(filepath.Join(r.Root, id))) ++ } ++ } ++ + args := []string{"create", "--bundle", bundle} + if opts != nil { + oargs, err := opts.args() +-- +2.33.0 + diff --git a/patch/0004-containerd-shim-Dump-log-to-file-when-docker-receive.patch b/patch/0004-containerd-shim-Dump-log-to-file-when-docker-receive.patch new file mode 100644 index 0000000..23d04e8 --- /dev/null +++ b/patch/0004-containerd-shim-Dump-log-to-file-when-docker-receive.patch @@ -0,0 +1,57 @@ +From 3e25022a5aee939a73d67e0bcbd90dd1d343b9d4 Mon Sep 17 00:00:00 2001 +From: lixiang172 +Date: Tue, 12 Feb 2019 15:22:06 +0800 +Subject: [PATCH] containerd-shim: Dump log to file when docker received signal + +reason: Dump stack log to file when docker received "kill -SIGUSR1 +PID" signal +The name of log files is "shim-stack-[time].log". 
+The log file can be found at: +/run/docker/containerd/daemon/io.containerd.runtime.v1.linux/moby/container-id/shim-stack-[time].log + +Change-Id: I6d7e03c9a0fd36e9a76f1dd45cfd5312985d03f8 +Signed-off-by: lixiang172 +--- + cmd/containerd-shim/main_unix.go | 3 +++ + vendor/github.com/sirupsen/logrus/exported.go | 4 ++++ + 2 files changed, 7 insertions(+) + +diff --git a/cmd/containerd-shim/main_unix.go b/cmd/containerd-shim/main_unix.go +index 942f354..6c3326f 100644 +--- a/cmd/containerd-shim/main_unix.go ++++ b/cmd/containerd-shim/main_unix.go +@@ -267,6 +267,8 @@ func handleSignals(logger *logrus.Entry, signals chan os.Signal, server *ttrpc.S + } + } + ++const stacksLogNameTemplate = "shim-stacks-%s.log" ++ + func dumpStacks(logger *logrus.Entry) { + var ( + buf []byte +@@ -279,6 +281,7 @@ func dumpStacks(logger *logrus.Entry) { + bufferLen *= 2 + } + buf = buf[:stackSize] ++ logrus.Devour(ioutil.WriteFile(fmt.Sprintf(stacksLogNameTemplate, strings.Replace(time.Now().Format(time.RFC3339), ":", "", -1)), buf, 0600)) + logger.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf) + } + +diff --git a/vendor/github.com/sirupsen/logrus/exported.go b/vendor/github.com/sirupsen/logrus/exported.go +index 017c30c..7acc41f 100644 +--- a/vendor/github.com/sirupsen/logrus/exported.go ++++ b/vendor/github.com/sirupsen/logrus/exported.go +@@ -179,6 +179,10 @@ func FatalFn(fn LogFunction) { + std.FatalFn(fn) + } + ++// Devour will do nothing and return directly ++func Devour(args ...interface{}) { ++} ++ + // Tracef logs a message at level Trace on the standard logger. + func Tracef(format string, args ...interface{}) { + std.Tracef(format, args...) 
+-- +2.33.0 + diff --git a/patch/0005-containerd-check-shim-alive-when-containerd-is-resta.patch b/patch/0005-containerd-check-shim-alive-when-containerd-is-resta.patch new file mode 100644 index 0000000..283132f --- /dev/null +++ b/patch/0005-containerd-check-shim-alive-when-containerd-is-resta.patch @@ -0,0 +1,63 @@ +From 94b1e21950631130c86be5572e8a89bd27d373bb Mon Sep 17 00:00:00 2001 +From: xueshaojia +Date: Thu, 14 Feb 2019 10:48:14 +0800 +Subject: [PATCH] containerd: check shim alive when containerd is restarted + +reason: When containerd is restarted, it will load all tasks.In some cases, the + containerd-shim is killed and the sock file will exist for a while. + Containerd should check the containerd-shim is available using the sock file. + If the containerd-shim server not responses, do r.cleanupAfterDeadShim + +If containerd-shim and containerd process is killed, container will exit, +however containerd exit event which generates when containerd restart to reload +tasks can not publish to dockerd, because at the time of loading tasks the connection +between dockerd and containerd isn't established. + +So we add this unpublish exit event to file and resend this event after grpc connection +is established. +--- + runtime/v1/linux/runtime.go | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/runtime/v1/linux/runtime.go b/runtime/v1/linux/runtime.go +index a6efd81..544b692 100644 +--- a/runtime/v1/linux/runtime.go ++++ b/runtime/v1/linux/runtime.go +@@ -416,6 +416,9 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) { + "id": id, + "namespace": ns, + }).Error("connecting to shim") ++ if !events.ExitPending(ns, id, uint32(pid)) { ++ events.ExitAddFile(ns, events.ExitFile(id, uint32(pid), uint32(events.ExitStatusDefault)), "cleanup dirty task") ++ } + err := r.cleanupAfterDeadShim(ctx, bundle, ns, id) + if err != nil { + log.G(ctx).WithError(err).WithField("bundle", bundle.path). 
+@@ -423,6 +426,24 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) { + } + continue + } ++ ctxContact, cancel := context.WithTimeout(ctx, 5*time.Second) ++ defer cancel() ++ alive, err := s.IsAlive(ctxContact) ++ if !alive { ++ log.G(ctx).WithError(err).WithFields(logrus.Fields{ ++ "id": id, ++ "namespace": ns, ++ }).Error("contacting to shim") ++ if !events.ExitPending(ns, id, uint32(pid)) { ++ events.ExitAddFile(ns, events.ExitFile(id, uint32(pid), uint32(events.ExitStatusDefault)), "cleanup dirty task") ++ } ++ err := r.cleanupAfterDeadShim(ctx, bundle, ns, id, pid) ++ if err != nil { ++ log.G(ctx).WithError(err).WithField("bundle", bundle.path). ++ Error("cleaning up after dead shim") ++ } ++ continue ++ } + + logDirPath := filepath.Join(r.root, ns, id) + +-- +2.33.0 + diff --git a/patch/0006-containerd-Makefile-modify.patch b/patch/0006-containerd-Makefile-modify.patch new file mode 100644 index 0000000..fc0b947 --- /dev/null +++ b/patch/0006-containerd-Makefile-modify.patch @@ -0,0 +1,101 @@ +From 55064f7d04cb58b7ca9914a96d9831270cdd6887 Mon Sep 17 00:00:00 2001 +From: zhongjiawei +Date: Mon, 7 Aug 2023 17:26:12 +0800 +Subject: [PATCH] containerd:Makefile modify + +--- + Makefile | 37 +++++++++++++++++++++---------------- + version/version.go | 2 +- + 2 files changed, 22 insertions(+), 17 deletions(-) + +diff --git a/Makefile b/Makefile +index f1b28ce..5b5f54c 100644 +--- a/Makefile ++++ b/Makefile +@@ -30,8 +30,8 @@ MANDIR ?= $(DATADIR)/man + TEST_IMAGE_LIST ?= + + # Used to populate variables in version package. +-VERSION ?= $(shell git describe --match 'v[0-9]*' --dirty='.m' --always) +-REVISION=$(shell git rev-parse HEAD)$(shell if ! 
git diff --no-ext-diff --quiet --exit-code; then echo .m; fi) ++VERSION=$(shell cat ./containerd_version) ++REVISION=$(shell cat ./git-commit | head -c 40) + PACKAGE=github.com/containerd/containerd + SHIM_CGO_ENABLED ?= 0 + +@@ -94,13 +94,11 @@ ifneq ($(STATIC),) + endif + GO_TAGS=$(if $(GO_BUILDTAGS),-tags "$(strip $(GO_BUILDTAGS))",) + +-GO_LDFLAGS=-ldflags '-X $(PKG)/version.Version=$(VERSION) -X $(PKG)/version.Revision=$(REVISION) -X $(PKG)/version.Package=$(PACKAGE) $(EXTRA_LDFLAGS) +-ifneq ($(STATIC),) +- GO_LDFLAGS += -extldflags "-static" +-endif +-GO_LDFLAGS+=' ++BEP_DIR=/tmp/containerd-build-bep ++BEP_FLAGS=-tmpdir=/tmp/containerd-build-bep + +-SHIM_GO_LDFLAGS=-ldflags '-X $(PKG)/version.Version=$(VERSION) -X $(PKG)/version.Revision=$(REVISION) -X $(PKG)/version.Package=$(PACKAGE) -extldflags "-static" $(EXTRA_LDFLAGS)' ++GO_LDFLAGS=-ldflags ' -buildid=IdByIsula -extldflags=-zrelro -extldflags=-znow $(BEP_FLAGS) -X $(PKG)/version.Version=$(VERSION) -X $(PKG)/version.Revision=$(REVISION) $(EXTRA_LDFLAGS)' ++SHIM_GO_LDFLAGS=-ldflags '-extldflags=-static' -ldflags '-buildid=IdByIsula $(BEP_FLAGS) -X $(PKG)/version.Version=$(VERSION) -X $(PKG)/version.Revision=$(REVISION) -linkmode=external -extldflags=-Wl,-z,relro,-z,now' + + # Project packages. + PACKAGES=$(shell $(GO) list ${GO_TAGS} ./... | grep -v /vendor/ | grep -v /integration) +@@ -203,7 +201,7 @@ build: ## build the go packages + + test: ## run tests, except integration tests and tests that require root + @echo "$(WHALE) $@" +- @$(GOTEST) ${TESTFLAGS} ${PACKAGES} ++ @go test ${TESTFLAGS} ./gc + + root-test: ## run tests, except integration tests + @echo "$(WHALE) $@" +@@ -239,18 +237,25 @@ benchmark: ## run benchmarks tests + + FORCE: + +-define BUILD_BINARY +-@echo "$(WHALE) $@" +-@$(GO) build ${DEBUG_GO_GCFLAGS} ${GO_GCFLAGS} ${GO_BUILD_FLAGS} -o $@ ${GO_LDFLAGS} ${GO_TAGS} ./$< +-endef +- + # Build a binary from a cmd. 
+ bin/%: cmd/% FORCE +- $(call BUILD_BINARY) ++ mkdir -p $(BEP_DIR) ++ @echo "$(WHALE) $@${BINARY_SUFFIX}" ++ CGO_ENABLED=1 \ ++ CGO_CFLAGS="-fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2" \ ++ CGO_CPPFLAGS="-fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2" \ ++ CGO_LDFLAGS_ALLOW='-Wl,-z,relro,-z,now' \ ++ CGO_LDFLAGS="-Wl,-z,relro,-z,now -Wl,-z,noexecstack" \ ++ go build ${GO_GCFLAGS} ${GO_BUILD_FLAGS} -o $@${BINARY_SUFFIX} ${GO_LDFLAGS} ${GO_TAGS} ./$< + + bin/containerd-shim: cmd/containerd-shim FORCE # set !cgo and omit pie for a static shim build: https://github.com/golang/go/issues/17789#issuecomment-258542220 + @echo "$(WHALE) $@" +- @CGO_ENABLED=${SHIM_CGO_ENABLED} $(GO) build ${GO_BUILD_FLAGS} -o $@ ${SHIM_GO_LDFLAGS} ${GO_TAGS} ./cmd/containerd-shim ++ CGO_ENABLED=1 \ ++ CGO_CFLAGS="-fstack-protector-strong -fPIE -D_FORTIFY_SOURCE=2 -O2" \ ++ CGO_CPPFLAGS="-fstack-protector-strong -fPIE -D_FORTIFY_SOURCE=2 -O2" \ ++ CGO_LDFLAGS_ALLOW='-Wl,-z,relro,-z,now' \ ++ CGO_LDFLAGS="-Wl,-z,relro,-z,now -Wl,-z,noexecstack" \ ++ go build -buildmode=pie ${GO_BUILD_FLAGS} -o bin/containerd-shim ${SHIM_GO_LDFLAGS} ${GO_TAGS} ./cmd/containerd-shim + + bin/containerd-shim-runc-v1: cmd/containerd-shim-runc-v1 FORCE # set !cgo and omit pie for a static shim build: https://github.com/golang/go/issues/17789#issuecomment-258542220 + @echo "$(WHALE) $@" +diff --git a/version/version.go b/version/version.go +index de124ef..ab2dadc 100644 +--- a/version/version.go ++++ b/version/version.go +@@ -20,7 +20,7 @@ import "runtime" + + var ( + // Package is filled at linking time +- Package = "github.com/containerd/containerd" ++ Package = "" + + // Version holds the complete version number. Filled in at linking time. 
+ Version = "1.6.22+unknown" +-- +2.33.0 + diff --git a/patch/0007-containerd-set-create-and-exec-timeout-to-avild-bloc.patch b/patch/0007-containerd-set-create-and-exec-timeout-to-avild-bloc.patch new file mode 100644 index 0000000..b07af71 --- /dev/null +++ b/patch/0007-containerd-set-create-and-exec-timeout-to-avild-bloc.patch @@ -0,0 +1,195 @@ +From 907578c6d8421d340c353ad27503bbfdb7f422d1 Mon Sep 17 00:00:00 2001 +From: xiadanni +Date: Fri, 15 Feb 2019 06:00:52 +0800 +Subject: [PATCH] containerd:set create and exec timeout to avild block when + command failed + +--- + cmd/containerd-shim/main_unix.go | 2 +- + sys/reaper/reaper_unix.go | 21 +++++++- + .../github.com/containerd/go-runc/monitor.go | 6 +++ + vendor/github.com/containerd/go-runc/runc.go | 54 +++++++++++++++++-- + 4 files changed, 77 insertions(+), 6 deletions(-) + +diff --git a/cmd/containerd-shim/main_unix.go b/cmd/containerd-shim/main_unix.go +index 6c3326f..8dfcd90 100644 +--- a/cmd/containerd-shim/main_unix.go ++++ b/cmd/containerd-shim/main_unix.go +@@ -312,7 +312,7 @@ func (l *remoteEventsPublisher) doPublish(ctx context.Context, topic string, eve + if err != nil { + return err + } +- status, err := reaper.Default.WaitTimeout(cmd, c, 30*time.Second) ++ status, err := reaper.Default.WaitTimeout(cmd, c, 30) + if err != nil { + return fmt.Errorf("failed to publish event: %s: %w", b.String(), err) + } +diff --git a/sys/reaper/reaper_unix.go b/sys/reaper/reaper_unix.go +index 6c4f13b..bf42d21 100644 +--- a/sys/reaper/reaper_unix.go ++++ b/sys/reaper/reaper_unix.go +@@ -22,6 +22,10 @@ package reaper + import ( + "errors" + "fmt" ++ "io/ioutil" ++ "path/filepath" ++ "strconv" ++ "strings" + "sync" + "syscall" + "time" +@@ -119,7 +123,8 @@ func (m *Monitor) Wait(c *exec.Cmd, ec chan runc.Exit) (int, error) { + } + + // WaitTimeout is used to skip the blocked command and kill the left process. 
+-func (m *Monitor) WaitTimeout(c *exec.Cmd, ec chan runc.Exit, timeout time.Duration) (int, error) { ++func (m *Monitor) WaitTimeout(c *exec.Cmd, ec chan runc.Exit, sec int64) (int, error) { ++ timeout := time.Duration(sec) * time.Second + type exitStatusWrapper struct { + status int + err error +@@ -281,3 +286,17 @@ func exitStatus(status unix.WaitStatus) int { + } + return status.ExitStatus() + } ++ ++func SameProcess(cmd *exec.Cmd, pid int) bool { ++ bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline")) ++ if err != nil { ++ return false ++ } ++ for i := range bytes { ++ if bytes[i] == 0 { ++ bytes[i] = 32 ++ } ++ } ++ cmdline := string(bytes) ++ return strings.EqualFold(cmdline, strings.Join(cmd.Args, " ")+" ") ++} +diff --git a/vendor/github.com/containerd/go-runc/monitor.go b/vendor/github.com/containerd/go-runc/monitor.go +index ff06a3f..9756491 100644 +--- a/vendor/github.com/containerd/go-runc/monitor.go ++++ b/vendor/github.com/containerd/go-runc/monitor.go +@@ -40,6 +40,7 @@ type Exit struct { + type ProcessMonitor interface { + Start(*exec.Cmd) (chan Exit, error) + Wait(*exec.Cmd, chan Exit) (int, error) ++ WaitTimeout(*exec.Cmd, chan Exit, int64) (int, error) + } + + type defaultMonitor struct { +@@ -74,3 +75,8 @@ func (m *defaultMonitor) Wait(c *exec.Cmd, ec chan Exit) (int, error) { + e := <-ec + return e.Status, nil + } ++ ++func (m *defaultMonitor) WaitTimeout(c *exec.Cmd, ec chan Exit, sec int64) (int, error) { ++ e := <-ec ++ return e.Status, nil ++} +diff --git a/vendor/github.com/containerd/go-runc/runc.go b/vendor/github.com/containerd/go-runc/runc.go +index 0feedeb..15fc8e1 100644 +--- a/vendor/github.com/containerd/go-runc/runc.go ++++ b/vendor/github.com/containerd/go-runc/runc.go +@@ -54,8 +54,22 @@ const ( + Text Format = "text" + // DefaultCommand is the default command for Runc + DefaultCommand = "runc" ++ execTimeout = 30 + ) + ++var ( ++ createTimeout int64 = 120 ++) ++ ++func init() { ++ runtimeTimeout, 
err := convertTime(os.Getenv("DOCKER_RUNTIME_START_TIMEOUT")) ++ if err != nil { ++ logrus.Warnf("init error, wrong runtimeTimeout format: %v", err) ++ } else { ++ createTimeout = runtimeTimeout ++ } ++} ++ + // List returns all containers created inside the provided runc root directory + func (r *Runc) List(context context.Context) ([]*Container, error) { + data, err := cmdOutput(r.command(context, "list", "--format=json"), false, nil) +@@ -151,7 +165,7 @@ func (r *Runc) Create(context context.Context, id, bundle string, opts *CreateOp + cmd.ExtraFiles = opts.ExtraFiles + + if cmd.Stdout == nil && cmd.Stderr == nil { +- data, err := cmdOutput(cmd, true, nil) ++ data, err := cmdOutputTimeout(cmd, true, nil, createTimeout) + defer putBuf(data) + if err != nil { + return fmt.Errorf("%s: %s", err, data.String()) +@@ -169,7 +183,7 @@ func (r *Runc) Create(context context.Context, id, bundle string, opts *CreateOp + } + } + } +- status, err := Monitor.Wait(cmd, ec) ++ status, err := Monitor.WaitTimeout(cmd, ec, createTimeout) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate successfully: %w", cmd.Args[0], &ExitError{status}) + } +@@ -235,7 +249,7 @@ func (r *Runc) Exec(context context.Context, id string, spec specs.Process, opts + opts.Set(cmd) + } + if cmd.Stdout == nil && cmd.Stderr == nil { +- data, err := cmdOutput(cmd, true, opts.Started) ++ data, err := cmdOutputTimeout(cmd, true, opts.Started, createTimeout) + defer putBuf(data) + if err != nil { + return fmt.Errorf("%w: %s", err, data.String()) +@@ -256,7 +270,7 @@ func (r *Runc) Exec(context context.Context, id string, spec specs.Process, opts + } + } + } +- status, err := Monitor.Wait(cmd, ec) ++ status, err := Monitor.WaitTimeout(cmd, ec, execTimeout) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate successfully: %w", cmd.Args[0], &ExitError{status}) + } +@@ -742,6 +756,38 @@ func cmdOutput(cmd *exec.Cmd, combined bool, started chan<- int) (*bytes.Buffer, + 
return b, err + } + ++func cmdOutputTimeout(cmd *exec.Cmd, combined bool, started chan<- int, timeout int64) (*bytes.Buffer, error) { ++ b := getBuf() ++ defer putBuf(b) ++ ++ cmd.Stdout = b ++ if combined { ++ cmd.Stderr = b ++ } ++ ec, err := Monitor.Start(cmd) ++ if err != nil { ++ return nil, err ++ } ++ if started != nil { ++ started <- cmd.Process.Pid ++ } ++ ++ status, err := Monitor.WaitTimeout(cmd, ec, timeout) ++ if err == nil && status != 0 { ++ err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) ++ } ++ ++ return b, err ++} ++ ++func convertTime(timeout string) (int64, error) { ++ timeDura, err := time.ParseDuration(timeout) ++ if err != nil { ++ return 0, err ++ } ++ return timeDura.Nanoseconds() / 1e9, nil ++} ++ + type ExitError struct { + Status int + } +-- +2.33.0 + diff --git a/patch/0008-containerd-skip-load-task-in-creating-and-optimize-i.patch b/patch/0008-containerd-skip-load-task-in-creating-and-optimize-i.patch new file mode 100644 index 0000000..8d92130 --- /dev/null +++ b/patch/0008-containerd-skip-load-task-in-creating-and-optimize-i.patch @@ -0,0 +1,107 @@ +From f696193bd86e3656e328e6f46feb0ad5366ec017 Mon Sep 17 00:00:00 2001 +From: jingrui +Date: Sat, 23 Feb 2019 15:51:24 +0800 +Subject: [PATCH] containerd: skip load task in creating and optimize init.exit + record + +load task in creating will stuck containerd restore process. 
+ +Change-Id: I2f8b77a88d78597ef2be5122708fc8ab16fad956 +Signed-off-by: jingrui +--- + pkg/process/utils.go | 2 -- + runtime/v1/linux/runtime.go | 7 +++---- + runtime/v1/shim/service.go | 10 ++++++++-- + 3 files changed, 11 insertions(+), 8 deletions(-) + +diff --git a/pkg/process/utils.go b/pkg/process/utils.go +index 5ff04ed..afada02 100644 +--- a/pkg/process/utils.go ++++ b/pkg/process/utils.go +@@ -41,8 +41,6 @@ const ( + RuncRoot = "/run/containerd/runc" + // InitPidFile name of the file that contains the init pid + InitPidFile = "init.pid" +- +- InitExit = "init.exit" + ) + + // safePid is a thread safe wrapper for pid. +diff --git a/runtime/v1/linux/runtime.go b/runtime/v1/linux/runtime.go +index 544b692..421922e 100644 +--- a/runtime/v1/linux/runtime.go ++++ b/runtime/v1/linux/runtime.go +@@ -386,7 +386,6 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) { + continue + } + id := path.Name() +- log.G(ctx).Infof("load-task %s", id) + // skip hidden directories + if len(id) > 0 && id[0] == '.' 
{ + continue +@@ -398,6 +397,7 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) { + ) + ctx = namespaces.WithNamespace(ctx, ns) + pid, _ := runc.ReadPidFile(filepath.Join(bundle.path, process.InitPidFile)) ++ log.G(ctx).Infof("load-task %s/%s/%s Pid=%d", r.state, ns, id, pid) + shimExit := make(chan struct{}) + s, err := bundle.NewShimClient(ctx, ns, ShimConnect(r.config, func() { + defer close(shimExit) +@@ -495,11 +495,10 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) { + continue + } + if pid <= 0 { +- _, err := t.DeleteForce(ctx, 0) +- log.G(ctx).Warnf("delete force %s Pid=%d error=%v", id, pid, err) ++ log.G(ctx).Warnf("skip load task in creating %s", id) + continue + } +- if _, err := os.Stat(filepath.Join(bundle.path, process.InitExit)); err == nil { ++ if events.InitExitExist(bundle.path) { + if !events.ExitPending(ns, t.id, uint32(pid)) { + events.ExitAddFile(ns, events.ExitFile(t.id, uint32(pid), uint32(events.ExitStatusDefault)), "cleanup dirty task") + } +diff --git a/runtime/v1/shim/service.go b/runtime/v1/shim/service.go +index b00ed9c..32431a4 100644 +--- a/runtime/v1/shim/service.go ++++ b/runtime/v1/shim/service.go +@@ -23,10 +23,10 @@ import ( + "context" + "encoding/json" + "fmt" +- "io/ioutil" + "os" + "path/filepath" + "sync" ++ "time" + + "github.com/containerd/console" + eventstypes "github.com/containerd/containerd/api/events" +@@ -148,9 +148,15 @@ func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ * + } + defer func() { + if err != nil { ++ logrus.Errorf("create init %s failed error=%v", r.ID, err) + if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { + log.G(ctx).WithError(err2).Warn("Failed to cleanup rootfs mount") + } ++ go func() { ++ time.Sleep(10 * time.Second) ++ os.Exit(0) ++ }() ++ + } + }() + for _, rm := range mounts { +@@ -523,7 +529,7 @@ func (s *Service) checkProcesses(e runc.Exit) { + if ip, ok := p.(*process.Init); ok { + ns := 
filepath.Base(filepath.Dir(ip.Bundle)) + events.ExitAddFile(ns, events.ExitFile(s.id, uint32(e.Pid), uint32(e.Status)), "init exited") +- ioutil.WriteFile(filepath.Join(ip.Bundle, process.InitExit), []byte(fmt.Sprintf("%d", e.Pid)), 0600) ++ events.InitExitWrite(ip.Bundle, e.Pid) + // Ensure all children are killed + if shouldKillAllOnExit(s.context, s.bundle) { + if err := ip.KillAll(s.context); err != nil { +-- +2.33.0 + diff --git a/patch/0009-containerd-support-kill-D-state-container.patch b/patch/0009-containerd-support-kill-D-state-container.patch new file mode 100644 index 0000000..9b97e3f --- /dev/null +++ b/patch/0009-containerd-support-kill-D-state-container.patch @@ -0,0 +1,68 @@ +From 0e1503aea296e419ec219e36c56edb68f1abaf0f Mon Sep 17 00:00:00 2001 +From: jingrui +Date: Tue, 18 Jun 2019 00:12:41 +0800 +Subject: [PATCH] containerd: support kill D state container + +Change-Id: I80a1c0c4f88530fe9732e6e9a2d1fb222ece118c +Signed-off-by: jingrui +--- + runtime/v1/shim/service.go | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/runtime/v1/shim/service.go b/runtime/v1/shim/service.go +index 32431a4..a3b4a8f 100644 +--- a/runtime/v1/shim/service.go ++++ b/runtime/v1/shim/service.go +@@ -26,6 +26,7 @@ import ( + "os" + "path/filepath" + "sync" ++ "syscall" + "time" + + "github.com/containerd/console" +@@ -47,6 +48,7 @@ import ( + ptypes "github.com/gogo/protobuf/types" + specs "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" ++ "golang.org/x/sys/unix" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + ) +@@ -375,11 +377,33 @@ func (s *Service) Resume(ctx context.Context, r *ptypes.Empty) (*ptypes.Empty, e + + // Kill a process with the provided signal + func (s *Service) Kill(ctx context.Context, r *shimapi.KillRequest) (*ptypes.Empty, error) { ++ delayKill := func(p process.Process) { ++ if s.id != p.ID() || r.Signal != uint32(syscall.SIGKILL) { ++ return ++ } ++ for i := 1; i < 
5; i++ { ++ time.Sleep(10 * time.Second) ++ err := p.Kill(ctx, r.Signal, r.All) ++ logrus.Infof("delay kill %s retry %d error=%v", s.id, i, err) ++ if err != nil { ++ err := unix.Kill(p.Pid(), syscall.SIGKILL) ++ logrus.Infof("delay kill-direct %s retry %d error=%v", s.id, i, err) ++ } ++ } ++ ++ logrus.Infof("force exit shim %s ...", s.id) ++ p.SetExited(137) ++ err := p.Delete(ctx) ++ logrus.Infof("force exit shim %s error=%v", s.id, err) ++ os.Exit(0) ++ } ++ + if r.ID == "" { + p, err := s.getInitProcess() + if err != nil { + return nil, err + } ++ go delayKill(p) + if err := p.Kill(ctx, r.Signal, r.All); err != nil { + return nil, errdefs.ToGRPC(err) + } +-- +2.33.0 + diff --git a/patch/0010-containerd-add-shim-exit-when-bundle-dir-does-not-ex.patch b/patch/0010-containerd-add-shim-exit-when-bundle-dir-does-not-ex.patch new file mode 100644 index 0000000..30d0f8f --- /dev/null +++ b/patch/0010-containerd-add-shim-exit-when-bundle-dir-does-not-ex.patch @@ -0,0 +1,39 @@ +From a509386405646122da735e33e0b5e7f9d9e3aaae Mon Sep 17 00:00:00 2001 +From: xiadanni1 +Date: Sat, 13 Jul 2019 06:32:54 +0800 +Subject: [PATCH] containerd:add shim exit when bundle dir does not exist + +reason: when bundle dir is deleted, containerd-shim should exit to avoid +shim.sock is occupied when container restart next time. 
+ +Change-Id: I956412598e17d15f25b91afe1cbb9e24463f04be +Signed-off-by: xiadanni1 +--- + runtime/v1/shim/service.go | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/runtime/v1/shim/service.go b/runtime/v1/shim/service.go +index a3b4a8f..4e9dfee 100644 +--- a/runtime/v1/shim/service.go ++++ b/runtime/v1/shim/service.go +@@ -149,6 +149,17 @@ func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (_ * + Options: r.Options, + } + defer func() { ++ go func() { ++ for i := 0; i < 60; i++ { ++ time.Sleep(time.Second) ++ _, err := os.Stat(r.Bundle) ++ logrus.Devour(err) ++ if os.IsNotExist(err) { ++ logrus.Errorf("bundle dir: %v does not exist, containerd-shim exit", r.Bundle) ++ os.Exit(0) ++ } ++ } ++ }() + if err != nil { + logrus.Errorf("create init %s failed error=%v", r.ID, err) + if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { +-- +2.33.0 + diff --git a/patch/0011-containerd-change-tmpfile-directory-when-exec.patch b/patch/0011-containerd-change-tmpfile-directory-when-exec.patch new file mode 100644 index 0000000..237f340 --- /dev/null +++ b/patch/0011-containerd-change-tmpfile-directory-when-exec.patch @@ -0,0 +1,44 @@ +From db6e7286f573fa346b3e04f84968fc0bf15edee7 Mon Sep 17 00:00:00 2001 +From: wujibin +Date: Wed, 14 Aug 2019 17:18:24 +0800 +Subject: [PATCH] containerd:change tmpfile directory when exec + +reason: tmp files were stored in /tmp before this change; if a large number of containers +are running, the directory will accumulate too many tmp files + +Change-Id: I1879ba9d09dca41a7571131d7447bf67356ea79c +--- + runtime/v1/linux/task.go | 3 --- + vendor/github.com/containerd/go-runc/runc.go | 2 +- + 2 files changed, 1 insertion(+), 4 deletions(-) + +diff --git a/runtime/v1/linux/task.go b/runtime/v1/linux/task.go +index 70908ae..b013466 100644 +--- a/runtime/v1/linux/task.go ++++ b/runtime/v1/linux/task.go +@@ -99,9 +99,6 @@ func (t *Task) delete(ctx context.Context, force bool, pid uint32) (*runtime.Exi + rsp, shimErr := 
t.shim.Delete(ctx, empty) + if shimErr != nil { + log.G(ctx).WithError(shimErr).Error("failed to delete container, force=%t", force) +- if !force { +- return nil, errdefs.FromGRPC(shimErr) +- } + } + t.tasks.Delete(ctx, t.id) + if err := t.shim.KillShim(ctx); err != nil { +diff --git a/vendor/github.com/containerd/go-runc/runc.go b/vendor/github.com/containerd/go-runc/runc.go +index 15fc8e1..20bb836 100644 +--- a/vendor/github.com/containerd/go-runc/runc.go ++++ b/vendor/github.com/containerd/go-runc/runc.go +@@ -226,7 +226,7 @@ func (r *Runc) Exec(context context.Context, id string, spec specs.Process, opts + if opts.Started != nil { + defer close(opts.Started) + } +- f, err := ioutil.TempFile(os.Getenv("XDG_RUNTIME_DIR"), "runc-process") ++ f, err := ioutil.TempFile(".", "runc-process") + if err != nil { + return err + } +-- +2.33.0 + diff --git a/patch/0012-containerd-stw-gc-sweep-for-arm64.patch b/patch/0012-containerd-stw-gc-sweep-for-arm64.patch new file mode 100644 index 0000000..c5f01c3 --- /dev/null +++ b/patch/0012-containerd-stw-gc-sweep-for-arm64.patch @@ -0,0 +1,53 @@ +From bd5ef07292289252531b73c371e95db1fc0a45c4 Mon Sep 17 00:00:00 2001 +From: jingrui +Date: Fri, 18 Oct 2019 14:49:47 +0800 +Subject: [PATCH] containerd: stw gc sweep for arm64 + +Change-Id: I855c13a21c72bf0e91563db7c11e1348a1a78d55 +Signed-off-by: jingrui +--- + cmd/containerd-shim/main_unix.go | 5 ----- + runtime/v1/shim/client/client.go | 4 ++++ + 2 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/cmd/containerd-shim/main_unix.go b/cmd/containerd-shim/main_unix.go +index 8dfcd90..8a05d70 100644 +--- a/cmd/containerd-shim/main_unix.go ++++ b/cmd/containerd-shim/main_unix.go +@@ -87,11 +87,6 @@ func init() { + + func main() { + debug.SetGCPercent(40) +- go func() { +- for range time.Tick(30 * time.Second) { +- debug.FreeOSMemory() +- } +- }() + + if debugFlag { + logrus.SetLevel(logrus.DebugLevel) +diff --git a/runtime/v1/shim/client/client.go 
b/runtime/v1/shim/client/client.go +index 6e5eae5..fa145c8 100644 +--- a/runtime/v1/shim/client/client.go ++++ b/runtime/v1/shim/client/client.go +@@ -27,6 +27,7 @@ import ( + "net" + "os" + "path/filepath" ++ "runtime" + "strconv" + "strings" + "sync" +@@ -218,6 +219,9 @@ func newCommand(binary, daemonAddress string, debug bool, config shim.Config, so + cmd.SysProcAttr = getSysProcAttr() + cmd.ExtraFiles = append(cmd.ExtraFiles, socket) + cmd.Env = append(os.Environ(), "GOMAXPROCS=2") ++ if runtime.GOARCH == "arm64" { ++ cmd.Env = append(cmd.Env, "GODEBUG=gcstoptheworld=2") ++ } + cmd.Stdout = stdout + cmd.Stderr = stderr + return cmd, nil +-- +2.33.0 + diff --git a/patch/0013-containerd-modify-shim-initiative-exit-time-for-post.patch b/patch/0013-containerd-modify-shim-initiative-exit-time-for-post.patch new file mode 100644 index 0000000..877f5d4 --- /dev/null +++ b/patch/0013-containerd-modify-shim-initiative-exit-time-for-post.patch @@ -0,0 +1,86 @@ +From 4f4fd234119a7ccf7ab9e7cc122f30727ba39b81 Mon Sep 17 00:00:00 2001 +From: liuzekun +Date: Thu, 21 Nov 2019 08:23:35 -0500 +Subject: [PATCH] containerd: modify shim initiative exit time for post hook + +reason: Modify shim initiative exit time for post hook. Considering that +each post hook has an execution time with a timeout (default 120s), we +should allow enough time to call all post hooks. 
+ +Signed-off-by: liuzekun +--- + runtime/v1/shim/service.go | 32 ++++++++++++++++++++++++++------ + 1 file changed, 26 insertions(+), 6 deletions(-) + +diff --git a/runtime/v1/shim/service.go b/runtime/v1/shim/service.go +index 4e9dfee..166b866 100644 +--- a/runtime/v1/shim/service.go ++++ b/runtime/v1/shim/service.go +@@ -561,12 +561,32 @@ func (s *Service) checkProcesses(e runc.Exit) { + log.G(s.context).Debugf("process with id:%d wasn't found", e.Pid) + return + } ++ shouldKillAll, bundleSpec := shouldKillAllOnExit(s.context, s.bundle) + if ip, ok := p.(*process.Init); ok { + ns := filepath.Base(filepath.Dir(ip.Bundle)) + events.ExitAddFile(ns, events.ExitFile(s.id, uint32(e.Pid), uint32(e.Status)), "init exited") + events.InitExitWrite(ip.Bundle, e.Pid) ++ go func() { ++ t := 30 ++ defer func() { ++ time.Sleep(time.Duration(t) * time.Second) ++ os.Exit(0) ++ }() ++ if bundleSpec.Hooks == nil { ++ return ++ } ++ postStopHooks := bundleSpec.Hooks.Poststop ++ for _, postStopHook := range postStopHooks { ++ hookTimeout := postStopHook.Timeout ++ if hookTimeout == nil { ++ t += 120 ++ } else { ++ t += *hookTimeout ++ } ++ } ++ }() + // Ensure all children are killed +- if shouldKillAllOnExit(s.context, s.bundle) { ++ if shouldKillAll { + if err := ip.KillAll(s.context); err != nil { + log.G(s.context).WithError(err).WithField("id", ip.ID()). 
+ Error("failed to kill init's children") +@@ -584,25 +604,25 @@ func (s *Service) checkProcesses(e runc.Exit) { + } + } + +-func shouldKillAllOnExit(ctx context.Context, bundlePath string) bool { ++func shouldKillAllOnExit(ctx context.Context, bundlePath string) (bool, specs.Spec) { + var bundleSpec specs.Spec + bundleConfigContents, err := os.ReadFile(filepath.Join(bundlePath, "config.json")) + if err != nil { + log.G(ctx).WithError(err).Error("shouldKillAllOnExit: failed to read config.json") +- return true ++ return true, specs.Spec{} + } + if err := json.Unmarshal(bundleConfigContents, &bundleSpec); err != nil { + log.G(ctx).WithError(err).Error("shouldKillAllOnExit: failed to unmarshal bundle json") +- return true ++ return true, specs.Spec{} + } + if bundleSpec.Linux != nil { + for _, ns := range bundleSpec.Linux.Namespaces { + if ns.Type == specs.PIDNamespace && ns.Path == "" { +- return false ++ return false, bundleSpec + } + } + } +- return true ++ return true, bundleSpec + } + + func (s *Service) getContainerPids(ctx context.Context, id string) ([]uint32, error) { +-- +2.33.0 + diff --git a/patch/0014-containerd-wrap-and-process-return-errors.patch b/patch/0014-containerd-wrap-and-process-return-errors.patch new file mode 100644 index 0000000..e5b53b9 --- /dev/null +++ b/patch/0014-containerd-wrap-and-process-return-errors.patch @@ -0,0 +1,36 @@ +From 648e59028cc546587e877784532cada50d62cfcb Mon Sep 17 00:00:00 2001 +From: liuzekun +Date: Mon, 23 Dec 2019 03:10:49 -0500 +Subject: [PATCH] containerd: wrap and process return errors + +reason: wrap and process return errors + +Signed-off-by: liuzekun +--- + sys/reaper/reaper_unix.go | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/sys/reaper/reaper_unix.go b/sys/reaper/reaper_unix.go +index bf42d21..007e1d0 100644 +--- a/sys/reaper/reaper_unix.go ++++ b/sys/reaper/reaper_unix.go +@@ -31,6 +31,7 @@ import ( + "time" + + runc "github.com/containerd/go-runc" ++ 
"github.com/sirupsen/logrus" + exec "golang.org/x/sys/execabs" + "golang.org/x/sys/unix" + ) +@@ -112,7 +113,7 @@ func (m *Monitor) Wait(c *exec.Cmd, ec chan runc.Exit) (int, error) { + for e := range ec { + if e.Pid == c.Process.Pid { + // make sure we flush all IO +- c.Wait() ++ logrus.Devour(c.Wait()) + m.Unsubscribe(ec) + return e.Status, nil + } +-- +2.33.0 + diff --git a/patch/0015-containerd-add-timeout-for-shim.patch b/patch/0015-containerd-add-timeout-for-shim.patch new file mode 100644 index 0000000..1769df3 --- /dev/null +++ b/patch/0015-containerd-add-timeout-for-shim.patch @@ -0,0 +1,140 @@ +From db530829309f6f37184748cd6823868b41f0be3c Mon Sep 17 00:00:00 2001 +From: xiadanni +Date: Fri, 3 Jan 2020 03:06:00 +0800 +Subject: [PATCH] containerd:add timeout for shim + +--- + runtime/v1/linux/runtime.go | 2 +- + .../containerd/go-runc/command_other.go | 1 + + vendor/github.com/containerd/go-runc/runc.go | 37 +++++++++++++++---- + 3 files changed, 31 insertions(+), 9 deletions(-) + +diff --git a/runtime/v1/linux/runtime.go b/runtime/v1/linux/runtime.go +index ff8de53..c2b146d 100644 +--- a/runtime/v1/linux/runtime.go ++++ b/runtime/v1/linux/runtime.go +@@ -437,7 +437,7 @@ func (r *Runtime) loadTasks(ctx context.Context, ns string) ([]*Task, error) { + if !events.ExitPending(ns, id, uint32(pid)) { + events.ExitAddFile(ns, events.ExitFile(id, uint32(pid), uint32(events.ExitStatusDefault)), "cleanup dirty task") + } +- err := r.cleanupAfterDeadShim(ctx, bundle, ns, id, pid) ++ err := r.cleanupAfterDeadShim(ctx, bundle, ns, id) + if err != nil { + log.G(ctx).WithError(err).WithField("bundle", bundle.path). 
+ Error("cleaning up after dead shim") +diff --git a/vendor/github.com/containerd/go-runc/command_other.go b/vendor/github.com/containerd/go-runc/command_other.go +index b8fd4b8..75d41be 100644 +--- a/vendor/github.com/containerd/go-runc/command_other.go ++++ b/vendor/github.com/containerd/go-runc/command_other.go +@@ -1,3 +1,4 @@ ++//go:build !linux + // +build !linux + + /* +diff --git a/vendor/github.com/containerd/go-runc/runc.go b/vendor/github.com/containerd/go-runc/runc.go +index 20bb836..ccf3d42 100644 +--- a/vendor/github.com/containerd/go-runc/runc.go ++++ b/vendor/github.com/containerd/go-runc/runc.go +@@ -54,7 +54,9 @@ const ( + Text Format = "text" + // DefaultCommand is the default command for Runc + DefaultCommand = "runc" +- execTimeout = 30 ++ defaultTimeout = 30 ++ startTimeout = 120 ++ updateTimeout = 60 + ) + + var ( +@@ -86,7 +88,7 @@ func (r *Runc) List(context context.Context) ([]*Container, error) { + + // State returns the state for the container provided by id + func (r *Runc) State(context context.Context, id string) (*Container, error) { +- data, err := cmdOutput(r.command(context, "state", id), true, nil) ++ data, err := cmdOutputTimeout(r.command(context, "state", id), true, nil, defaultTimeout) + defer putBuf(data) + if err != nil { + return nil, fmt.Errorf("%s: %s", err, data.String()) +@@ -192,7 +194,7 @@ func (r *Runc) Create(context context.Context, id, bundle string, opts *CreateOp + + // Start will start an already created container + func (r *Runc) Start(context context.Context, id string) error { +- return r.runOrError(r.command(context, "start", id)) ++ return r.runOrErrorTimeout(r.command(context, "start", id), startTimeout) + } + + type ExecOpts struct { +@@ -249,7 +251,7 @@ func (r *Runc) Exec(context context.Context, id string, spec specs.Process, opts + opts.Set(cmd) + } + if cmd.Stdout == nil && cmd.Stderr == nil { +- data, err := cmdOutputTimeout(cmd, true, opts.Started, createTimeout) ++ data, err := 
cmdOutputTimeout(cmd, true, opts.Started, defaultTimeout) + defer putBuf(data) + if err != nil { + return fmt.Errorf("%w: %s", err, data.String()) +@@ -270,7 +272,7 @@ func (r *Runc) Exec(context context.Context, id string, spec specs.Process, opts + } + } + } +- status, err := Monitor.WaitTimeout(cmd, ec, execTimeout) ++ status, err := Monitor.WaitTimeout(cmd, ec, defaultTimeout) + if err == nil && status != 0 { + err = fmt.Errorf("%s did not terminate successfully: %w", cmd.Args[0], &ExitError{status}) + } +@@ -349,7 +351,7 @@ func (r *Runc) Kill(context context.Context, id string, sig int, opts *KillOpts) + if opts != nil { + args = append(args, opts.args()...) + } +- return r.runOrError(r.command(context, append(args, id, strconv.Itoa(sig))...)) ++ return r.runOrErrorTimeout(r.command(context, append(args, id, strconv.Itoa(sig))...), defaultTimeout) + } + + // Stats return the stats for a container like cpu, memory, and io +@@ -425,7 +427,7 @@ func (r *Runc) Resume(context context.Context, id string) error { + + // Ps lists all the processes inside the container returning their pids + func (r *Runc) Ps(context context.Context, id string) ([]int, error) { +- data, err := cmdOutput(r.command(context, "ps", "--format", "json", id), true, nil) ++ data, err := cmdOutputTimeout(r.command(context, "ps", "--format", "json", id), true, nil, defaultTimeout) + defer putBuf(data) + if err != nil { + return nil, fmt.Errorf("%s: %s", err, data.String()) +@@ -638,7 +640,7 @@ func (r *Runc) Update(context context.Context, id string, resources *specs.Linux + args := []string{"update", "--resources", "-", id} + cmd := r.command(context, args...) 
+ cmd.Stdin = buf +- return r.runOrError(cmd) ++ return r.runOrErrorTimeout(cmd, updateTimeout) + } + + var ErrParseRuncVersion = errors.New("unable to parse runc version") +@@ -731,6 +733,25 @@ func (r *Runc) runOrError(cmd *exec.Cmd) error { + return nil + } + ++func (r *Runc) runOrErrorTimeout(cmd *exec.Cmd, runTimeout int64) error { ++ if cmd.Stdout != nil || cmd.Stderr != nil { ++ ec, err := Monitor.Start(cmd) ++ if err != nil { ++ return err ++ } ++ status, err := Monitor.WaitTimeout(cmd, ec, runTimeout) ++ if err == nil && status != 0 { ++ err = fmt.Errorf("%s did not terminate sucessfully", cmd.Args[0]) ++ } ++ return err ++ } ++ data, err := cmdOutputTimeout(cmd, true, nil, runTimeout) ++ if err != nil { ++ return fmt.Errorf("%s: %s", err, data) ++ } ++ return nil ++} ++ + // callers of cmdOutput are expected to call putBuf on the returned Buffer + // to ensure it is released back to the shared pool after use. + func cmdOutput(cmd *exec.Cmd, combined bool, started chan<- int) (*bytes.Buffer, error) { +-- +2.33.0 + diff --git a/patch/0016-containerd-clean-up-residual-container-after-shim-ab.patch b/patch/0016-containerd-clean-up-residual-container-after-shim-ab.patch new file mode 100644 index 0000000..cd60acd --- /dev/null +++ b/patch/0016-containerd-clean-up-residual-container-after-shim-ab.patch @@ -0,0 +1,88 @@ +From f098df67f8b57c0dfce5d2177e7c3c51eee23b7a Mon Sep 17 00:00:00 2001 +From: zhangtianyang +Date: Thu, 27 Feb 2020 16:51:59 +0800 +Subject: [PATCH] containerd: clean up residual container after shim abnormal + exit + +reason:from update/revert test an occasional failure has been found that +shim process has exited but container is still running, then following exec +call all report ttrpc close error. +the triggering condition is uncertain. this patch will make up the clean +work of the residual container after such failure occurred to avoid +subsequent call errors. 
+ +Change-Id: I0da9d4e46010cbe58f2fda21895caeb301936c47 +Signed-off-by: zhangtianyang +--- + runtime/v1/linux/runtime.go | 7 +++++++ + services/tasks/local.go | 24 ++++++++++++++++++++++++ + 2 files changed, 31 insertions(+) + +diff --git a/runtime/v1/linux/runtime.go b/runtime/v1/linux/runtime.go +index c2b146d..fa03e5c 100644 +--- a/runtime/v1/linux/runtime.go ++++ b/runtime/v1/linux/runtime.go +@@ -560,6 +560,13 @@ func (r *Runtime) cleanupAfterDeadShim(ctx context.Context, bundle *bundle, ns, + return nil + } + ++func (r *Runtime) CleanupAfterDeadShim(ctx context.Context, ns, id string) error { ++ bund := &bundle{id: id, ++ path: filepath.Join(r.state, ns, id), ++ workDir: filepath.Join(r.root, ns, id)} ++ return r.cleanupAfterDeadShim(ctx, bund, ns, id) ++} ++ + func (r *Runtime) terminate(ctx context.Context, bundle *bundle, ns, id string) error { + rt, err := r.getRuntime(ctx, ns, id) + if err != nil { +diff --git a/services/tasks/local.go b/services/tasks/local.go +index 96ed36c..7f60d25 100644 +--- a/services/tasks/local.go ++++ b/services/tasks/local.go +@@ -43,6 +43,7 @@ import ( + "github.com/containerd/containerd/plugin" + "github.com/containerd/containerd/runtime" + "github.com/containerd/containerd/runtime/linux/runctypes" ++ "github.com/containerd/containerd/runtime/v1/linux" + "github.com/containerd/containerd/runtime/v2/runc/options" + "github.com/containerd/containerd/services" + "github.com/containerd/typeurl" +@@ -441,11 +442,34 @@ func (l *local) Kill(ctx context.Context, r *api.KillRequest, _ ...grpc.CallOpti + } + } + if err := p.Kill(ctx, r.Signal, r.All); err != nil { ++ if (r.Signal == 9 || r.Signal == 15) && strings.Contains(err.Error(), "ttrpc: client shutting down") { ++ // not sure under what conditions will cause such ttrpc error. since the error has ++ // happened, we have to make up the clean up work to avoid container residue. 
++ cleanErr := l.cleanupResidualContainer(ctx, r, t.Namespace()) ++ log.G(ctx).WithField("clean error", cleanErr).Warnf( ++ "previous actions might encounter failure, try clean up the dead container.") ++ } + return nil, errdefs.ToGRPC(err) + } + return empty, nil + } + ++func (l *local) cleanupResidualContainer(ctx context.Context, r *api.KillRequest, namespace string) error { ++ container, err := l.getContainer(ctx, r.ContainerID) ++ if err != nil { ++ return fmt.Errorf("failed to get container %s, %v", r.ContainerID, err) ++ } ++ rt, err := l.getRuntime(container.Runtime.Name) ++ if err != nil { ++ return fmt.Errorf("failed to get runtime %s, %v", container.Runtime.Name, err) ++ } ++ lRuntime, ok := rt.(*linux.Runtime) ++ if !ok { ++ return fmt.Errorf("no clean work for runtime other than linux ones") ++ } ++ return lRuntime.CleanupAfterDeadShim(ctx, namespace, r.ContainerID) ++} ++ + func (l *local) ListPids(ctx context.Context, r *api.ListPidsRequest, _ ...grpc.CallOption) (*api.ListPidsResponse, error) { + t, err := l.getTask(ctx, r.ContainerID) + if err != nil { +-- +2.33.0 + diff --git a/patch/0017-containerd-add-LLT-for-containerd-shim-timeout-requi.patch b/patch/0017-containerd-add-LLT-for-containerd-shim-timeout-requi.patch new file mode 100644 index 0000000..1671a92 --- /dev/null +++ b/patch/0017-containerd-add-LLT-for-containerd-shim-timeout-requi.patch @@ -0,0 +1,113 @@ +From 454edc405b301dad778114c5669db618d6c0770e Mon Sep 17 00:00:00 2001 +From: xiadanni1 +Date: Tue, 3 Mar 2020 06:29:56 +0800 +Subject: [PATCH] containerd:add LLT for containerd-shim timeout requirement + +reason:add LLT testcases for containerd-shim timeout requirement. 
+ +Change-Id: If422542b72f3550d86a6eba6b19d0cdea2d2a660 +Signed-off-by: xiadanni1 +--- + .../containerd/go-runc/runc_test.go | 90 +++++++++++++++++++ + 1 file changed, 90 insertions(+) + create mode 100644 vendor/github.com/containerd/go-runc/runc_test.go + +diff --git a/vendor/github.com/containerd/go-runc/runc_test.go b/vendor/github.com/containerd/go-runc/runc_test.go +new file mode 100644 +index 0000000..8f9212d +--- /dev/null ++++ b/vendor/github.com/containerd/go-runc/runc_test.go +@@ -0,0 +1,90 @@ ++package runc ++ ++import ( ++ "context" ++ "os" ++ "os/exec" ++ "testing" ++ ++ specs "github.com/opencontainers/runtime-spec/specs-go" ++) ++ ++func TestRuncCommandInvoke(t *testing.T) { ++ rc := &Runc{ ++ Command: "/bin/true", ++ } ++ ctx := context.Background() ++ id := "containerid" ++ bundle := "bundlepath" ++ ++ createOpts := CreateOpts{} ++ err := rc.Create(ctx, id, bundle, &createOpts) ++ if err != nil { ++ t.Errorf("Create command invoke error, %v", err) ++ } ++ ++ err = rc.Start(ctx, id) ++ if err != nil { ++ t.Errorf("Start command invoke error, %v", err) ++ } ++ ++ execSpec := specs.Process{} ++ nullIO, _ := NewNullIO() ++ execOpts := ExecOpts{IO: nullIO} ++ err = rc.Exec(ctx, id, execSpec, &execOpts) ++ if err != nil { ++ t.Errorf("Exec command invoke error, %v", err) ++ } ++ ++ execOptsnil := ExecOpts{} ++ err = rc.Exec(ctx, id, execSpec, &execOptsnil) ++ if err != nil { ++ t.Errorf("Exec command invoke error, %v", err) ++ } ++ ++ killOpts := KillOpts{} ++ err = rc.Kill(ctx, id, 9, &killOpts) ++ if err != nil { ++ t.Errorf("Kill command invoke error, %v", err) ++ } ++ ++ resource := specs.LinuxResources{} ++ err = rc.Update(ctx, id, &resource) ++ if err != nil { ++ t.Errorf("Update command invoke error, %v", err) ++ } ++ ++ _, err = rc.State(ctx, id) ++ if err == nil { ++ t.Errorf("State command invoke should return error") ++ } ++ ++ _, err = rc.Ps(ctx, id) ++ if err == nil { ++ t.Errorf("Ps command invoke should return error") ++ } ++} ++ ++func 
TestRunOrErrorTimeout(t *testing.T) { ++ rc := &Runc{} ++ ++ cmd := exec.Cmd{Path: "/bin/bash2"} ++ cmd.Stdout = os.Stdout ++ err := rc.runOrErrorTimeout(&cmd, 10) ++ if err == nil { ++ t.Errorf("runOrErrorTimeout should return error") ++ } ++ ++ cmd = exec.Cmd{Path: "/usr/bin/sleep", Args: []string{"2"}} ++ cmd.Stdout = os.Stdout ++ rc.runOrErrorTimeout(&cmd, 1) ++ if err == nil { ++ t.Errorf("runOrErrorTimeout should return error") ++ } ++ ++ cmd = exec.Cmd{Path: "/usr/bin/sleep", Args: []string{"2"}} ++ rc.runOrErrorTimeout(&cmd, 1) ++ if err == nil { ++ t.Errorf("runOrErrorTimeout should return error") ++ } ++} +-- +2.33.0 + diff --git a/patch/0018-containerd-save-dumpstack-to-file.patch b/patch/0018-containerd-save-dumpstack-to-file.patch new file mode 100644 index 0000000..da02bbd --- /dev/null +++ b/patch/0018-containerd-save-dumpstack-to-file.patch @@ -0,0 +1,40 @@ +From 67bf28cc777513cb52c39d0e7961420c1690173b Mon Sep 17 00:00:00 2001 +From: xiadanni1 +Date: Tue, 3 Mar 2020 09:01:22 +0800 +Subject: [PATCH] containerd:save dumpstack to file + +Change-Id: I54a41a13b4523de279337a9ff208347859c0fb4d +Signed-off-by: xiadanni1 +--- + cmd/containerd/command/main.go | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/cmd/containerd/command/main.go b/cmd/containerd/command/main.go +index 9897dff..3d54b50 100644 +--- a/cmd/containerd/command/main.go ++++ b/cmd/containerd/command/main.go +@@ -20,11 +20,13 @@ import ( + gocontext "context" + "fmt" + "io" ++ "io/ioutil" + "net" + "os" + "os/signal" + "path/filepath" + "runtime" ++ "strings" + "time" + + "github.com/containerd/containerd/defaults" +@@ -376,6 +378,7 @@ func dumpStacks(writeToFile bool) { + bufferLen *= 2 + } + buf = buf[:stackSize] ++ logrus.Devour(ioutil.WriteFile(fmt.Sprintf("/var/run/docker/containerd/containerd-stacks-%s.log", strings.Replace(time.Now().Format(time.RFC3339), ":", "", -1)), buf, 0600)) + log.L.Infof("=== BEGIN goroutine stack dump ===\n%s\n=== END goroutine stack dump ===", buf) 
+ + if writeToFile { +-- +2.33.0 + diff --git a/patch/0019-containerd-add-timeout-for-delete-command.patch b/patch/0019-containerd-add-timeout-for-delete-command.patch new file mode 100644 index 0000000..b360f9f --- /dev/null +++ b/patch/0019-containerd-add-timeout-for-delete-command.patch @@ -0,0 +1,135 @@ +From fea270498ee58eb1a3632f564d4f3b72e9e713e7 Mon Sep 17 00:00:00 2001 +From: xiadanni +Date: Tue, 3 Mar 2020 06:31:18 +0800 +Subject: [PATCH] containerd:add timeout for delete command + +Change-Id: I620d2f19a8ac9086b5c83792a6fe49b0389da87d +Signed-off-by: xiadanni1 +--- + runtime/v1/linux/task.go | 2 +- + sys/reaper/reaper_unix.go | 18 ----------- + .../github.com/containerd/go-runc/monitor.go | 32 +++++++++++++++++-- + vendor/github.com/containerd/go-runc/runc.go | 3 +- + 4 files changed, 33 insertions(+), 22 deletions(-) + +diff --git a/runtime/v1/linux/task.go b/runtime/v1/linux/task.go +index b013466..4145846 100644 +--- a/runtime/v1/linux/task.go ++++ b/runtime/v1/linux/task.go +@@ -98,7 +98,7 @@ func (t *Task) PID(_ context.Context) (uint32, error) { + func (t *Task) delete(ctx context.Context, force bool, pid uint32) (*runtime.Exit, error) { + rsp, shimErr := t.shim.Delete(ctx, empty) + if shimErr != nil { +- log.G(ctx).WithError(shimErr).Error("failed to delete container, force=%t", force) ++ log.G(ctx).WithError(shimErr).Errorf("failed to delete container, force=%t", force) + } + t.tasks.Delete(ctx, t.id) + if err := t.shim.KillShim(ctx); err != nil { +diff --git a/sys/reaper/reaper_unix.go b/sys/reaper/reaper_unix.go +index 007e1d0..61c2e8a 100644 +--- a/sys/reaper/reaper_unix.go ++++ b/sys/reaper/reaper_unix.go +@@ -22,10 +22,6 @@ package reaper + import ( + "errors" + "fmt" +- "io/ioutil" +- "path/filepath" +- "strconv" +- "strings" + "sync" + "syscall" + "time" +@@ -287,17 +283,3 @@ func exitStatus(status unix.WaitStatus) int { + } + return status.ExitStatus() + } +- +-func SameProcess(cmd *exec.Cmd, pid int) bool { +- bytes, err := 
ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline")) +- if err != nil { +- return false +- } +- for i := range bytes { +- if bytes[i] == 0 { +- bytes[i] = 32 +- } +- } +- cmdline := string(bytes) +- return strings.EqualFold(cmdline, strings.Join(cmd.Args, " ")+" ") +-} +diff --git a/vendor/github.com/containerd/go-runc/monitor.go b/vendor/github.com/containerd/go-runc/monitor.go +index 9756491..73c8ac1 100644 +--- a/vendor/github.com/containerd/go-runc/monitor.go ++++ b/vendor/github.com/containerd/go-runc/monitor.go +@@ -17,9 +17,16 @@ + package runc + + import ( ++ "io/ioutil" + "os/exec" ++ "path/filepath" ++ "strconv" ++ "strings" + "syscall" + "time" ++ ++ "github.com/pkg/errors" ++ "github.com/sirupsen/logrus" + ) + + var Monitor ProcessMonitor = &defaultMonitor{} +@@ -77,6 +84,27 @@ func (m *defaultMonitor) Wait(c *exec.Cmd, ec chan Exit) (int, error) { + } + + func (m *defaultMonitor) WaitTimeout(c *exec.Cmd, ec chan Exit, sec int64) (int, error) { +- e := <-ec +- return e.Status, nil ++ select { ++ case <-time.After(time.Duration(sec) * time.Second): ++ if SameProcess(c, c.Process.Pid) { ++ logrus.Devour(syscall.Kill(c.Process.Pid, syscall.SIGKILL)) ++ } ++ return 0, errors.Errorf("timeout %ds for cmd(pid=%d): %s, %s", sec, c.Process.Pid, c.Path, c.Args) ++ case e := <-ec: ++ return e.Status, nil ++ } ++} ++ ++func SameProcess(cmd *exec.Cmd, pid int) bool { ++ bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline")) ++ if err != nil { ++ return false ++ } ++ for i := range bytes { ++ if bytes[i] == 0 { ++ bytes[i] = 32 ++ } ++ } ++ cmdline := string(bytes) ++ return strings.EqualFold(cmdline, strings.Join(cmd.Args, " ")+" ") + } +diff --git a/vendor/github.com/containerd/go-runc/runc.go b/vendor/github.com/containerd/go-runc/runc.go +index ccf3d42..552515c 100644 +--- a/vendor/github.com/containerd/go-runc/runc.go ++++ b/vendor/github.com/containerd/go-runc/runc.go +@@ -57,6 +57,7 @@ const ( + defaultTimeout = 30 
+ startTimeout = 120 + updateTimeout = 60 ++ deleteTimeout = 120 + ) + + var ( +@@ -328,7 +329,7 @@ func (r *Runc) Delete(context context.Context, id string, opts *DeleteOpts) erro + if opts != nil { + args = append(args, opts.args()...) + } +- return r.runOrError(r.command(context, append(args, id)...)) ++ return r.runOrErrorTimeout(r.command(context, append(args, id)...), deleteTimeout) + } + + // KillOpts specifies options for killing a container and its processes +-- +2.33.0 + diff --git a/patch/0020-containerd-check-if-bundle-exists-before-create-bund.patch b/patch/0020-containerd-check-if-bundle-exists-before-create-bund.patch new file mode 100644 index 0000000..5181256 --- /dev/null +++ b/patch/0020-containerd-check-if-bundle-exists-before-create-bund.patch @@ -0,0 +1,66 @@ +From 9d29bd060a8a0fa5783d6bbaff6ce57326b2c065 Mon Sep 17 00:00:00 2001 +From: xiadanni1 +Date: Fri, 6 Nov 2020 10:19:26 +0800 +Subject: [PATCH] containerd: check if bundle exists before create bundle + +reason: If container starts following tightly the last stop, bundle +directory may be deleted by the not yet completed stop, which may cause +container start fail. So we add bundle check during start to avoid this, +if bundle exists, wait for it to clean up. 
+ +Signed-off-by: xiadanni1 +--- + runtime/v1/linux/bundle.go | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +diff --git a/runtime/v1/linux/bundle.go b/runtime/v1/linux/bundle.go +index b1830d0..d01d41b 100644 +--- a/runtime/v1/linux/bundle.go ++++ b/runtime/v1/linux/bundle.go +@@ -26,12 +26,14 @@ import ( + "fmt" + "os" + "path/filepath" ++ "time" + + "github.com/containerd/containerd/events/exchange" + "github.com/containerd/containerd/runtime/linux/runctypes" + "github.com/containerd/containerd/runtime/v1/shim" + "github.com/containerd/containerd/runtime/v1/shim/client" + "github.com/opencontainers/runtime-spec/specs-go" ++ "github.com/sirupsen/logrus" + ) + + // loadBundle loads an existing bundle from disk +@@ -49,6 +51,19 @@ func newBundle(id, path, workDir string, spec []byte) (b *bundle, err error) { + return nil, err + } + path = filepath.Join(path, id) ++ workDir = filepath.Join(workDir, id) ++ ++ for waitTime := 10 * time.Millisecond; ; waitTime *= 2 { ++ if _, err = os.Stat(workDir); err != nil { ++ break ++ } ++ logrus.Debugf("bundle-check: wait time %v", waitTime) ++ if waitTime > 2*time.Second { ++ logrus.Warnf("bundle-check: waiting cleanup bundle timeout, start anyway") ++ break ++ } ++ time.Sleep(waitTime) ++ } + if err := os.Mkdir(path, 0700); err != nil { + return nil, err + } +@@ -60,7 +75,7 @@ func newBundle(id, path, workDir string, spec []byte) (b *bundle, err error) { + if err := prepareBundleDirectoryPermissions(path, spec); err != nil { + return nil, err + } +- workDir = filepath.Join(workDir, id) ++ + if err := os.MkdirAll(workDir, 0711); err != nil { + return nil, err + } +-- +2.33.0 + diff --git a/patch/0021-containerd-kill-container-init-process-if-runc-start.patch b/patch/0021-containerd-kill-container-init-process-if-runc-start.patch new file mode 100644 index 0000000..ce36533 --- /dev/null +++ b/patch/0021-containerd-kill-container-init-process-if-runc-start.patch @@ -0,0 +1,104 @@ +From 
4cb4c0ce6500539b4c6e4bf83a4ed1510d698338 Mon Sep 17 00:00:00 2001 +From: xiadanni +Date: Mon, 1 Feb 2021 19:36:53 +0800 +Subject: [PATCH] containerd: kill container init process if runc start returns + error + +Signed-off-by: xiadanni +--- + pkg/process/init.go | 4 +++ + utils/utils.go | 60 +++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 64 insertions(+) + create mode 100644 utils/utils.go + +diff --git a/pkg/process/init.go b/pkg/process/init.go +index 26aebdc..d373851 100644 +--- a/pkg/process/init.go ++++ b/pkg/process/init.go +@@ -34,6 +34,7 @@ import ( + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/pkg/stdio" ++ "github.com/containerd/containerd/utils" + "github.com/containerd/fifo" + runc "github.com/containerd/go-runc" + google_protobuf "github.com/gogo/protobuf/types" +@@ -262,6 +263,9 @@ func (p *Init) Start(ctx context.Context) error { + + func (p *Init) start(ctx context.Context) error { + err := p.runtime.Start(ctx, p.id) ++ if err != nil { ++ utils.KillInitProcess(p.id, p.pid) ++ } + return p.runtimeError(err, "OCI runtime start failed") + } + +diff --git a/utils/utils.go b/utils/utils.go +new file mode 100644 +index 0000000..772b15d +--- /dev/null ++++ b/utils/utils.go +@@ -0,0 +1,60 @@ ++/* ++Use of this source code is governed by Apache-2.0 ++license that can be found in the LICENSE file. 
++Description: common functions ++Author: Danni Xia ++Create: 2021-01-30 ++*/ ++ ++package utils ++ ++import ( ++ "encoding/json" ++ "io/ioutil" ++ "path/filepath" ++ "strconv" ++ "strings" ++ "syscall" ++ ++ "github.com/sirupsen/logrus" ++) ++ ++type baseState struct { ++ InitProcessStartTime string `json:"init_process_start"` ++} ++ ++func KillInitProcess(cid string, pid int) { ++ if IsInitProcess(cid, pid) { ++ syscall.Kill(pid, syscall.SIGKILL) ++ } ++} ++ ++func IsInitProcess(cid string, pid int) bool { ++ stateBytes, err1 := ioutil.ReadFile(filepath.Join("/var/run/docker/runtime-runc/moby", cid, "state.json")) ++ statBytes, err2 := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) ++ if err1 != nil || err2 != nil { ++ return true ++ } ++ ++ s := strings.Split(string(statBytes), ")") ++ if len(s) < 1 { ++ return true ++ } ++ ++ statFields := strings.Split(strings.TrimSpace(s[len(s)-1]), " ") ++ if len(statFields) < 20 { ++ return true ++ } ++ ++ var baseState baseState ++ if err := json.Unmarshal(stateBytes, &baseState); err != nil { ++ return true ++ } ++ ++ if baseState.InitProcessStartTime == statFields[19] { ++ return true ++ } ++ ++ logrus.Warnf("process(pid:%d, start time:%s) is not container %s init process", pid, statFields[19], cid) ++ return false ++} +-- +2.33.0 + diff --git a/patch/0022-containerd-fix-containerd-shim-residual-when-kill-co.patch b/patch/0022-containerd-fix-containerd-shim-residual-when-kill-co.patch new file mode 100644 index 0000000..4a37f16 --- /dev/null +++ b/patch/0022-containerd-fix-containerd-shim-residual-when-kill-co.patch @@ -0,0 +1,45 @@ +From 246fa098a96a14321da47d5df491ead7800b9c92 Mon Sep 17 00:00:00 2001 +From: xiadanni +Date: Fri, 19 Feb 2021 16:37:48 +0800 +Subject: [PATCH] containerd: fix containerd-shim residual when kill containerd + during starting container + +after shim process started, containerd will write shim socket address +to address file, but if containerd is killed before write file, 
new +containerd process could not get shim socket address, and will not +kill it even if that shim could not work. +so we write address file ahead of starting shim process. + +Signed-off-by: xiadanni +--- + runtime/v1/shim/client/client.go | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/runtime/v1/shim/client/client.go b/runtime/v1/shim/client/client.go +index fa145c8..64a9aa2 100644 +--- a/runtime/v1/shim/client/client.go ++++ b/runtime/v1/shim/client/client.go +@@ -93,6 +93,10 @@ func WithStart(binary, address, daemonAddress, cgroup string, debug bool, exitHa + go io.Copy(stdoutCopy, stdoutLog) + go io.Copy(stderrCopy, stderrLog) + ++ if err := writeFile(filepath.Join(config.Path, "address"), address); err != nil { ++ return nil, nil, err ++ } ++ + cmd, err := newCommand(binary, daemonAddress, debug, config, f, stdoutLog, stderrLog) + if err != nil { + return nil, nil, err +@@ -123,9 +127,6 @@ func WithStart(binary, address, daemonAddress, cgroup string, debug bool, exitHa + "debug": debug, + }).Infof("shim %s started", binary) + +- if err := writeFile(filepath.Join(config.Path, "address"), address); err != nil { +- return nil, nil, err +- } + if err := writeFile(filepath.Join(config.Path, "shim.pid"), strconv.Itoa(cmd.Process.Pid)); err != nil { + return nil, nil, err + } +-- +2.33.0 + diff --git a/patch/0023-containerd-fix-exec-event-missing-due-to-pid-reuse.patch b/patch/0023-containerd-fix-exec-event-missing-due-to-pid-reuse.patch new file mode 100644 index 0000000..692e21f --- /dev/null +++ b/patch/0023-containerd-fix-exec-event-missing-due-to-pid-reuse.patch @@ -0,0 +1,156 @@ +From fab5e09d4bdcce7748e036a8820078d826d00d49 Mon Sep 17 00:00:00 2001 +From: jingrui +Date: Sat, 20 Feb 2021 09:06:22 +0800 +Subject: [PATCH] containerd: fix exec event missing due to pid reuse + +When many exec request exit at nearly sametime, the Exit can match with +wrong process and return directly, the event for right process will lost +in this 
case. + +time="2021-02-19T21:10:12.250841280+08:00" level=info msg=event Pid=11623 containerID=a32a1b7923db55ebdc7483e2b9cd986e5efc750b989ad3507eb866835e8e37f4 execID=0b412ecaed98f9ea71168599a9363b8aa3b047187eadaa74973bb6c63a66118d module=libcontainerd namespace=moby topic=/tasks/exec-started +time="2021-02-19T21:10:12+08:00" level=info msg="try publish event(1) /tasks/exit &TaskExit{ContainerID:a32a1b7923db55ebdc7483e2b9cd986e5efc750b989ad3507eb866835e8e37f4,ID:0b412ecaed98f9ea71168599a9363b8aa3b047187eadaa74973bb6c63a66118d,Pid:11623,ExitStatus:0,ExitedAt:2021-02-19 21:10:12.27697416 +0800 CST m=+1893.164673481,} " +time="2021-02-19T21:11:02.944643980+08:00" level=debug msg="starting exec command 64cd335311e9b3c1c11e7360a374e3218efeb02e6578d7bc0811bad3f1820e16 in container a32a1b7923db55ebdc7483e2b9cd986e5efc750b989ad3507eb866835e8e37f4" +time="2021-02-19T21:11:06.201162360+08:00" level=debug msg="event published" ns=moby topic="/tasks/exec-started" type=containerd.events.TaskExecStarted +time="2021-02-19T21:11:57.961615320+08:00" level=warning msg="Ignoring Exit Event, no such exec command found" container=a32a1b7923db55ebdc7483e2b9cd986e5efc750b989ad3507eb866835e8e37f4 exec-id=0b412ecaed98f9ea71168599a9363b8aa3b047187eadaa74973bb6c63a66118d exec-pid=11623 + +From logs above, execID=0b412ecae with Pid=11623 exit and event +published, but new exec execID=64cd335 command reuse the Pid, but Exit +event still match previous execID=0b412ecae. so exit event for +execID=64cd335 will lost. 
+ +Change-Id: If591a282a1cc0305758130a936ee8b92c88acc6c +Signed-off-by: jingrui +--- + pkg/process/exec.go | 4 ++ + runtime/v1/shim/service.go | 92 +++++++++++++++++++------------------- + 2 files changed, 50 insertions(+), 46 deletions(-) + +diff --git a/pkg/process/exec.go b/pkg/process/exec.go +index dcd7592..9916042 100644 +--- a/pkg/process/exec.go ++++ b/pkg/process/exec.go +@@ -90,6 +90,10 @@ func (e *execProcess) SetExited(status int) { + defer e.mu.Unlock() + + e.execState.SetExited(status) ++ ++ e.pid.Lock() ++ e.pid.pid = -1 ++ e.pid.Unlock() + } + + func (e *execProcess) setExited(status int) { +diff --git a/runtime/v1/shim/service.go b/runtime/v1/shim/service.go +index 166b866..dd1a935 100644 +--- a/runtime/v1/shim/service.go ++++ b/runtime/v1/shim/service.go +@@ -548,60 +548,60 @@ func (s *Service) processExits() { + } + + func (s *Service) checkProcesses(e runc.Exit) { +- var p process.Process + s.mu.Lock() +- for _, proc := range s.processes { +- if proc.Pid() == e.Pid { +- p = proc +- break +- } +- } +- s.mu.Unlock() +- if p == nil { +- log.G(s.context).Debugf("process with id:%d wasn't found", e.Pid) +- return +- } ++ defer s.mu.Unlock() ++ ++ match := 0 + shouldKillAll, bundleSpec := shouldKillAllOnExit(s.context, s.bundle) +- if ip, ok := p.(*process.Init); ok { +- ns := filepath.Base(filepath.Dir(ip.Bundle)) +- events.ExitAddFile(ns, events.ExitFile(s.id, uint32(e.Pid), uint32(e.Status)), "init exited") +- events.InitExitWrite(ip.Bundle, e.Pid) +- go func() { +- t := 30 +- defer func() { +- time.Sleep(time.Duration(t) * time.Second) +- os.Exit(0) +- }() +- if bundleSpec.Hooks == nil { +- return ++ ++ for _, p := range s.processes { ++ if p.Pid() == e.Pid { ++ match++ ++ if match > 1 { ++ logrus.Warnf("exit for pid=%d match %d processes", e.Pid, match) + } +- postStopHooks := bundleSpec.Hooks.Poststop +- for _, postStopHook := range postStopHooks { +- hookTimeout := postStopHook.Timeout +- if hookTimeout == nil { +- t += 120 +- } else { +- t += 
*hookTimeout ++ if ip, ok := p.(*process.Init); ok { ++ ns := filepath.Base(filepath.Dir(ip.Bundle)) ++ events.ExitAddFile(ns, events.ExitFile(s.id, uint32(e.Pid), uint32(e.Status)), "init exited") ++ events.InitExitWrite(ip.Bundle, e.Pid) ++ go func() { ++ t := 30 ++ defer func() { ++ time.Sleep(time.Duration(t) * time.Second) ++ os.Exit(0) ++ }() ++ if bundleSpec.Hooks == nil { ++ return ++ } ++ postStopHooks := bundleSpec.Hooks.Poststop ++ for _, postStopHook := range postStopHooks { ++ hookTimeout := postStopHook.Timeout ++ if hookTimeout == nil { ++ t += 120 ++ } else { ++ t += *hookTimeout ++ } ++ } ++ }() ++ // Ensure all children are killed ++ if shouldKillAll { ++ if err := ip.KillAll(s.context); err != nil { ++ log.G(s.context).WithError(err).WithField("id", ip.ID()). ++ Error("failed to kill init's children") ++ } + } + } +- }() +- // Ensure all children are killed +- if shouldKillAll { +- if err := ip.KillAll(s.context); err != nil { +- log.G(s.context).WithError(err).WithField("id", ip.ID()). 
+- Error("failed to kill init's children") ++ ++ p.SetExited(e.Status) ++ s.events <- &eventstypes.TaskExit{ ++ ContainerID: s.id, ++ ID: p.ID(), ++ Pid: uint32(e.Pid), ++ ExitStatus: uint32(e.Status), ++ ExitedAt: p.ExitedAt(), + } + } + } +- +- p.SetExited(e.Status) +- s.events <- &eventstypes.TaskExit{ +- ContainerID: s.id, +- ID: p.ID(), +- Pid: uint32(e.Pid), +- ExitStatus: uint32(e.Status), +- ExitedAt: p.ExitedAt(), +- } + } + + func shouldKillAllOnExit(ctx context.Context, bundlePath string) (bool, specs.Spec) { +-- +2.33.0 + diff --git a/patch/0024-containerd-fix-dm-left-when-pause-contaienr-and-kill.patch b/patch/0024-containerd-fix-dm-left-when-pause-contaienr-and-kill.patch new file mode 100644 index 0000000..1f80391 --- /dev/null +++ b/patch/0024-containerd-fix-dm-left-when-pause-contaienr-and-kill.patch @@ -0,0 +1,35 @@ +From 2a40033d0fa4ae8cf843631c25562163edffc02d Mon Sep 17 00:00:00 2001 +From: chenjiankun +Date: Mon, 19 Apr 2021 17:08:09 +0800 +Subject: [PATCH] containerd: fix dm left when pause contaienr and kill shim + + +when shim process be killed, we will delete the runtime, but if the +status is paused, it can't be delete. So we need to resume the shim +process before delete it. 
+--- + runtime/v1/linux/runtime.go | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/runtime/v1/linux/runtime.go b/runtime/v1/linux/runtime.go +index fa03e5c..18a0a40 100644 +--- a/runtime/v1/linux/runtime.go ++++ b/runtime/v1/linux/runtime.go +@@ -572,6 +572,14 @@ func (r *Runtime) terminate(ctx context.Context, bundle *bundle, ns, id string) + if err != nil { + return err + } ++ state, err := rt.State(ctx, id) ++ if err == nil && state.Status == "paused" { ++ logrus.Warnf("container %s status is paused, try to resume before delete", id) ++ err := rt.Resume(ctx, id) ++ if err != nil { ++ log.G(ctx).WithError(err).Errorf("runtime resume %s error", id) ++ } ++ } + if err := rt.Delete(ctx, id, &runc.DeleteOpts{ + Force: true, + }); err != nil { +-- +2.33.0 + diff --git a/patch/0025-containerd-drop-opt-package.patch b/patch/0025-containerd-drop-opt-package.patch new file mode 100644 index 0000000..80ec645 --- /dev/null +++ b/patch/0025-containerd-drop-opt-package.patch @@ -0,0 +1,25 @@ +From 4fedcedb0c8cc293ee45959d64d68377b720b8fa Mon Sep 17 00:00:00 2001 +From: xiadanni +Date: Thu, 5 Aug 2021 15:24:21 +0800 +Subject: [PATCH] containerd: drop opt package + +Signed-off-by: xiadanni +--- + cmd/containerd/builtins.go | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/cmd/containerd/builtins.go b/cmd/containerd/builtins.go +index 8c6f1fe..5c67768 100644 +--- a/cmd/containerd/builtins.go ++++ b/cmd/containerd/builtins.go +@@ -32,7 +32,6 @@ import ( + _ "github.com/containerd/containerd/services/introspection" + _ "github.com/containerd/containerd/services/leases" + _ "github.com/containerd/containerd/services/namespaces" +- _ "github.com/containerd/containerd/services/opt" + _ "github.com/containerd/containerd/services/snapshots" + _ "github.com/containerd/containerd/services/tasks" + _ "github.com/containerd/containerd/services/version" +-- +2.33.0 + diff --git a/patch/0026-containerd-fix-race-access-for-mobySubcribed.patch 
b/patch/0026-containerd-fix-race-access-for-mobySubcribed.patch new file mode 100644 index 0000000..77dbbe0 --- /dev/null +++ b/patch/0026-containerd-fix-race-access-for-mobySubcribed.patch @@ -0,0 +1,62 @@ +From 23c28aca8fbb161c69836b76d03a826fb339421b Mon Sep 17 00:00:00 2001 +From: zhongjiawei +Date: Thu, 10 Aug 2023 20:49:32 +0800 +Subject: [PATCH] containerd:fix race access for mobySubcribed + +Signed-off-by: zhongjiawei +--- + events/exchange/exchange.go | 7 ++++--- + runtime/v1/shim/client/client.go | 3 +++ + 2 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/events/exchange/exchange.go b/events/exchange/exchange.go +index 162e7be..0c2337f 100644 +--- a/events/exchange/exchange.go ++++ b/events/exchange/exchange.go +@@ -20,6 +20,7 @@ import ( + "context" + "fmt" + "strings" ++ "sync/atomic" + "time" + + "github.com/containerd/containerd/errdefs" +@@ -49,10 +50,10 @@ func NewExchange() *Exchange { + var _ events.Publisher = &Exchange{} + var _ events.Forwarder = &Exchange{} + var _ events.Subscriber = &Exchange{} +-var mobySubcribed = false ++var mobySubcribed = int32(0) + + func MobySubscribed() bool { +- return mobySubcribed ++ return atomic.LoadInt32(&mobySubcribed) == 1 + } + + // Forward accepts an envelope to be directly distributed on the exchange. 
+@@ -170,7 +171,7 @@ func (e *Exchange) Subscribe(ctx context.Context, fs ...string) (ch <-chan *even + for _, s := range fs { + if !MobySubscribed() && s == "namespace==moby,topic~=|^/tasks/|" { + queue.Namespace = "moby" +- mobySubcribed = true ++ atomic.StoreInt32(&mobySubcribed, 1) + } + } + +diff --git a/runtime/v1/shim/client/client.go b/runtime/v1/shim/client/client.go +index 64a9aa2..965a5cf 100644 +--- a/runtime/v1/shim/client/client.go ++++ b/runtime/v1/shim/client/client.go +@@ -70,6 +70,9 @@ func WithStart(binary, address, daemonAddress, cgroup string, debug bool, exitHa + + f, err := socket.File() + if err != nil { ++ if err1 := RemoveSocket(address); err1 != nil { ++ logrus.Warningf("failed to remove socket %s: %w", address, err1) ++ } + return nil, nil, fmt.Errorf("failed to get fd for socket %s: %w", address, err) + } + defer f.Close() +-- +2.33.0 + diff --git a/series.conf b/series.conf index 459d06a..f09db8d 100644 --- a/series.conf +++ b/series.conf @@ -1 +1,26 @@ -patch/0001-containerd-add-check-in-spec.patch +patch/0001-containerd-event-resend-exit-event-when-detect-containerd-resta.patch +patch/0002-containerd-cleanup-container-when-containerd-dockerd.patch +patch/0003-containerd-cleanup-residual-runc-and-files-force.patch +patch/0004-containerd-shim-Dump-log-to-file-when-docker-receive.patch +patch/0005-containerd-check-shim-alive-when-containerd-is-resta.patch +patch/0006-containerd-Makefile-modify.patch +patch/0007-containerd-set-create-and-exec-timeout-to-avild-bloc.patch +patch/0008-containerd-skip-load-task-in-creating-and-optimize-i.patch +patch/0009-containerd-support-kill-D-state-container.patch +patch/0010-containerd-add-shim-exit-when-bundle-dir-does-not-ex.patch +patch/0011-containerd-change-tmpfile-directory-when-exec.patch +patch/0012-containerd-stw-gc-sweep-for-arm64.patch +patch/0013-containerd-modify-shim-initiative-exit-time-for-post.patch +patch/0014-containerd-wrap-and-process-return-errors.patch 
+patch/0015-containerd-add-timeout-for-shim.patch +patch/0016-containerd-clean-up-residual-container-after-shim-ab.patch +patch/0017-containerd-add-LLT-for-containerd-shim-timeout-requi.patch +patch/0018-containerd-save-dumpstack-to-file.patch +patch/0019-containerd-add-timeout-for-delete-command.patch +patch/0020-containerd-check-if-bundle-exists-before-create-bund.patch +patch/0021-containerd-kill-container-init-process-if-runc-start.patch +patch/0022-containerd-fix-containerd-shim-residual-when-kill-co.patch +patch/0023-containerd-fix-exec-event-missing-due-to-pid-reuse.patch +patch/0024-containerd-fix-dm-left-when-pause-contaienr-and-kill.patch +patch/0025-containerd-drop-opt-package.patch +patch/0026-containerd-fix-race-access-for-mobySubcribed.patch -- Gitee