From 301462fde4899cd4ac695ec43c05b2ac523e2ac0 Mon Sep 17 00:00:00 2001 From: zhangxiaoyu Date: Fri, 15 Jul 2022 17:14:11 +0800 Subject: [PATCH] refactor patch code of utils commands and so on Signed-off-by: zhangxiaoyu --- 0001-huawei-adapt-to-huawei-4.0.3.patch | 22319 ---------------- ...tch-code-of-utils-commands-and-so-on.patch | 2340 ++ 0002-add-mount-label-for-rootfs.patch | 1214 - 0003-format-code-and-verify-mount-mode.patch | 423 - ...nition-of-the-thread-attributes-obje.patch | 162 - ...ug-caused-by-fstype-being-NULL-durin.patch | 65 - ...-catch-signal-SIGTERM-in-lxc-monitor.patch | 40 - ...e-instead-of-security_context_t-beca.patch | 27 - ...ss-correct-mount-dir-as-root-to-hook.patch | 26 - ...refact-cgroup-manager-to-single-file.patch | 4416 --- ...-adjust-log-level-from-error-to-warn.patch | 76 - ...dd-make-private-for-root.path-parent.patch | 28 - ...ssible-to-bind-mount-proc-and-sys-fs.patch | 204 - ...nix-domain-sockets-instead-of-abstra.patch | 401 - 0014-api-add-get-container-metrics-api.patch | 266 - ...olution-optimization-and-enhancement.patch | 92 - ...d-using-void-pointers-in-caclulation.patch | 40 - ...ix-compilation-errors-without-libcap.patch | 32 - ...ix-io-data-miss-when-exec-with-pipes.patch | 132 - ...otal_inactive_file-metric-for-memory.patch | 39 - 0020-support-cgroup-v2.patch | 582 - 0021-support-isula-exec-workdir.patch | 90 - ...or-message-if-process-workdir-failed.patch | 32 - 0023-log-support-long-syslog-tag.patch | 32 - ...-adjust-log-level-from-error-to-warn.patch | 68 - ...len-first-and-malloc-read-buff-by-le.patch | 160 - ...oredump-when-cgroup-get-return-error.patch | 57 - 0027-add-help-for-new-arguments.patch | 70 - ...omp-init-and-destroy-notifier.cookie.patch | 37 - ...st-use-origin-loop-if-do-not-have-io.patch | 26 - 0030-conf-fix-a-memory-leak.patch | 48 - ...fix-lsm_se_mount_context-memory-leak.patch | 25 - 0032-disable-lxc_keep-with-oci-image.patch | 169 - ...at-the-idmap-pointer-itself-is-freed.patch | 71 - 
...ng-fix-cgroup-attach-cgroup-creation.patch | 74 - 0035-adapt-upstream-compiler-settings.patch | 29 - 0036-compile-in-android-env.patch | 200 - 0037-fix-always-print-and-temp-len.patch | 39 - ...ust-print-error-when-new-lock-failed.patch | 30 - 0039-fix-bug-of-memory-free.patch | 98 - ...ay-to-convert-selinux-label-to-shared.path | 107 - ...free-the-pointer-returned-by-dirname.patch | 38 - ...d-x-permission-when-create-directory.patch | 26 - ...layload-and-attach-cgroup-if-no-cont.patch | 128 - lxc.spec | 52 +- 45 files changed, 2348 insertions(+), 32282 deletions(-) delete mode 100644 0001-huawei-adapt-to-huawei-4.0.3.patch create mode 100644 0001-refactor-patch-code-of-utils-commands-and-so-on.patch delete mode 100644 0002-add-mount-label-for-rootfs.patch delete mode 100644 0003-format-code-and-verify-mount-mode.patch delete mode 100644 0004-Removes-the-definition-of-the-thread-attributes-obje.patch delete mode 100644 0005-solve-coredump-bug-caused-by-fstype-being-NULL-durin.patch delete mode 100644 0006-SIGTERM-do-not-catch-signal-SIGTERM-in-lxc-monitor.patch delete mode 100644 0007-Using-string-type-instead-of-security_context_t-beca.patch delete mode 100644 0008-hook-pass-correct-mount-dir-as-root-to-hook.patch delete mode 100644 0009-cgroup-refact-cgroup-manager-to-single-file.patch delete mode 100644 0010-cgfsng-adjust-log-level-from-error-to-warn.patch delete mode 100644 0011-rootfs-add-make-private-for-root.path-parent.patch delete mode 100644 0012-mount-make-possible-to-bind-mount-proc-and-sys-fs.patch delete mode 100644 0013-use-path-based-unix-domain-sockets-instead-of-abstra.patch delete mode 100644 0014-api-add-get-container-metrics-api.patch delete mode 100644 0015-Streaming-IO-solution-optimization-and-enhancement.patch delete mode 100644 0016-avoid-using-void-pointers-in-caclulation.patch delete mode 100644 0017-fix-compilation-errors-without-libcap.patch delete mode 100644 0018-IO-fix-io-data-miss-when-exec-with-pipes.patch delete mode 100644 
0019-metrics-add-total_inactive_file-metric-for-memory.patch delete mode 100644 0020-support-cgroup-v2.patch delete mode 100644 0021-support-isula-exec-workdir.patch delete mode 100644 0022-print-error-message-if-process-workdir-failed.patch delete mode 100644 0023-log-support-long-syslog-tag.patch delete mode 100644 0024-log-adjust-log-level-from-error-to-warn.patch delete mode 100644 0025-get-cgroup-data-len-first-and-malloc-read-buff-by-le.patch delete mode 100644 0026-coredump-fix-coredump-when-cgroup-get-return-error.patch delete mode 100644 0027-add-help-for-new-arguments.patch delete mode 100644 0028-seccomp-init-and-destroy-notifier.cookie.patch delete mode 100644 0029-just-use-origin-loop-if-do-not-have-io.patch delete mode 100644 0030-conf-fix-a-memory-leak.patch delete mode 100644 0031-fix-lsm_se_mount_context-memory-leak.patch delete mode 100644 0032-disable-lxc_keep-with-oci-image.patch delete mode 100644 0033-conf-ensure-that-the-idmap-pointer-itself-is-freed.patch delete mode 100644 0034-cgfsng-fix-cgroup-attach-cgroup-creation.patch delete mode 100644 0035-adapt-upstream-compiler-settings.patch delete mode 100644 0036-compile-in-android-env.patch delete mode 100644 0037-fix-always-print-and-temp-len.patch delete mode 100644 0038-just-print-error-when-new-lock-failed.patch delete mode 100644 0039-fix-bug-of-memory-free.patch delete mode 100644 0040-refactor-the-way-to-convert-selinux-label-to-shared.path delete mode 100644 0041-do-not-free-the-pointer-returned-by-dirname.patch delete mode 100644 0042-add-x-permission-when-create-directory.patch delete mode 100644 0043-do-not-operate-playload-and-attach-cgroup-if-no-cont.patch diff --git a/0001-huawei-adapt-to-huawei-4.0.3.patch b/0001-huawei-adapt-to-huawei-4.0.3.patch deleted file mode 100644 index c75eba8..0000000 --- a/0001-huawei-adapt-to-huawei-4.0.3.patch +++ /dev/null @@ -1,22319 +0,0 @@ -From 8a62b519510080bb361cdd058d0e7a5edd955a95 Mon Sep 17 00:00:00 2001 -From: lifeng68 -Date: Wed, 15 Jul 
2020 09:32:32 +0800 -Subject: [PATCH 01/10] huawei: adapt to huawei 4.0.3 - -Signed-off-by: lifeng68 ---- - CODING_STYLE.md | 13 +- - .../apparmor/abstractions/start-container.in | 2 - - config/init/common/lxc-net.in | 2 +- - config/templates/common.conf.in | 31 - - config/templates/userns.conf.in | 8 - - config/yum/lxc-patch.py | 1 + - configure.ac | 59 +- - doc/ja/lxc.container.conf.sgml.in | 20 +- - doc/lxc.container.conf.sgml.in | 18 +- - hooks/Makefile.am | 3 + - src/include/fexecve.c | 6 +- - src/include/openpty.c | 28 +- - src/include/openpty.h | 10 +- - src/lxc/Makefile.am | 47 +- - src/lxc/af_unix.c | 37 +- - src/lxc/af_unix.h | 34 +- - src/lxc/api_extensions.h | 1 - - src/lxc/attach.c | 543 +++- - src/lxc/attach.h | 6 + - src/lxc/attach_options.h | 21 +- - src/lxc/cgroups/cgfsng.c | 1363 +++++++-- - src/lxc/cgroups/cgroup.c | 2 +- - src/lxc/cgroups/cgroup.h | 27 +- - src/lxc/cgroups/cgroup2_devices.c | 41 +- - src/lxc/cmd/lxc-update-config.in | 2 +- - src/lxc/cmd/lxc_init.c | 2 +- - src/lxc/cmd/lxc_monitord.c | 2 +- - src/lxc/cmd/lxc_user_nic.c | 60 +- - src/lxc/cmd/lxc_usernsexec.c | 10 +- - src/lxc/commands.c | 293 +- - src/lxc/commands.h | 18 +- - src/lxc/commands_utils.c | 5 +- - src/lxc/compiler.h | 18 - - src/lxc/conf.c | 2517 ++++++++++++++--- - src/lxc/conf.h | 111 +- - src/lxc/confile.c | 645 ++++- - src/lxc/confile.h | 11 +- - src/lxc/confile_utils.c | 12 - - src/lxc/confile_utils.h | 10 +- - src/lxc/criu.c | 27 +- - src/lxc/exec_commands.c | 416 +++ - src/lxc/exec_commands.h | 73 + - src/lxc/execute.c | 23 +- - src/lxc/file_utils.h | 49 +- - src/lxc/initutils.c | 4 + - src/lxc/isulad_utils.c | 99 + - src/lxc/isulad_utils.h | 20 + - src/lxc/json/defs.c | 205 ++ - src/lxc/json/defs.h | 37 + - src/lxc/json/json_common.c | 1153 ++++++++ - src/lxc/json/json_common.h | 185 ++ - src/lxc/json/logger_json_file.c | 246 ++ - src/lxc/json/logger_json_file.h | 45 + - src/lxc/json/oci_runtime_hooks.c | 52 + - src/lxc/json/oci_runtime_hooks.h | 15 + - 
src/lxc/json/oci_runtime_spec.c | 195 ++ - src/lxc/json/oci_runtime_spec.h | 37 + - src/lxc/json/read-file.c | 95 + - src/lxc/json/read-file.h | 11 + - src/lxc/log.c | 72 +- - src/lxc/log.h | 23 +- - src/lxc/lsm/apparmor.c | 12 +- - src/lxc/lxc.h | 20 +- - src/lxc/lxccontainer.c | 674 ++++- - src/lxc/lxccontainer.h | 129 +- - src/lxc/lxclock.c | 27 + - src/lxc/lxclock.h | 4 + - src/lxc/macro.h | 17 - - src/lxc/mainloop.c | 15 +- - src/lxc/mainloop.h | 4 - - src/lxc/memory_utils.h | 8 +- - src/lxc/namespace.c | 27 + - src/lxc/namespace.h | 90 + - src/lxc/network.c | 105 +- - src/lxc/network.h | 4 +- - src/lxc/path.c | 655 +++++ - src/lxc/path.h | 65 + - src/lxc/process_utils.h | 290 -- - src/lxc/{process_utils.c => raw_syscalls.c} | 71 +- - src/lxc/raw_syscalls.h | 94 + - src/lxc/rexec.c | 14 +- - src/lxc/seccomp.c | 582 +++- - src/lxc/start.c | 1052 ++++++- - src/lxc/start.h | 43 +- - src/lxc/storage/block.c | 86 + - src/lxc/storage/block.h | 41 + - src/lxc/storage/btrfs.c | 11 + - src/lxc/storage/dir.c | 36 +- - src/lxc/storage/loop.c | 36 +- - src/lxc/storage/overlay.c | 8 + - src/lxc/storage/rsync.c | 8 +- - src/lxc/storage/storage.c | 29 +- - src/lxc/storage/storage_utils.c | 56 +- - src/lxc/storage/zfs.c | 15 +- - src/lxc/string_utils.c | 1 + - src/lxc/sync.h | 4 + - src/lxc/syscall_numbers.h | 112 +- - src/lxc/syscall_wrappers.h | 24 - - src/lxc/terminal.c | 1113 +++++++- - src/lxc/terminal.h | 67 +- - src/lxc/tools/arguments.h | 23 + - src/lxc/tools/lxc_attach.c | 399 ++- - src/lxc/tools/lxc_ls.c | 16 +- - src/lxc/tools/lxc_start.c | 95 + - src/lxc/utils.c | 297 +- - src/lxc/utils.h | 95 +- - src/lxc/uuid.c | 2 +- - src/tests/Makefile.am | 10 +- - src/tests/attach.c | 9 + - src/tests/console.c | 24 +- - src/tests/containertests.c | 2 +- - src/tests/lxc-test-no-new-privs | 6 +- - src/tests/lxc-test-usernsexec | 368 --- - src/tests/lxc_raw_clone.c | 2 +- - templates/lxc-oci.in | 3 +- - 115 files changed, 13657 insertions(+), 2464 deletions(-) - create mode 
100644 src/lxc/exec_commands.c - create mode 100644 src/lxc/exec_commands.h - create mode 100644 src/lxc/isulad_utils.c - create mode 100644 src/lxc/isulad_utils.h - create mode 100644 src/lxc/json/defs.c - create mode 100644 src/lxc/json/defs.h - create mode 100755 src/lxc/json/json_common.c - create mode 100755 src/lxc/json/json_common.h - create mode 100644 src/lxc/json/logger_json_file.c - create mode 100644 src/lxc/json/logger_json_file.h - create mode 100644 src/lxc/json/oci_runtime_hooks.c - create mode 100644 src/lxc/json/oci_runtime_hooks.h - create mode 100644 src/lxc/json/oci_runtime_spec.c - create mode 100644 src/lxc/json/oci_runtime_spec.h - create mode 100644 src/lxc/json/read-file.c - create mode 100644 src/lxc/json/read-file.h - create mode 100644 src/lxc/path.c - create mode 100644 src/lxc/path.h - delete mode 100644 src/lxc/process_utils.h - rename src/lxc/{process_utils.c => raw_syscalls.c} (68%) - create mode 100644 src/lxc/raw_syscalls.h - create mode 100644 src/lxc/storage/block.c - create mode 100644 src/lxc/storage/block.h - delete mode 100755 src/tests/lxc-test-usernsexec - -diff --git a/CODING_STYLE.md b/CODING_STYLE.md -index bf8b304a5..6e2ad8562 100644 ---- a/CODING_STYLE.md -+++ b/CODING_STYLE.md -@@ -733,11 +733,11 @@ __do_closedir __attribute__((__cleanup__(__auto_closedir__))) - ``` - For example: - ```c --void turn_into_dependent_mounts(void) -+void remount_all_slave(void) - { - __do_free char *line = NULL; - __do_fclose FILE *f = NULL; -- __do_close int memfd = -EBADF, mntinfo_fd = -EBADF; -+ __do_close_prot_errno int memfd = -EBADF, mntinfo_fd = -EBADF; - int ret; - ssize_t copied; - size_t len = 0; -@@ -780,7 +780,7 @@ again: - return; - } - -- f = fdopen(memfd, "re"); -+ f = fdopen(memfd, "r"); - if (!f) { - SYSERROR("Failed to open copy of \"/proc/self/mountinfo\" to mark all shared. 
Continuing"); - return; -@@ -810,11 +810,12 @@ again: - null_endofword(target); - ret = mount(NULL, target, NULL, MS_SLAVE, NULL); - if (ret < 0) { -- SYSERROR("Failed to recursively turn old root mount tree into dependent mount. Continuing..."); -+ SYSERROR("Failed to make \"%s\" MS_SLAVE", target); -+ ERROR("Continuing..."); - continue; - } -- TRACE("Recursively turned old root mount tree into dependent mount"); -+ TRACE("Remounted \"%s\" as MS_SLAVE", target); - } -- TRACE("Turned all mount table entries into dependent mount"); -+ TRACE("Remounted all mount table entries as MS_SLAVE"); - } - ``` -diff --git a/config/apparmor/abstractions/start-container.in b/config/apparmor/abstractions/start-container.in -index 9998f1121..f2b48235d 100644 ---- a/config/apparmor/abstractions/start-container.in -+++ b/config/apparmor/abstractions/start-container.in -@@ -21,8 +21,6 @@ - # allow pre-mount hooks to stage mounts under /var/lib/lxc// - mount -> /var/lib/lxc/{**,}, - -- mount /dev/.lxc-boot-id -> /proc/sys/kernel/random/boot_id, -- - # required for some pre-mount hooks - mount fstype=overlayfs, - mount fstype=aufs, -diff --git a/config/init/common/lxc-net.in b/config/init/common/lxc-net.in -index a7dfa6f19..df9f1181d 100644 ---- a/config/init/common/lxc-net.in -+++ b/config/init/common/lxc-net.in -@@ -46,7 +46,7 @@ _ifdown() { - _ifup() { - MASK=`_netmask2cidr ${LXC_NETMASK}` - CIDR_ADDR="${LXC_ADDR}/${MASK}" -- ip addr add ${CIDR_ADDR} broadcast + dev ${LXC_BRIDGE} -+ ip addr add ${CIDR_ADDR} dev ${LXC_BRIDGE} - ip link set dev ${LXC_BRIDGE} address $LXC_BRIDGE_MAC - ip link set dev ${LXC_BRIDGE} up - } -diff --git a/config/templates/common.conf.in b/config/templates/common.conf.in -index 286c5e4a3..c4b3bdcce 100644 ---- a/config/templates/common.conf.in -+++ b/config/templates/common.conf.in -@@ -15,8 +15,6 @@ lxc.cap.drop = mac_admin mac_override sys_time sys_module sys_rawio - # Ensure hostname is changed on clone - lxc.hook.clone = @LXCHOOKDIR@/clonehostname - --# 
Default legacy cgroup configuration --# - # CGroup whitelist - lxc.cgroup.devices.deny = a - ## Allow any mknod (but not reading/writing the node) -@@ -44,35 +42,6 @@ lxc.cgroup.devices.allow = c 136:* rwm - ### fuse - lxc.cgroup.devices.allow = c 10:229 rwm - --# Default unified cgroup configuration --# --# CGroup whitelist --lxc.cgroup2.devices.deny = a --## Allow any mknod (but not reading/writing the node) --lxc.cgroup2.devices.allow = c *:* m --lxc.cgroup2.devices.allow = b *:* m --## Allow specific devices --### /dev/null --lxc.cgroup2.devices.allow = c 1:3 rwm --### /dev/zero --lxc.cgroup2.devices.allow = c 1:5 rwm --### /dev/full --lxc.cgroup2.devices.allow = c 1:7 rwm --### /dev/tty --lxc.cgroup2.devices.allow = c 5:0 rwm --### /dev/console --lxc.cgroup2.devices.allow = c 5:1 rwm --### /dev/ptmx --lxc.cgroup2.devices.allow = c 5:2 rwm --### /dev/random --lxc.cgroup2.devices.allow = c 1:8 rwm --### /dev/urandom --lxc.cgroup2.devices.allow = c 1:9 rwm --### /dev/pts/* --lxc.cgroup2.devices.allow = c 136:* rwm --### fuse --lxc.cgroup2.devices.allow = c 10:229 rwm -- - # Setup the default mounts - lxc.mount.auto = cgroup:mixed proc:mixed sys:mixed - lxc.mount.entry = /sys/fs/fuse/connections sys/fs/fuse/connections none bind,optional 0 0 -diff --git a/config/templates/userns.conf.in b/config/templates/userns.conf.in -index 69d992680..19013da5b 100644 ---- a/config/templates/userns.conf.in -+++ b/config/templates/userns.conf.in -@@ -1,15 +1,7 @@ - # CAP_SYS_ADMIN in init-user-ns is required for cgroup.devices --# --# Default legacy cgroup configuration --# - lxc.cgroup.devices.deny = - lxc.cgroup.devices.allow = - --# Default unified cgroup configuration --# --lxc.cgroup2.devices.deny = --lxc.cgroup2.devices.allow = -- - # Start with a full set of capabilities in user namespaces. 
- lxc.cap.drop = - lxc.cap.keep = -diff --git a/config/yum/lxc-patch.py b/config/yum/lxc-patch.py -index fd48298d6..d639e8425 100644 ---- a/config/yum/lxc-patch.py -+++ b/config/yum/lxc-patch.py -@@ -24,6 +24,7 @@ - import os - from fnmatch import fnmatch - from yum.plugins import TYPE_INTERACTIVE -+from yum.plugins import PluginYumExit - - requires_api_version = '2.0' - plugin_type = (TYPE_INTERACTIVE,) -diff --git a/configure.ac b/configure.ac -index 059d57d38..9eb6dcb2b 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -43,6 +43,7 @@ AM_INIT_AUTOMAKE([-Wall -Werror -Wno-portability subdir-objects]) - AC_CANONICAL_HOST - AM_PROG_CC_C_O - AC_USE_SYSTEM_EXTENSIONS -+CFLAGS=`echo "${CFLAGS#\-g}"` - - # Test if we have a new enough compiler. - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ -@@ -119,6 +120,9 @@ AM_CONDITIONAL([DISTRO_UBUNTU], [test "x$with_distro" = "xubuntu"]) - - AC_CONFIG_LINKS([config/etc/default.conf:config/etc/${distroconf}]) - -+# Check yajl -+PKG_CHECK_MODULES([YAJL], [yajl >= 2],[],[AC_MSG_ERROR([You must install yajl >= 2])]) -+ - # Check for init system type - AC_MSG_CHECKING([for init system type]) - AC_ARG_WITH([init-script], -@@ -187,6 +191,11 @@ AC_ARG_ENABLE([werror], - [AS_HELP_STRING([--disable-werror], [do not treat warnings as errors])], - [enable_werror=$enableval], [enable_werror=yes]) - -+AC_ARG_ENABLE([debug], -+ [AC_HELP_STRING([--enable-debug], -+ [set -g into cflags [default=no]])], -+ [], [enable_debug=no]) -+ - # Allow disabling rpath - AC_ARG_ENABLE([rpath], - [AS_HELP_STRING([--enable-rpath], [set rpath in executables [default=no]])], -@@ -487,7 +496,7 @@ AC_ARG_WITH([rootfs-path], - # cgroup pattern specification - AC_ARG_WITH([cgroup-pattern], - [AS_HELP_STRING([--with-cgroup-pattern=pattern], [pattern for container cgroups])], -- [with_cgroup_pattern=$withval], [with_cgroup_pattern=['']]) -+ [with_cgroup_pattern=$withval], [with_cgroup_pattern=['lxc/%n']]) - - # The path for the apparmor_parser's cache for generated apparmor 
profiles - AC_ARG_WITH([apparmor-cache-dir], -@@ -622,10 +631,7 @@ AC_CHECK_HEADER([ifaddrs.h], - AC_HEADER_MAJOR - - # Check for some syscalls functions --AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create move_mount open_tree execveat clone3]) --AC_CHECK_TYPES([struct clone_args], [], [], [[#include ]]) --AC_CHECK_MEMBERS([struct clone_args.set_tid],[],[],[[#include ]]) --AC_CHECK_MEMBERS([struct clone_args.cgroup],[],[],[[#include ]]) -+AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr faccessat gettid memfd_create]) - - # Check for strerror_r() support. Defines: - # - HAVE_STRERROR_R if available -@@ -732,7 +738,6 @@ AX_CHECK_COMPILE_FLAG([-fno-strict-aliasing], [CFLAGS="$CFLAGS -fno-strict-alias - AX_CHECK_COMPILE_FLAG([-fstack-clash-protection], [CFLAGS="$CFLAGS -fstack-clash-protection"],,[-Werror]) - AX_CHECK_LINK_FLAG([-fstack-protector-strong], [CFLAGS="$CFLAGS -fstack-protector-strong"],,[-Werror]) - AX_CHECK_LINK_FLAG([--param=ssp-buffer-size=4], [CFLAGS="$CFLAGS --param=ssp-buffer-size=4"],,[-Werror]) --AX_CHECK_COMPILE_FLAG([-g], [CFLAGS="$CFLAGS -g"],,[-Werror]) - AX_CHECK_COMPILE_FLAG([--mcet -fcf-protection], [CFLAGS="$CFLAGS --mcet -fcf-protection"],,[-Werror]) - AX_CHECK_COMPILE_FLAG([-Werror=implicit-function-declaration], [CFLAGS="$CFLAGS -Werror=implicit-function-declaration"],,[-Werror]) - AX_CHECK_COMPILE_FLAG([-Wlogical-op], [CFLAGS="$CFLAGS -Wlogical-op"],,[-Werror]) -@@ -756,40 +761,24 @@ AX_CHECK_COMPILE_FLAG([-Wnested-externs], [CFLAGS="$CFLAGS -Wnested-externs"],,[ - AX_CHECK_COMPILE_FLAG([-fasynchronous-unwind-tables], [CFLAGS="$CFLAGS -fasynchronous-unwind-tables"],,[-Werror]) - AX_CHECK_COMPILE_FLAG([-pipe], [CFLAGS="$CFLAGS -pipe"],,[-Werror]) - AX_CHECK_COMPILE_FLAG([-fexceptions], [CFLAGS="$CFLAGS -fexceptions"],,[-Werror]) --AX_CHECK_COMPILE_FLAG([-Warray-bounds], [CFLAGS="$CFLAGS -Warray-bounds"],,[-Werror]) --AX_CHECK_COMPILE_FLAG([-Wrestrict], [CFLAGS="$CFLAGS 
-Wrestrict"],,[-Werror]) --AX_CHECK_COMPILE_FLAG([-Wreturn-local-addr], [CFLAGS="$CFLAGS -Wreturn-local-addr"],,[-Werror]) --AX_CHECK_COMPILE_FLAG([-Wstringop-overflow], [CFLAGS="$CFLAGS -Wstringop-overflow"],,[-Werror]) - - AX_CHECK_LINK_FLAG([-z relro], [LDFLAGS="$LDFLAGS -z relro"],,[]) - AX_CHECK_LINK_FLAG([-z now], [LDFLAGS="$LDFLAGS -z now"],,[]) -+AX_CHECK_LINK_FLAG([-z noexecstack], [LDFLAGS="$LDFLAGS -z noexecstack"],,[]) - --CFLAGS="$CFLAGS -Wvla -std=gnu11 -fms-extensions" -+CFLAGS="$CFLAGS -Wvla -std=gnu11 -D_FORTIFY_SOURCE=2 -Wall -fPIC -fPIE -pie" - if test "x$enable_werror" = "xyes"; then - CFLAGS="$CFLAGS -Werror" - fi - -+if test "x$enable_debug" = "xyes"; then -+ CFLAGS="$CFLAGS -g" -+fi -+ - AC_ARG_ENABLE([thread-safety], - [AS_HELP_STRING([--enable-thread-safety], [enforce thread-safety otherwise fail the build [default=yes]])], - [enable_thread_safety=$enableval], [enable_thread_safety=yes]) - AM_CONDITIONAL([ENFORCE_THREAD_SAFETY], [test "x$enable_thread_safety" = "xyes"]) --if test "x$enable_thread_safety" = "xyes"; then -- AC_DEFINE([ENFORCE_THREAD_SAFETY], 1, [enforce thread-safety otherwise fail the build]) -- AC_MSG_RESULT([yes]) --else -- AC_MSG_RESULT([no]) --fi -- --AC_ARG_ENABLE([coverity-build], -- [AS_HELP_STRING([--enable-coverity-build], [build for use with Coverity [default=no]])], -- [enable_coverity_build=$enableval], [enable_coverity_build=no]) --AM_CONDITIONAL([ENABLE_COVERITY_BUILD], [test "x$enable_coverity_build" = "xyes"]) --if test "x$enable_coverity_build" = "xyes"; then -- AC_DEFINE([ENABLE_COVERITY_BUILD], 1, [build for use with Coverity]) -- AC_MSG_RESULT([yes]) --else -- AC_MSG_RESULT([no]) --fi - - AC_ARG_ENABLE([dlog], - [AS_HELP_STRING([--enable-dlog], [enable dlog support [default=no]])], -@@ -815,6 +804,17 @@ else - AC_MSG_RESULT([no]) - fi - -+AC_MSG_CHECKING([Whether adapt to iSulad]) -+AC_ARG_ENABLE([isulad], -+ [AC_HELP_STRING([--enable-isulad], [enable adapt to iSulad [default=yes]])], -+ 
[adapt_isulad=$enableval], [adapt_isulad=yes]) -+AM_CONDITIONAL([HAVE_ISULAD], [test "x$adapt_isulad" = "xyes"]) -+if test "x$adapt_isulad" = "xyes"; then -+ AC_DEFINE([HAVE_ISULAD], 1, [adapt to iSulad]) -+ AC_MSG_RESULT([yes]) -+else -+ AC_MSG_RESULT([no]) -+fi - # Files requiring some variable expansion - AC_CONFIG_FILES([ - Makefile -@@ -1061,10 +1061,9 @@ Documentation: - - user documentation: $enable_doc - - Debugging: -+ - tests: $enable_tests - - ASAN: $enable_asan -- - Coverity: $enable_coverity_build - - mutex debugging: $enable_mutex_debugging -- - tests: $enable_tests - - Paths: - - Logs in configpath: $enable_configpath_log -diff --git a/doc/ja/lxc.container.conf.sgml.in b/doc/ja/lxc.container.conf.sgml.in -index 38b623243..fc692b409 100644 ---- a/doc/ja/lxc.container.conf.sgml.in -+++ b/doc/ja/lxc.container.conf.sgml.in -@@ -713,25 +713,25 @@ by KATOH Yasufumi - modes are , and - . It defaults to mode. - In mode TX processing up to L3 happens on the stack instance -- attached to the dependent device and packets are switched to the stack instance of the -- parent device for the L2 processing and routing from that instance will be -- used before packets are queued on the outbound device. In this mode the dependent devices -+ attached to the slave device and packets are switched to the stack instance of the -+ master device for the L2 processing and routing from that instance will be -+ used before packets are queued on the outbound device. In this mode the slaves - will not receive nor can send multicast / broadcast traffic. - In mode TX processing is very similar to the L3 mode except that - iptables (conn-tracking) works in this mode and hence it is L3-symmetric (L3s). - This will have slightly less performance but that shouldn't matter since you are - choosing this mode over plain-L3 mode to make conn-tracking work. 
- In mode TX processing happens on the stack instance attached to -- the dependent device and packets are switched and queued to the parent device to send -- out. In this mode the dependent devices will RX/TX multicast and broadcast (if applicable) as well. -+ the slave device and packets are switched and queued to the master device to send -+ out. In this mode the slaves will RX/TX multicast and broadcast (if applicable) as well. - specifies the isolation mode. - The accepted isolation values are , - and . - It defaults to . -- In isolation mode dependent devices can cross-talk among themselves -- apart from talking through the parent device. -+ In isolation mode slaves can cross-talk among themselves -+ apart from talking through the master device. - In isolation mode the port is set in private mode. -- i.e. port won't allow cross communication between dependent devices. -+ i.e. port won't allow cross communication between slaves. - In isolation mode the port is set in VEPA mode. - i.e. port will offload switching functionality to the external entity as - described in 802.1Qbg. -@@ -1548,7 +1548,7 @@ by KATOH Yasufumi - fstab フォーマットの一行と同じフォーマットのマウントポイントの指定をします。 - - -- 加えて、LXC では rshared や rprivate といったマウント・プロパゲーションオプションと、独自の 3 つのマウントオプションが使えます。 -+ 加えて、LXC では rslave や rprivate といったマウント・プロパゲーションオプションと、独自の 3 つのマウントオプションが使えます。 - は、マウントが失敗しても失敗を返さずに無視します。 - は、マウントポイントをマウントする際にディレクトリもしくはファイルを作成します。 - を指定すると、マウントされたコンテナルートからの相対パスとして取得されます。 -diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in -index 3ed71c214..ae04e3af3 100644 ---- a/doc/lxc.container.conf.sgml.in -+++ b/doc/lxc.container.conf.sgml.in -@@ -530,25 +530,25 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - modes are , and - . It defaults to mode. 
- In mode TX processing up to L3 happens on the stack instance -- attached to the dependent device and packets are switched to the stack instance of the -- parent device for the L2 processing and routing from that instance will be -- used before packets are queued on the outbound device. In this mode the dependent devices -+ attached to the slave device and packets are switched to the stack instance of the -+ master device for the L2 processing and routing from that instance will be -+ used before packets are queued on the outbound device. In this mode the slaves - will not receive nor can send multicast / broadcast traffic. - In mode TX processing is very similar to the L3 mode except that - iptables (conn-tracking) works in this mode and hence it is L3-symmetric (L3s). - This will have slightly less performance but that shouldn't matter since you are - choosing this mode over plain-L3 mode to make conn-tracking work. - In mode TX processing happens on the stack instance attached to -- the dependent device and packets are switched and queued to the parent device to send devices -- out. In this mode the dependent devices will RX/TX multicast and broadcast (if applicable) as well. -+ the slave device and packets are switched and queued to the master device to send -+ out. In this mode the slaves will RX/TX multicast and broadcast (if applicable) as well. - specifies the isolation mode. - The accepted isolation values are , - and . - It defaults to . -- In isolation mode dependent devices can cross-talk among themselves -- apart from talking through the parent device. -+ In isolation mode slaves can cross-talk among themselves -+ apart from talking through the master device. - In isolation mode the port is set in private mode. -- i.e. port won't allow cross communication between dependent devices. -+ i.e. port won't allow cross communication between slaves. - In isolation mode the port is set in VEPA mode. - i.e. 
port will offload switching functionality to the external entity as - described in 802.1Qbg. -@@ -1164,7 +1164,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Specify a mount point corresponding to a line in the - fstab format. - -- Moreover lxc supports mount propagation, such as rshared or -+ Moreover lxc supports mount propagation, such as rslave or - rprivate, and adds three additional mount options. - don't fail if mount does not work. - or -diff --git a/hooks/Makefile.am b/hooks/Makefile.am -index 5ae73d72c..ddfd4bc32 100644 ---- a/hooks/Makefile.am -+++ b/hooks/Makefile.am -@@ -10,6 +10,8 @@ hooks_SCRIPTS = \ - squid-deb-proxy-client \ - nvidia - -+ -+if !HAVE_ISULAD - binhooks_PROGRAMS = \ - unmount-namespace - -@@ -20,5 +22,6 @@ if IS_BIONIC - unmount_namespace_SOURCES += \ - ../src/include/lxcmntent.c ../src/include/lxcmntent.h - endif -+endif - - EXTRA_DIST=$(hooks_SCRIPTS) -diff --git a/src/include/fexecve.c b/src/include/fexecve.c -index 40d2b5b46..123f27309 100644 ---- a/src/include/fexecve.c -+++ b/src/include/fexecve.c -@@ -29,7 +29,7 @@ - #include - #include "config.h" - #include "macro.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - - int fexecve(int fd, char *const argv[], char *const envp[]) - { -@@ -41,9 +41,11 @@ int fexecve(int fd, char *const argv[], char *const envp[]) - return -1; - } - -- execveat(fd, "", argv, envp, AT_EMPTY_PATH); -+#ifdef __NR_execveat -+ lxc_raw_execveat(fd, "", argv, envp, AT_EMPTY_PATH); - if (errno != ENOSYS) - return -1; -+#endif - - ret = snprintf(procfd, sizeof(procfd), "/proc/self/fd/%d", fd); - if (ret < 0 || (size_t)ret >= sizeof(procfd)) { -diff --git a/src/include/openpty.c b/src/include/openpty.c -index 7804d4c98..01579c517 100644 ---- a/src/include/openpty.c -+++ b/src/include/openpty.c -@@ -34,43 +34,43 @@ - - #define _PATH_DEVPTMX "/dev/ptmx" - --int openpty (int *aptmx, int *apts, char *name, struct termios *termp, -+int openpty (int *amaster, int 
*aslave, char *name, struct termios *termp, - struct winsize *winp) - { - char buf[PATH_MAX]; -- int ptmx, pts; -+ int master, slave; - -- ptmx = open(_PATH_DEVPTMX, O_RDWR); -- if (ptmx == -1) -+ master = open(_PATH_DEVPTMX, O_RDWR); -+ if (master == -1) - return -1; - -- if (grantpt(ptmx)) -+ if (grantpt(master)) - goto fail; - -- if (unlockpt(ptmx)) -+ if (unlockpt(master)) - goto fail; - -- if (ptsname_r(ptmx, buf, sizeof buf)) -+ if (ptsname_r(master, buf, sizeof buf)) - goto fail; - -- pts = open(buf, O_RDWR | O_NOCTTY); -- if (pts == -1) -+ slave = open(buf, O_RDWR | O_NOCTTY); -+ if (slave == -1) - goto fail; - - /* XXX Should we ignore errors here? */ - if (termp) -- tcsetattr(pts, TCSAFLUSH, termp); -+ tcsetattr(slave, TCSAFLUSH, termp); - if (winp) -- ioctl(pts, TIOCSWINSZ, winp); -+ ioctl(slave, TIOCSWINSZ, winp); - -- *aptmx = ptmx; -- *apts = pts; -+ *amaster = master; -+ *aslave = slave; - if (name != NULL) - strcpy(name, buf); - - return 0; - - fail: -- close(ptmx); -+ close(master); - return -1; - } -diff --git a/src/include/openpty.h b/src/include/openpty.h -index cb452e52a..6e7bf8d2d 100644 ---- a/src/include/openpty.h -+++ b/src/include/openpty.h -@@ -27,12 +27,10 @@ - #include - #include - --/* -- * Create pseudo tty ptmx pts pair with @__name and set terminal -- * attributes according to @__termp and @__winp and return handles for both -- * ends in @__aptmx and @__apts. -- */ --extern int openpty (int *__aptmx, int *__apts, char *__name, -+/* Create pseudo tty master slave pair with NAME and set terminal -+ attributes according to TERMP and WINP and return handles for both -+ ends in AMASTER and ASLAVE. 
*/ -+extern int openpty (int *__amaster, int *__aslave, char *__name, - const struct termios *__termp, - const struct winsize *__winp); - -diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am -index d1e23647e..0e1ba8da9 100644 ---- a/src/lxc/Makefile.am -+++ b/src/lxc/Makefile.am -@@ -27,7 +27,7 @@ noinst_HEADERS = api_extensions.h \ - memory_utils.h \ - monitor.h \ - namespace.h \ -- process_utils.h \ -+ raw_syscalls.h \ - rexec.h \ - start.h \ - state.h \ -@@ -52,6 +52,16 @@ noinst_HEADERS = api_extensions.h \ - utils.h \ - uuid.h - -+if HAVE_ISULAD -+noinst_HEADERS += isulad_utils.h path.h \ -+ json/json_common.h json/defs.h \ -+ json/oci_runtime_hooks.h \ -+ json/logger_json_file.h \ -+ json/oci_runtime_spec.h \ -+ json/read-file.h \ -+ exec_commands.h -+endif -+ - if IS_BIONIC - noinst_HEADERS += ../include/fexecve.h \ - ../include/lxcmntent.h \ -@@ -128,13 +138,14 @@ liblxc_la_SOURCES = af_unix.c af_unix.h \ - network.c network.h \ - monitor.c monitor.h \ - parse.c parse.h \ -- process_utils.c process_utils.h \ -+ raw_syscalls.c raw_syscalls.h \ - ringbuf.c ringbuf.h \ - rtnl.c rtnl.h \ - state.c state.h \ - start.c start.h \ - storage/btrfs.c storage/btrfs.h \ - storage/dir.c storage/dir.h \ -+ storage/block.c storage/block.h \ - storage/loop.c storage/loop.h \ - storage/lvm.c storage/lvm.h \ - storage/nbd.c storage/nbd.h \ -@@ -154,6 +165,18 @@ liblxc_la_SOURCES = af_unix.c af_unix.h \ - version.h \ - $(LSM_SOURCES) - -+if HAVE_ISULAD -+liblxc_la_SOURCES += isulad_utils.c isulad_utils.h \ -+ path.c path.h \ -+ json/json_common.c json/json_common.h \ -+ json/defs.h json/defs.c \ -+ json/oci_runtime_hooks.c json/oci_runtime_hooks.h \ -+ json/logger_json_file.c json/logger_json_file.h \ -+ json/oci_runtime_spec.c json/oci_runtime_spec.h \ -+ json/read-file.c json/read-file.h \ -+ exec_commands.c exec_commands.h -+endif -+ - if IS_BIONIC - liblxc_la_SOURCES += ../include/fexecve.c ../include/fexecve.h \ - ../include/lxcmntent.c ../include/lxcmntent.h \ -@@ 
-212,6 +235,10 @@ AM_CFLAGS = -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \ - -I $(top_srcdir)/src/lxc/storage \ - -I $(top_srcdir)/src/lxc/cgroups - -+if HAVE_ISULAD -+AM_CFLAGS += -I $(top_srcdir)/src/lxc/json -+AM_CFLAGS += -DHAVE_ISULAD -+endif - if ENABLE_APPARMOR - AM_CFLAGS += -DHAVE_APPARMOR - endif -@@ -249,6 +276,10 @@ liblxc_la_CFLAGS += -fsanitize=address \ - -fno-omit-frame-pointer - endif - -+if HAVE_ISULAD -+liblxc_la_CFLAGS += -D_FORTIFY_SOURCE=2 -Wall -+endif -+ - if ENABLE_UBSAN - liblxc_la_CFLAGS += -fsanitize=undefined - endif -@@ -258,6 +289,12 @@ liblxc_la_LDFLAGS = -pthread \ - -Wl,-soname,liblxc.so.$(firstword $(subst ., ,@LXC_ABI@)) \ - -version-info @LXC_ABI_MAJOR@ - -+if HAVE_ISULAD -+liblxc_la_LDFLAGS += @YAJL_LIBS@ -Wl,-z,relro \ -+ -Wl,-z,now \ -+ -Wl,-z,noexecstack -+endif -+ - liblxc_la_LIBADD = $(CAP_LIBS) \ - $(OPENSSL_LIBS) \ - $(SELINUX_LIBS) \ -@@ -384,7 +421,7 @@ init_lxc_SOURCES = cmd/lxc_init.c \ - initutils.c initutils.h \ - memory_utils.h \ - parse.c parse.h \ -- process_utils.c process_utils.h \ -+ raw_syscalls.c raw_syscalls.h \ - syscall_numbers.h \ - string_utils.c string_utils.h - -@@ -395,7 +432,7 @@ lxc_monitord_SOURCES = cmd/lxc_monitord.c \ - log.c log.h \ - mainloop.c mainloop.h \ - monitor.c monitor.h \ -- process_utils.c process_utils.h \ -+ raw_syscalls.c raw_syscalls.h \ - syscall_numbers.h \ - utils.c utils.h - lxc_user_nic_SOURCES = cmd/lxc_user_nic.c \ -@@ -404,7 +441,7 @@ lxc_user_nic_SOURCES = cmd/lxc_user_nic.c \ - memory_utils.h \ - network.c network.h \ - parse.c parse.h \ -- process_utils.c process_utils.h \ -+ raw_syscalls.c raw_syscalls.h \ - syscall_numbers.h \ - file_utils.c file_utils.h \ - string_utils.c string_utils.h \ -diff --git a/src/lxc/af_unix.c b/src/lxc/af_unix.c -index 5cf54917f..9f268be60 100644 ---- a/src/lxc/af_unix.c -+++ b/src/lxc/af_unix.c -@@ -18,7 +18,7 @@ - #include "log.h" - #include "macro.h" - #include "memory_utils.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - 
#include "utils.h" - - #ifndef HAVE_STRLCPY -@@ -168,7 +168,7 @@ int lxc_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data, - } - - static int lxc_abstract_unix_recv_fds_iov(int fd, int *recvfds, int num_recvfds, -- struct iovec *iov, size_t iovlen) -+ struct iovec *iov, size_t iovlen, unsigned int timeout) - { - __do_free char *cmsgbuf = NULL; - int ret; -@@ -188,8 +188,24 @@ static int lxc_abstract_unix_recv_fds_iov(int fd, int *recvfds, int num_recvfds, - msg.msg_iov = iov; - msg.msg_iovlen = iovlen; - -+#ifdef HAVE_ISULAD -+ struct timeval out; -+ if (timeout > 0) { -+ memset(&out, 0, sizeof(out)); -+ out.tv_sec = timeout / 1000000; -+ out.tv_usec = timeout % 1000000; -+ ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, -+ (const void *)&out, sizeof(out)); -+ if (ret < 0) { -+ ERROR("Failed to set %u timeout on containter " -+ "state socket", timeout); -+ return ret; -+ } -+ } -+#endif -+ - do { -- ret = recvmsg(fd, &msg, MSG_CMSG_CLOEXEC); -+ ret = recvmsg(fd, &msg, 0); - } while (ret < 0 && errno == EINTR); - if (ret < 0 || ret == 0) - return ret; -@@ -220,8 +236,21 @@ int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds, - .iov_base = data ? data : buf, - .iov_len = data ? size : sizeof(buf), - }; -- return lxc_abstract_unix_recv_fds_iov(fd, recvfds, num_recvfds, &iov, 1); -+ return lxc_abstract_unix_recv_fds_iov(fd, recvfds, num_recvfds, &iov, 1, 0); -+} -+ -+#ifdef HAVE_ISULAD -+int lxc_abstract_unix_recv_fds_timeout(int fd, int *recvfds, int num_recvfds, -+ void *data, size_t size, unsigned int timeout) -+{ -+ char buf[1] = {0}; -+ struct iovec iov = { -+ .iov_base = data ? data : buf, -+ .iov_len = data ? 
size : sizeof(buf), -+ }; -+ return lxc_abstract_unix_recv_fds_iov(fd, recvfds, num_recvfds, &iov, 1, timeout); - } -+#endif - - int lxc_abstract_unix_send_credential(int fd, void *data, size_t size) - { -diff --git a/src/lxc/af_unix.h b/src/lxc/af_unix.h -index 5a1482c35..6943a61ee 100644 ---- a/src/lxc/af_unix.h -+++ b/src/lxc/af_unix.h -@@ -7,38 +7,28 @@ - #include - #include - --#include "compiler.h" -- - /* does not enforce \0-termination */ - extern int lxc_abstract_unix_open(const char *path, int type, int flags); - extern void lxc_abstract_unix_close(int fd); - /* does not enforce \0-termination */ - extern int lxc_abstract_unix_connect(const char *path); -- - extern int lxc_abstract_unix_send_fds(int fd, int *sendfds, int num_sendfds, -- void *data, size_t size) --__access_r(2, 3) __access_r(4, 5); -- --extern int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, int num_sendfds, -- struct iovec *iov, size_t iovlen) --__access_r(2, 3); -- --extern int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds, -- void *data, size_t size) --__access_r(2, 3) __access_r(4, 5); -- -+ void *data, size_t size); -+extern int lxc_abstract_unix_send_fds_iov(int fd, int *sendfds, -+ int num_sendfds, struct iovec *iov, -+ size_t iovlen); - extern int lxc_unix_send_fds(int fd, int *sendfds, int num_sendfds, void *data, - size_t size); -- --extern int lxc_abstract_unix_send_credential(int fd, void *data, size_t size) --__access_r(2, 3); -- --extern int lxc_abstract_unix_rcv_credential(int fd, void *data, size_t size) --__access_w(2, 3); -- -+extern int lxc_abstract_unix_recv_fds(int fd, int *recvfds, int num_recvfds, -+ void *data, size_t size); -+extern int lxc_abstract_unix_send_credential(int fd, void *data, size_t size); -+extern int lxc_abstract_unix_rcv_credential(int fd, void *data, size_t size); - extern int lxc_unix_sockaddr(struct sockaddr_un *ret, const char *path); - extern int lxc_unix_connect(struct sockaddr_un *addr); - extern int 
lxc_unix_connect_type(struct sockaddr_un *addr, int type); - extern int lxc_socket_set_timeout(int fd, int rcv_timeout, int snd_timeout); -- -+#ifdef HAVE_ISULAD -+int lxc_abstract_unix_recv_fds_timeout(int fd, int *recvfds, int num_recvfds, -+ void *data, size_t size, unsigned int timeout); -+#endif - #endif /* __LXC_AF_UNIX_H */ -diff --git a/src/lxc/api_extensions.h b/src/lxc/api_extensions.h -index 3afdc35b9..9ff071edf 100644 ---- a/src/lxc/api_extensions.h -+++ b/src/lxc/api_extensions.h -@@ -38,7 +38,6 @@ static char *api_extensions[] = { - "cgroup2_devices", - #endif - "cgroup2", -- "pidfd", - }; - - static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions); -diff --git a/src/lxc/attach.c b/src/lxc/attach.c -index 38e16f2d1..068cc5f8e 100644 ---- a/src/lxc/attach.c -+++ b/src/lxc/attach.c -@@ -40,7 +40,7 @@ - #include "mainloop.h" - #include "memory_utils.h" - #include "namespace.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "syscall_wrappers.h" - #include "terminal.h" - #include "utils.h" -@@ -49,6 +49,25 @@ - #include - #endif - -+#ifdef HAVE_ISULAD -+#include "exec_commands.h" -+ -+typedef enum { -+ ATTACH_INIT, -+ ATTACH_TIMEOUT, -+ ATTACH_MAX, -+} attach_timeout_t; -+ -+static volatile attach_timeout_t g_attach_timeout_state = ATTACH_INIT; -+ -+struct attach_timeout_conf { -+ int64_t timeout; -+ unsigned long long start_time; -+ pid_t pid; -+}; -+ -+#endif -+ - lxc_log_define(attach, lxc); - - /* Define default options if no options are supplied by the user. */ -@@ -194,8 +213,12 @@ int lxc_attach_remount_sys_proc(void) - if (ret < 0) - return log_error_errno(-1, errno, "Failed to unshare mount namespace"); - -- if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) -- SYSERROR("Failed to recursively turn root mount tree into dependent mount. 
Continuing..."); -+ if (detect_shared_rootfs()) { -+ if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) { -+ SYSERROR("Failed to make / rslave"); -+ ERROR("Continuing..."); -+ } -+ } - - /* Assume /proc is always mounted, so remount it. */ - ret = umount2("/proc", MNT_DETACH); -@@ -625,23 +648,69 @@ static signed long get_personality(const char *name, const char *lxcpath) - - struct attach_clone_payload { - int ipc_socket; -- int terminal_pts_fd; -+ int terminal_slave_fd; - lxc_attach_options_t *options; - struct lxc_proc_context_info *init_ctx; - lxc_attach_exec_t exec_function; - void *exec_payload; -+#ifdef HAVE_ISULAD -+ struct lxc_terminal *terminal; -+#endif - }; - - static void lxc_put_attach_clone_payload(struct attach_clone_payload *p) - { - close_prot_errno_disarm(p->ipc_socket); -- close_prot_errno_disarm(p->terminal_pts_fd); -+ close_prot_errno_disarm(p->terminal_slave_fd); - if (p->init_ctx) { - lxc_proc_put_context_info(p->init_ctx); - p->init_ctx = NULL; - } - } - -+#ifdef HAVE_ISULAD -+static int isulad_set_attach_pipes(struct lxc_terminal *terminal) -+{ -+ int ret = 0; -+ if (terminal->pipes[0][1] >= 0) { -+ close(terminal->pipes[0][1]); -+ terminal->pipes[0][1] = -1; -+ } -+ -+ if (terminal->pipes[0][0] >= 0) { -+ ret = dup2(terminal->pipes[0][0], STDIN_FILENO); -+ if (ret < 0) -+ goto out; -+ } -+ -+ if (terminal->pipes[1][0] >= 0) { -+ close(terminal->pipes[1][0]); -+ terminal->pipes[1][0] = -1; -+ } -+ -+ if (terminal->pipes[1][1] >= 0) { -+ ret = dup2(terminal->pipes[1][1], STDOUT_FILENO); -+ if (ret < 0) -+ goto out; -+ } -+ if (terminal->pipes[2][0] >= 0) { -+ close(terminal->pipes[2][0]); -+ terminal->pipes[2][0] = -1; -+ } -+ -+ if (terminal->pipes[2][1] >= 0) { -+ ret = dup2(terminal->pipes[2][1], STDERR_FILENO); -+ if (ret < 0) -+ goto out; -+ } -+ -+ setsid(); -+out: -+ return ret; -+} -+ -+#endif -+ - static int attach_child_main(struct attach_clone_payload *payload) - { - int lsm_fd, ret; -@@ -654,6 +723,31 @@ static int 
attach_child_main(struct attach_clone_payload *payload) - bool needs_lsm = (options->namespaces & CLONE_NEWNS) && - (options->attach_flags & LXC_ATTACH_LSM) && - init_ctx->lsm_label; -+#ifdef HAVE_ISULAD -+ int msg_fd = -1; -+ sigset_t mask; -+ -+ /*isulad: record errpipe fd*/ -+ msg_fd = init_ctx->container->lxc_conf->errpipe[1]; -+ init_ctx->container->lxc_conf->errpipe[1] = -1; -+ /*isulad: set system umask */ -+ umask(init_ctx->container->lxc_conf->umask); -+ -+ /*isulad: restore default signal handlers and unblock all signals*/ -+ for (int i = 1; i < NSIG; i++) -+ signal(i, SIG_DFL); -+ -+ ret = sigfillset(&mask); -+ if (ret < 0) { -+ SYSERROR("Failed to fill signal mask"); -+ goto on_error;; -+ } -+ ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); -+ if (ret < 0) { -+ SYSERROR("Failed to set signal mask"); -+ goto on_error; -+ } -+#endif - - /* A description of the purpose of this functionality is provided in the - * lxc-attach(1) manual page. We have to remount here and not in the -@@ -695,6 +789,24 @@ static int attach_child_main(struct attach_clone_payload *payload) - TRACE("Dropped capabilities"); - } - -+#ifdef HAVE_ISULAD -+ /* isulad: set workdir */ -+ if (init_ctx->container->lxc_conf->init_cwd) { -+ char *init_cwd; -+ init_cwd = init_ctx->container->lxc_conf->init_cwd; -+ /* try to create workdir if not exist */ -+ struct stat st; -+ if (stat(init_cwd, &st) < 0 && mkdir_p(init_cwd, 0750) < 0) { -+ SYSERROR("Try to create directory \"%s\" as workdir failed when attach", init_cwd); -+ goto on_error; -+ } -+ if (chdir(init_cwd)) { -+ SYSERROR("Could not change directory to \"%s\" when attach", init_cwd); -+ goto on_error; -+ } -+ } -+#endif -+ - /* Always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) - * if you want this to be a no-op). 
- */ -@@ -736,8 +848,10 @@ static int attach_child_main(struct attach_clone_payload *payload) - goto on_error; - } - -+#ifndef HAVE_ISULAD - if (!lxc_setgroups(0, NULL) && errno != EPERM) - goto on_error; -+#endif - - if (options->namespaces & CLONE_NEWUSER) { - /* Check whether nsuid 0 has a mapping. */ -@@ -770,6 +884,13 @@ static int attach_child_main(struct attach_clone_payload *payload) - else - new_gid = ns_root_gid; - -+#ifdef HAVE_ISULAD -+ // isulad: set env home in container -+ if (lxc_setup_env_home(new_uid) < 0) { -+ goto on_error; -+ } -+#endif -+ - if ((init_ctx->container && init_ctx->container->lxc_conf && - init_ctx->container->lxc_conf->no_new_privs) || - (options->attach_flags & LXC_ATTACH_NO_NEW_PRIVS)) { -@@ -810,10 +931,12 @@ static int attach_child_main(struct attach_clone_payload *payload) - goto on_error; - } - -+#ifndef HAVE_ISULAD - close(payload->ipc_socket); - payload->ipc_socket = -EBADF; - lxc_proc_put_context_info(init_ctx); - payload->init_ctx = NULL; -+#endif - - /* The following is done after the communication socket is shut down. 
- * That way, all errors that might (though unlikely) occur up until this -@@ -856,13 +979,33 @@ static int attach_child_main(struct attach_clone_payload *payload) - } - - if (options->attach_flags & LXC_ATTACH_TERMINAL) { -- ret = lxc_terminal_prepare_login(payload->terminal_pts_fd); -+ -+#ifdef HAVE_ISULAD -+ /* isulad: dup2 pipe[0][0] to container stdin, pipe[1][1] to container stdout, pipe[2][1] to container stderr */ -+ if (payload->terminal->disable_pty) { -+ ret = isulad_set_attach_pipes(payload->terminal); -+ if (ret < 0) { -+ SYSERROR("Failed to prepare terminal file pipes"); -+ goto on_error; -+ } -+ } -+ -+ if(!payload->terminal->disable_pty && payload->terminal_slave_fd >= 0) { -+ ret = lxc_terminal_prepare_login(payload->terminal_slave_fd); -+ if (ret < 0) { -+ SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_slave_fd); -+ goto on_error; -+ } -+ } -+#else -+ ret = lxc_terminal_prepare_login(payload->terminal_slave_fd); - if (ret < 0) { -- SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_pts_fd); -+ SYSERROR("Failed to prepare terminal file descriptor %d", payload->terminal_slave_fd); - goto on_error; - } - -- TRACE("Prepared terminal file descriptor %d", payload->terminal_pts_fd); -+ TRACE("Prepared terminal file descriptor %d", payload->terminal_slave_fd); -+#endif - } - - /* Avoid unnecessary syscalls. 
*/ -@@ -872,6 +1015,17 @@ static int attach_child_main(struct attach_clone_payload *payload) - if (new_gid == ns_root_gid) - new_gid = LXC_INVALID_GID; - -+#ifdef HAVE_ISULAD -+ if (prctl(PR_SET_KEEPCAPS, 1) < 0) { -+ SYSERROR("Failed to keep permitted capabilities"); -+ goto on_error; -+ } -+ -+ if (!lxc_setgroups(init_ctx->container->lxc_conf->init_groups_len, -+ init_ctx->container->lxc_conf->init_groups)) -+ goto on_error; -+#endif -+ - /* Make sure that the processes STDIO is correctly owned by the user that we are switching to */ - ret = fix_stdio_permissions(new_uid); - if (ret) -@@ -880,8 +1034,27 @@ static int attach_child_main(struct attach_clone_payload *payload) - if (!lxc_switch_uid_gid(new_uid, new_gid)) - goto on_error; - -+#ifdef HAVE_ISULAD -+ if (prctl(PR_SET_KEEPCAPS, 0) < 0) { -+ SYSERROR("Failed to clear permitted capabilities"); -+ goto on_error; -+ } -+ -+ if (lxc_drop_caps(init_ctx->container->lxc_conf) != 0) { -+ ERROR("Failed to drop caps."); -+ goto on_error; -+ } -+ -+ close(payload->ipc_socket); -+ payload->ipc_socket = -EBADF; -+ lxc_proc_put_context_info(init_ctx); -+ payload->init_ctx = NULL; -+ _exit(payload->exec_function(payload->exec_payload, msg_fd)); -+#else - /* We're done, so we can now do whatever the user intended us to do. 
*/ - _exit(payload->exec_function(payload->exec_payload)); -+#endif -+ - - on_error: - lxc_put_attach_clone_payload(payload); -@@ -889,12 +1062,31 @@ on_error: - } - - static int lxc_attach_terminal(struct lxc_conf *conf, -- struct lxc_terminal *terminal) -+ struct lxc_terminal *terminal, lxc_attach_options_t *options) - { - int ret; - - lxc_terminal_init(terminal); - -+#ifdef HAVE_ISULAD -+ /* isulad: if we pass fifo in option, use them as init fifos */ -+ if (options->init_fifo[0]) { -+ free(terminal->init_fifo[0]); -+ terminal->init_fifo[0] = safe_strdup(options->init_fifo[0]); -+ } -+ if (options->init_fifo[1]) { -+ free(terminal->init_fifo[1]); -+ terminal->init_fifo[1] = safe_strdup(options->init_fifo[1]); -+ } -+ if (options->init_fifo[2]) { -+ free(terminal->init_fifo[2]); -+ terminal->init_fifo[2] = safe_strdup(options->init_fifo[2]); -+ } -+ -+ terminal->disable_pty = options->disable_pty; -+ terminal->open_stdin = options->open_stdin; -+#endif -+ - ret = lxc_terminal_create(terminal); - if (ret < 0) - return log_error(-1, "Failed to create terminal"); -@@ -932,14 +1124,14 @@ static int lxc_attach_terminal_mainloop_init(struct lxc_terminal *terminal, - return 0; - } - --static inline void lxc_attach_terminal_close_ptmx(struct lxc_terminal *terminal) -+static inline void lxc_attach_terminal_close_master(struct lxc_terminal *terminal) - { -- close_prot_errno_disarm(terminal->ptmx); -+ close_prot_errno_disarm(terminal->master); - } - --static inline void lxc_attach_terminal_close_pts(struct lxc_terminal *terminal) -+static inline void lxc_attach_terminal_close_slave(struct lxc_terminal *terminal) - { -- close_prot_errno_disarm(terminal->pts); -+ close_prot_errno_disarm(terminal->slave); - } - - static inline void lxc_attach_terminal_close_peer(struct lxc_terminal *terminal) -@@ -952,9 +1144,125 @@ static inline void lxc_attach_terminal_close_log(struct lxc_terminal *terminal) - close_prot_errno_disarm(terminal->log_fd); - } - -+#ifdef HAVE_ISULAD -+/* 
isulad: attach timeout thread function */ -+static void* wait_attach_timeout(void *arg) -+{ -+ struct attach_timeout_conf *conf = (struct attach_timeout_conf *)arg; -+ -+ if (!conf || conf->timeout < 1) -+ goto out; -+ sleep(conf->timeout); -+ if (lxc_process_alive(conf->pid, conf->start_time)) { -+ g_attach_timeout_state = ATTACH_TIMEOUT; -+ if (kill(conf->pid, SIGKILL) < 0) { -+ ERROR("Failed to send signal %d to pid %d", SIGKILL, conf->pid); -+ } -+ } -+ -+out: -+ free(conf); -+ return ((void *)0); -+} -+ -+/* isulad: create attach timeout thread */ -+static int create_attach_timeout_thread(int64_t attach_timeout, pid_t pid) -+{ -+ int ret = 0; -+ pthread_t ptid; -+ pthread_attr_t attr; -+ struct attach_timeout_conf *timeout_conf = NULL; -+ -+ timeout_conf = malloc(sizeof(struct attach_timeout_conf)); -+ if (timeout_conf == NULL) { -+ ERROR("Failed to malloc attach timeout conf"); -+ ret = -1; -+ goto out; -+ } -+ -+ memset(timeout_conf, 0, sizeof(struct attach_timeout_conf)); -+ timeout_conf->timeout = attach_timeout; -+ timeout_conf->pid = pid; -+ timeout_conf->start_time = lxc_get_process_startat(pid); -+ -+ pthread_attr_init(&attr); -+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); -+ ret = pthread_create(&ptid, &attr, wait_attach_timeout, timeout_conf); -+ if (ret != 0) { -+ ERROR("Create attach wait timeout thread failed"); -+ free(timeout_conf); -+ goto out; -+ } -+ -+out: -+ return ret; -+} -+ -+static int attach_signal_handler(int fd, uint32_t events, void *data, -+ struct lxc_epoll_descr *descr) -+{ -+ int ret; -+ siginfo_t info; -+ struct signalfd_siginfo siginfo; -+ pid_t *pid = data; -+ -+ ret = lxc_read_nointr(fd, &siginfo, sizeof(siginfo)); -+ if (ret < 0) -+ return log_error(LXC_MAINLOOP_ERROR, "Failed to read signal info from signal file descriptor %d", fd); -+ -+ if (ret != sizeof(siginfo)) -+ return log_error(LXC_MAINLOOP_ERROR, "Unexpected size for struct signalfd_siginfo"); -+ -+ /* Check whether init is running. 
*/ -+ info.si_pid = 0; -+ ret = waitid(P_PID, *pid, &info, WEXITED | WNOWAIT | WNOHANG); -+ if (ret == 0 && info.si_pid == *pid) { -+ return log_error(LXC_MAINLOOP_CLOSE, "Container attach init process %d exited", *pid); -+ } -+ -+ return LXC_MAINLOOP_CONTINUE; -+} -+ -+static int isulad_setup_signal_fd(sigset_t *oldmask) -+{ -+ int ret; -+ sigset_t mask; -+ const int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH}; -+ -+ /* Block everything except serious error signals. */ -+ ret = sigfillset(&mask); -+ if (ret < 0) -+ return -EBADF; -+ -+ for (int sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) { -+ ret = sigdelset(&mask, signals[sig]); -+ if (ret < 0) -+ return -EBADF; -+ } -+ -+ ret = pthread_sigmask(SIG_BLOCK, &mask, oldmask); -+ if (ret < 0) -+ return log_error_errno(-EBADF, errno, -+ "Failed to set signal mask"); -+ -+ ret = signalfd(-1, &mask, SFD_CLOEXEC); -+ if (ret < 0) -+ return log_error_errno(-EBADF, -+ errno, "Failed to create signal file descriptor"); -+ -+ TRACE("Created signal file descriptor %d", ret); -+ -+ return ret; -+} -+ -+int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, -+ void *exec_payload, lxc_attach_options_t *options, -+ pid_t *attached_process, char **err_msg) -+#else - int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - void *exec_payload, lxc_attach_options_t *options, - pid_t *attached_process) -+#endif - { - int i, ret, status; - int ipc_sockets[2]; -@@ -966,6 +1274,13 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - struct lxc_conf *conf; - char *name, *lxcpath; - struct attach_clone_payload payload = {0}; -+#ifdef HAVE_ISULAD -+ struct lxc_exec_command_handler exec_command; -+ const char *suffix = options->suffix; -+ -+ exec_command.maincmd_fd = -1; -+ exec_command.terminal = &terminal; -+#endif - - ret = access("/proc/self/ns", X_OK); - if (ret) -@@ -1014,8 +1329,14 @@ int lxc_attach(struct lxc_container 
*container, lxc_attach_exec_t exec_function, - } - } - conf = init_ctx->container->lxc_conf; -- if (!conf) -- return log_error_errno(-EINVAL, EINVAL, "Missing container confifg"); -+ -+#ifdef HAVE_ISULAD -+ // always switch uid and gid for attach -+ if (options->uid == -1) -+ options->uid = init_ctx->container->lxc_conf->init_uid; -+ if (options->gid == -1) -+ options->gid = init_ctx->container->lxc_conf->init_gid; -+#endif - - if (!fetch_seccomp(init_ctx->container, options)) - WARN("Failed to get seccomp policy"); -@@ -1090,7 +1411,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - } - - if (options->attach_flags & LXC_ATTACH_TERMINAL) { -- ret = lxc_attach_terminal(conf, &terminal); -+ ret = lxc_attach_terminal(conf, &terminal, options); - if (ret < 0) { - ERROR("Failed to setup new terminal"); - free(cwd); -@@ -1099,6 +1420,12 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - } - - terminal.log_fd = options->log_fd; -+#ifdef HAVE_ISULAD -+ if (suffix != NULL) { -+ exec_command.maincmd_fd = lxc_exec_cmd_init(name, lxcpath, suffix); -+ exec_command.terminal = &terminal; -+ } -+#endif - } else { - lxc_terminal_init(&terminal); - } -@@ -1139,10 +1466,38 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); - if (ret < 0) { - SYSERROR("Could not set up required IPC mechanism for attaching"); -+#ifdef HAVE_ISULAD -+ if (options->attach_flags & LXC_ATTACH_TERMINAL) { -+ lxc_terminal_delete(&terminal); -+ lxc_terminal_conf_free(&terminal); -+ if (exec_command.maincmd_fd != -1) { -+ close(exec_command.maincmd_fd); -+ } -+ } -+#endif -+ free(cwd); -+ lxc_proc_put_context_info(init_ctx); -+ return -1; -+ } -+ -+#ifdef HAVE_ISULAD -+ /* isulad: pipdfd for get error message of child or grandchild process. 
*/ -+ if (pipe2(conf->errpipe, O_CLOEXEC) != 0) { -+ SYSERROR("Failed to init errpipe"); -+ if (options->attach_flags & LXC_ATTACH_TERMINAL) { -+ lxc_terminal_delete(&terminal); -+ lxc_terminal_conf_free(&terminal); -+ if (exec_command.maincmd_fd != -1) { -+ close(exec_command.maincmd_fd); -+ } -+ } -+ close(ipc_sockets[0]); -+ close(ipc_sockets[1]); - free(cwd); - lxc_proc_put_context_info(init_ctx); - return -1; - } -+#endif - - /* Create intermediate subprocess, two reasons: - * 1. We can't setns() in the child itself, since we want to make -@@ -1154,6 +1509,17 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - pid = fork(); - if (pid < 0) { - SYSERROR("Failed to create first subprocess"); -+#ifdef HAVE_ISULAD -+ if (options->attach_flags & LXC_ATTACH_TERMINAL) { -+ lxc_terminal_delete(&terminal); -+ lxc_terminal_conf_free(&terminal); -+ if (exec_command.maincmd_fd != -1) { -+ close(exec_command.maincmd_fd); -+ } -+ } -+ close(ipc_sockets[0]); -+ close(ipc_sockets[1]); -+#endif - free(cwd); - lxc_proc_put_context_info(init_ctx); - return -1; -@@ -1163,13 +1529,38 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - int ret_parent = -1; - pid_t to_cleanup_pid = pid; - struct lxc_epoll_descr descr = {0}; -+#ifdef HAVE_ISULAD -+ int isulad_sigfd; -+ sigset_t isulad_oldmask; -+ struct lxc_epoll_descr isulad_descr = {0}; -+#endif - - /* close unneeded file descriptors */ - close(ipc_sockets[1]); - free(cwd); -+#ifdef HAVE_ISULAD -+ /* isulad: close errpipe */ -+ close(conf->errpipe[1]); -+ conf->errpipe[1] = -1; -+ /* isulad: close pipe after clone */ -+ if (terminal.pipes[0][0] >= 0) { -+ close(terminal.pipes[0][0]); -+ terminal.pipes[0][0] = -1; -+ } -+ -+ if (terminal.pipes[1][1] >= 0) { -+ close(terminal.pipes[1][1]); -+ terminal.pipes[1][1] = -1; -+ } -+ -+ if (terminal.pipes[2][1] >= 0) { -+ close(terminal.pipes[2][1]); -+ terminal.pipes[2][1] = -1; -+ } -+#endif - 
lxc_proc_close_ns_fd(init_ctx); - if (options->attach_flags & LXC_ATTACH_TERMINAL) -- lxc_attach_terminal_close_pts(&terminal); -+ lxc_attach_terminal_close_slave(&terminal); - - /* Attach to cgroup, if requested. */ - if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) { -@@ -1200,7 +1591,11 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - - /* Setup resource limits */ - if (!lxc_list_empty(&conf->limits)) { -+#ifdef HAVE_ISULAD -+ ret = setup_resource_limits(&conf->limits, pid, -1); -+#else - ret = setup_resource_limits(&conf->limits, pid); -+#endif - if (ret < 0) - goto on_error; - } -@@ -1210,9 +1605,28 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - if (ret < 0) - goto on_error; - -+#ifdef HAVE_ISULAD -+ ret = lxc_attach_terminal_mainloop_init(&terminal, &isulad_descr); -+ if (ret < 0) -+ goto on_error; -+ -+ if (suffix != NULL) { -+ (void)lxc_exec_cmd_mainloop_add(&descr, &exec_command); -+ } -+#endif - TRACE("Initialized terminal mainloop"); - } - -+#ifdef HAVE_ISULAD -+ /* The signal fd has to be created before forking otherwise if the child -+ * process exits before we setup the signal fd, the event will be lost -+ * and the command will be stuck. -+ */ -+ isulad_sigfd = isulad_setup_signal_fd(&isulad_oldmask); -+ if (isulad_sigfd < 0) -+ goto close_mainloop; -+#endif -+ - /* Let the child process know to go ahead. 
*/ - status = 0; - ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status)); -@@ -1273,7 +1687,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - TRACE("Sent LSM label file descriptor %d to child", labelfd); - } - -- if (conf->seccomp.seccomp) { -+ if (conf && conf->seccomp.seccomp) { - ret = lxc_seccomp_recv_notifier_fd(&conf->seccomp, ipc_sockets[0]); - if (ret < 0) - goto close_mainloop; -@@ -1290,6 +1704,34 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - - *attached_process = attached_pid; - -+#ifdef HAVE_ISULAD -+ if (options->timeout > 0) { -+ ret = create_attach_timeout_thread(options->timeout, *attached_process); -+ if (ret) { -+ ERROR("Failed to create attach timeout thread for container."); -+ goto close_mainloop; -+ } -+ } -+ /* isulad: read error msg from pipe */ -+ ssize_t size_read; -+ char errbuf[BUFSIZ + 1] = {0}; -+ pid_t tmp_pid = *attached_process; -+ -+ size_read = read(conf->errpipe[0], errbuf, BUFSIZ); -+ if (size_read > 0) { -+ if (err_msg) -+ *err_msg = safe_strdup(errbuf); -+ goto close_mainloop; -+ } -+ if (options->attach_flags & LXC_ATTACH_TERMINAL) { -+ ret = lxc_mainloop_add_handler(&descr, isulad_sigfd, attach_signal_handler, &tmp_pid); -+ if (ret < 0) { -+ ERROR("Failed to add signal handler for %d to mainloop", tmp_pid); -+ goto close_mainloop; -+ } -+ } -+#endif -+ - /* Now shut down communication with child, we're done. 
*/ - shutdown(ipc_sockets[0], SHUT_RDWR); - close(ipc_sockets[0]); -@@ -1298,6 +1740,15 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - ret_parent = 0; - to_cleanup_pid = -1; - -+ #ifdef HAVE_ISULAD -+ // iSulad: close stdin pipe if we do not want open_stdin with container stdin -+ if (!terminal.open_stdin) { -+ if (terminal.pipes[0][1] > 0) { -+ close(terminal.pipes[0][1]); -+ terminal.pipes[0][1] = -1; -+ } -+ } -+ #endif - if (options->attach_flags & LXC_ATTACH_TERMINAL) { - ret = lxc_mainloop(&descr, -1); - if (ret < 0) { -@@ -1306,9 +1757,20 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - } - } - -+#ifdef HAVE_ISULAD -+ // do lxc_mainloop to make sure we do not lose any output -+ (void)lxc_mainloop(&isulad_descr, 100); -+ if (g_attach_timeout_state == ATTACH_TIMEOUT && err_msg != NULL && *err_msg == NULL) { -+ *err_msg = safe_strdup("Attach exceeded timeout"); -+ } -+#endif - close_mainloop: -- if (options->attach_flags & LXC_ATTACH_TERMINAL) -+ if (options->attach_flags & LXC_ATTACH_TERMINAL) { -+#ifdef HAVE_ISULAD -+ lxc_mainloop_close(&isulad_descr); -+#endif - lxc_mainloop_close(&descr); -+ } - - on_error: - if (ipc_sockets[0] >= 0) { -@@ -1322,6 +1784,11 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - if (options->attach_flags & LXC_ATTACH_TERMINAL) { - lxc_terminal_delete(&terminal); - lxc_terminal_conf_free(&terminal); -+#ifdef HAVE_ISULAD -+ if (exec_command.maincmd_fd != -1) { -+ close(exec_command.maincmd_fd); -+ } -+#endif - } - - lxc_proc_put_context_info(init_ctx); -@@ -1331,10 +1798,21 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - /* close unneeded file descriptors */ - close_prot_errno_disarm(ipc_sockets[0]); - -+#ifdef HAVE_ISULAD -+ /* isulad: close errpipe */ -+ close(conf->errpipe[0]); -+ conf->errpipe[0] = -1; -+#endif -+ - if (options->attach_flags & LXC_ATTACH_TERMINAL) { -- 
lxc_attach_terminal_close_ptmx(&terminal); -+ lxc_attach_terminal_close_master(&terminal); - lxc_attach_terminal_close_peer(&terminal); - lxc_attach_terminal_close_log(&terminal); -+#ifdef HAVE_ISULAD -+ if (exec_command.maincmd_fd != -1) { -+ close(exec_command.maincmd_fd); -+ } -+#endif - } - - /* Wait for the parent to have setup cgroups. */ -@@ -1377,9 +1855,12 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - payload.ipc_socket = ipc_sockets[1]; - payload.options = options; - payload.init_ctx = init_ctx; -- payload.terminal_pts_fd = terminal.pts; -+ payload.terminal_slave_fd = terminal.slave; - payload.exec_function = exec_function; - payload.exec_payload = exec_payload; -+#ifdef HAVE_ISULAD -+ payload.terminal = &terminal; -+#endif - - pid = lxc_raw_clone(CLONE_PARENT, NULL); - if (pid < 0) { -@@ -1390,7 +1871,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - } - - if (pid == 0) { -- if (options->attach_flags & LXC_ATTACH_TERMINAL) { -+ if (options->attach_flags & LXC_ATTACH_TERMINAL && terminal.tty_state) { - ret = pthread_sigmask(SIG_SETMASK, - &terminal.tty_state->oldmask, NULL); - if (ret < 0) { -@@ -1406,9 +1887,9 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - _exit(EXIT_FAILURE); - } - -- if (options->attach_flags & LXC_ATTACH_TERMINAL) -- lxc_attach_terminal_close_pts(&terminal); -- -+ if (options->attach_flags & LXC_ATTACH_TERMINAL) { -+ lxc_attach_terminal_close_slave(&terminal); -+ } - /* Tell grandparent the pid of the pid of the newly created child. 
*/ - ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid)); - if (ret != sizeof(pid)) { -@@ -1430,7 +1911,11 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - _exit(EXIT_SUCCESS); - } - -+#ifdef HAVE_ISULAD -+int lxc_attach_run_command(void *payload, int msg_fd) -+#else - int lxc_attach_run_command(void *payload) -+#endif - { - int ret = -1; - lxc_attach_command_t *cmd = payload; -@@ -1446,11 +1931,19 @@ int lxc_attach_run_command(void *payload) - break; - } - } -+#ifdef HAVE_ISULAD -+ /* isulad: write error messages */ -+ lxc_write_error_message(msg_fd, "exec: \"%s\": %s.", cmd->program, strerror(errno)); -+#endif - - return log_error_errno(ret, errno, "Failed to exec \"%s\"", cmd->program); - } - -+#ifdef HAVE_ISULAD -+int lxc_attach_run_shell(void* payload, int msg_fd) -+#else - int lxc_attach_run_shell(void* payload) -+#endif - { - __do_free char *buf = NULL; - uid_t uid; -diff --git a/src/lxc/attach.h b/src/lxc/attach.h -index ef5a6c19c..831634424 100644 ---- a/src/lxc/attach.h -+++ b/src/lxc/attach.h -@@ -20,9 +20,15 @@ struct lxc_proc_context_info { - int ns_fd[LXC_NS_MAX]; - }; - -+#ifdef HAVE_ISULAD -+extern int lxc_attach(struct lxc_container *container, -+ lxc_attach_exec_t exec_function, void *exec_payload, -+ lxc_attach_options_t *options, pid_t *attached_process, char **err_msg); -+#else - extern int lxc_attach(struct lxc_container *container, - lxc_attach_exec_t exec_function, void *exec_payload, - lxc_attach_options_t *options, pid_t *attached_process); -+#endif - - extern int lxc_attach_remount_sys_proc(void); - -diff --git a/src/lxc/attach_options.h b/src/lxc/attach_options.h -index 63e62d4ff..5767560fe 100644 ---- a/src/lxc/attach_options.h -+++ b/src/lxc/attach_options.h -@@ -26,7 +26,7 @@ enum { - - /* The following are off by default: */ - LXC_ATTACH_REMOUNT_PROC_SYS = 0x00010000, /*!< Remount /proc filesystem */ -- LXC_ATTACH_LSM_NOW = 0x00020000, /*!< TODO: currently unused */ -+ LXC_ATTACH_LSM_NOW = 
0x00020000, /*!< FIXME: unknown */ - /* Set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges. */ - LXC_ATTACH_NO_NEW_PRIVS = 0x00040000, /*!< PR_SET_NO_NEW_PRIVS */ - LXC_ATTACH_TERMINAL = 0x00080000, /*!< Allocate new terminal for attached process. */ -@@ -49,7 +49,11 @@ enum { - * - * \return Function should return \c 0 on success, and any other value to denote failure. - */ -+#ifdef HAVE_ISULAD -+typedef int (*lxc_attach_exec_t)(void* payload, int msg_fd); -+#else - typedef int (*lxc_attach_exec_t)(void* payload); -+#endif - - /*! - * LXC attach options for \ref lxc_container \c attach(). -@@ -113,6 +117,12 @@ typedef struct lxc_attach_options_t { - - /*! File descriptor to log output. */ - int log_fd; -+ -+ char *init_fifo[3]; /* isulad: default fifos for the start */ -+ int64_t timeout;/* isulad: Seconds for waiting on a container to attach/exec before it is killed*/ -+ const char *suffix; -+ bool disable_pty; -+ bool open_stdin; - } lxc_attach_options_t; - - /*! Default attach options to use */ -@@ -131,6 +141,7 @@ typedef struct lxc_attach_options_t { - /* .stdout_fd = */ 1, \ - /* .stderr_fd = */ 2, \ - /* .log_fd = */ -EBADF, \ -+ /* .init_fifo = */ {NULL, NULL, NULL}, \ - } - - /*! -@@ -148,7 +159,11 @@ typedef struct lxc_attach_command_t { - * - * \return \c -1 on error, exit code of lxc_attach_command_t program on success. - */ -+#ifdef HAVE_ISULAD -+extern int lxc_attach_run_command(void* payload, int msg_fd); -+#else - extern int lxc_attach_run_command(void* payload); -+#endif - - /*! - * \brief Run a shell command in the container. -@@ -157,7 +172,11 @@ extern int lxc_attach_run_command(void* payload); - * - * \return Exit code of shell. 
- */ -+#ifdef HAVE_ISULAD -+extern int lxc_attach_run_shell(void* payload, int msg_fd); -+#else - extern int lxc_attach_run_shell(void* payload); -+#endif - - #ifdef __cplusplus - } -diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c -index 603940683..4a0961f13 100644 ---- a/src/lxc/cgroups/cgfsng.c -+++ b/src/lxc/cgroups/cgfsng.c -@@ -27,7 +27,6 @@ - #include - #include - #include --#include - #include - #include - -@@ -215,6 +214,7 @@ static char *read_file(const char *fnam) - return move_ptr(buf); - } - -+#ifndef HAVE_ISULAD - /* Taken over modified from the kernel sources. */ - #define NBITS 32 /* bits in uint32_t */ - #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) -@@ -477,12 +477,14 @@ static bool copy_parent_file(const char *parent_cgroup, - value, child_cgroup, file); - return true; - } -+#endif - - static inline bool is_unified_hierarchy(const struct hierarchy *h) - { - return h->version == CGROUP2_SUPER_MAGIC; - } - -+#ifndef HAVE_ISULAD - /* - * Initialize the cpuset hierarchy in first directory of @cgroup_leaf and set - * cgroup.clone_children so that children inherit settings. Since the -@@ -562,6 +564,7 @@ static int cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, - - return fret; - } -+#endif - - /* Given two null-terminated lists of strings, return true if any string is in - * both. 
-@@ -673,7 +676,7 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line, - if (!dup) - return NULL; - -- lxc_iterate_parts(tok, dup, sep) -+ lxc_iterate_parts (tok, dup, sep) - must_append_controller(klist, nlist, &aret, tok); - } - *p2 = ' '; -@@ -726,7 +729,6 @@ static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char - new->container_base_path = container_base_path; - new->version = type; - new->cgfd_con = -EBADF; -- new->cgfd_limit = -EBADF; - new->cgfd_mon = -EBADF; - - newentry = append_null_to_list((void ***)h); -@@ -948,6 +950,115 @@ static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist, - TRACE("named subsystem %d: %s", k, *it); - } - -+struct generic_userns_exec_data { -+ struct hierarchy **hierarchies; -+ const char *container_cgroup; -+ struct lxc_conf *conf; -+ uid_t origuid; /* target uid in parent namespace */ -+ char *path; -+}; -+ -+#ifdef HAVE_ISULAD -+ -+static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies, -+ const char *container_cgroup) -+{ -+ if (!container_cgroup || !hierarchies) -+ return 0; -+ -+ for (int i = 0; hierarchies[i]; i++) { -+ struct hierarchy *h = hierarchies[i]; -+ int ret; -+ -+ if (!h->container_full_path) { -+ h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, container_cgroup, NULL); -+ } -+ -+ ret = lxc_rm_rf(h->container_full_path); -+ if (ret < 0) { -+ SYSERROR("Failed to destroy \"%s\"", h->container_full_path); -+ return -1; -+ } -+ -+ free_disarm(h->container_full_path); -+ } -+ -+ return 0; -+} -+ -+static int isulad_cgroup_tree_remove_wrapper(void *data) -+{ -+ struct generic_userns_exec_data *arg = data; -+ uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid; -+ gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 
0 : arg->conf->init_gid; -+ int ret; -+ -+ if (!lxc_setgroups(0, NULL) && errno != EPERM) -+ return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)"); -+ -+ ret = setresgid(nsgid, nsgid, nsgid); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)", -+ (int)nsgid, (int)nsgid, (int)nsgid); -+ -+ ret = setresuid(nsuid, nsuid, nsuid); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)", -+ (int)nsuid, (int)nsuid, (int)nsuid); -+ -+ return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup); -+} -+ -+__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ int ret; -+ -+ if (!ops) { -+ ERROR("Called with uninitialized cgroup operations"); -+ return false; -+ } -+ -+ if (!ops->hierarchies) { -+ return false; -+ } -+ -+ if (!handler) { -+ ERROR("Called with uninitialized handler"); -+ return false; -+ } -+ -+ if (!handler->conf) { -+ ERROR("Called with uninitialized conf"); -+ return false; -+ } -+ -+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX -+ ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices); -+ if (ret < 0) -+ WARN("Failed to detach bpf program from cgroup"); -+#endif -+ -+ if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) { -+ struct generic_userns_exec_data wrap = { -+ .conf = handler->conf, -+ .container_cgroup = ops->container_cgroup, -+ .hierarchies = ops->hierarchies, -+ .origuid = 0, -+ }; -+ ret = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper, -+ &wrap, "cgroup_tree_remove_wrapper"); -+ } else { -+ ret = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup); -+ } -+ if (ret < 0) { -+ SYSWARN("Failed to destroy cgroups"); -+ return false; -+ } -+ -+ return true; -+} -+#else - static int cgroup_tree_remove(struct hierarchy **hierarchies, - const char *container_cgroup) - { -@@ -958,29 +1069,19 @@ static int cgroup_tree_remove(struct hierarchy **hierarchies, - 
struct hierarchy *h = hierarchies[i]; - int ret; - -- if (!h->container_limit_path) -+ if (!h->container_full_path) - continue; - -- ret = lxc_rm_rf(h->container_limit_path); -+ ret = lxc_rm_rf(h->container_full_path); - if (ret < 0) -- WARN("Failed to destroy \"%s\"", h->container_limit_path); -+ WARN("Failed to destroy \"%s\"", h->container_full_path); - -- if (h->container_limit_path != h->container_full_path) -- free_disarm(h->container_limit_path); - free_disarm(h->container_full_path); - } - - return 0; - } - --struct generic_userns_exec_data { -- struct hierarchy **hierarchies; -- const char *container_cgroup; -- struct lxc_conf *conf; -- uid_t origuid; /* target uid in parent namespace */ -- char *path; --}; -- - static int cgroup_tree_remove_wrapper(void *data) - { - struct generic_userns_exec_data *arg = data; -@@ -1048,7 +1149,15 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops, - if (ret < 0) - SYSWARN("Failed to destroy cgroups"); - } -+#endif - -+#ifdef HAVE_ISULAD -+__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ return; -+} -+#else - __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, - struct lxc_handler *handler) - { -@@ -1082,7 +1191,6 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, - for (int i = 0; ops->hierarchies[i]; i++) { - __do_free char *pivot_path = NULL; - struct hierarchy *h = ops->hierarchies[i]; -- size_t offset; - int ret; - - if (!h->monitor_full_path) -@@ -1094,21 +1202,16 @@ __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, - goto try_lxc_rm_rf; - } - -- if (conf && conf->cgroup_meta.monitor_dir) -- pivot_path = must_make_path(h->mountpoint, h->container_base_path, -- conf->cgroup_meta.monitor_dir, CGROUP_PIVOT, NULL); -- else if (conf && conf->cgroup_meta.dir) -- pivot_path = must_make_path(h->mountpoint, h->container_base_path, -- conf->cgroup_meta.dir, CGROUP_PIVOT, NULL); 
-+ if (conf && conf->cgroup_meta.dir) -+ pivot_path = must_make_path(h->mountpoint, -+ h->container_base_path, -+ conf->cgroup_meta.dir, -+ CGROUP_PIVOT, NULL); - else -- pivot_path = must_make_path(h->mountpoint, h->container_base_path, -+ pivot_path = must_make_path(h->mountpoint, -+ h->container_base_path, - CGROUP_PIVOT, NULL); - -- offset = strlen(h->mountpoint) + strlen(h->container_base_path); -- -- if (cg_legacy_handle_cpuset_hierarchy(h, pivot_path + offset)) -- SYSWARN("Failed to initialize cpuset %s/" CGROUP_PIVOT, pivot_path); -- - ret = mkdir_p(pivot_path, 0755); - if (ret < 0 && errno != EEXIST) { - ERROR("Failed to create %s", pivot_path); -@@ -1127,6 +1230,15 @@ try_lxc_rm_rf: - WARN("Failed to destroy \"%s\"", h->monitor_full_path); - } - } -+#endif -+ -+#ifdef HAVE_ISULAD -+__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ return true; -+} -+#else - - static int mkdir_eexist_on_last(const char *dir, mode_t mode) - { -@@ -1150,18 +1262,16 @@ static int mkdir_eexist_on_last(const char *dir, mode_t mode) - - ret = mkdir(makeme, mode); - if (ret < 0 && ((errno != EEXIST) || (orig_len == cur_len))) -- return log_warn_errno(-1, errno, "Failed to create directory \"%s\"", makeme); -+ return log_error_errno(-1, errno, "Failed to create directory \"%s\"", makeme); - } while (tmp != dir); - - return 0; - } - --static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, -- struct hierarchy *h, const char *cgroup_tree, -- const char *cgroup_leaf, bool payload, -- const char *cgroup_limit_dir) -+static bool cgroup_tree_create(struct hierarchy *h, const char *cgroup_tree, -+ const char *cgroup_leaf, bool payload) - { -- __do_free char *path = NULL, *limit_path = NULL; -+ __do_free char *path = NULL; - int ret, ret_cpuset; - - path = must_make_path(h->mountpoint, h->container_base_path, cgroup_leaf, NULL); -@@ -1172,37 +1282,6 @@ static bool cgroup_tree_create(struct cgroup_ops 
*ops, struct lxc_conf *conf, - if (ret_cpuset < 0) - return log_error_errno(false, errno, "Failed to handle legacy cpuset controller"); - -- if (payload && cgroup_limit_dir) { -- /* with isolation both parts need to not already exist */ -- limit_path = must_make_path(h->mountpoint, -- h->container_base_path, -- cgroup_limit_dir, NULL); -- -- ret = mkdir_eexist_on_last(limit_path, 0755); -- if (ret < 0) -- return log_debug_errno(false, -- errno, "Failed to create %s limiting cgroup", -- limit_path); -- -- h->cgfd_limit = lxc_open_dirfd(limit_path); -- if (h->cgfd_limit < 0) -- return log_error_errno(false, errno, -- "Failed to open %s", path); -- h->container_limit_path = move_ptr(limit_path); -- -- /* -- * With isolation the devices legacy cgroup needs to be -- * iinitialized early, as it typically contains an 'a' (all) -- * line, which is not possible once a subdirectory has been -- * created. -- */ -- if (string_in_list(h->controllers, "devices")) { -- ret = ops->setup_limits_legacy(ops, conf, true); -- if (ret < 0) -- return ret; -- } -- } -- - ret = mkdir_eexist_on_last(path, 0755); - if (ret < 0) { - /* -@@ -1211,7 +1290,7 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, - * directory for us to ensure correct initialization. 
- */ - if (ret_cpuset != 1 || cgroup_tree) -- return log_debug_errno(false, errno, "Failed to create %s cgroup", path); -+ return log_error_errno(false, errno, "Failed to create %s cgroup", path); - } - - if (payload) { -@@ -1219,10 +1298,6 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, - if (h->cgfd_con < 0) - return log_error_errno(false, errno, "Failed to open %s", path); - h->container_full_path = move_ptr(path); -- if (h->cgfd_limit < 0) -- h->cgfd_limit = h->cgfd_con; -- if (!h->container_limit_path) -- h->container_limit_path = h->container_full_path; - } else { - h->cgfd_mon = lxc_open_dirfd(path); - if (h->cgfd_mon < 0) -@@ -1235,15 +1310,11 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf, - - static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload) - { -- __do_free char *full_path = NULL, *__limit_path = NULL; -- char *limit_path = NULL; -+ __do_free char *full_path = NULL; - - if (payload) { - __lxc_unused __do_close int fd = move_fd(h->cgfd_con); - full_path = move_ptr(h->container_full_path); -- limit_path = move_ptr(h->container_limit_path); -- if (limit_path != full_path) -- __limit_path = limit_path; - } else { - __lxc_unused __do_close int fd = move_fd(h->cgfd_mon); - full_path = move_ptr(h->monitor_full_path); -@@ -1251,38 +1322,6 @@ static void cgroup_tree_leaf_remove(struct hierarchy *h, bool payload) - - if (full_path && rmdir(full_path)) - SYSWARN("Failed to rmdir(\"%s\") cgroup", full_path); -- if (limit_path && rmdir(limit_path)) -- SYSWARN("Failed to rmdir(\"%s\") cgroup", limit_path); --} -- --/* -- * Check we have no lxc.cgroup.dir, and that lxc.cgroup.dir.limit_prefix is a -- * proper prefix directory of lxc.cgroup.dir.payload. -- * -- * Returns the prefix length if it is set, otherwise zero on success. 
-- */ --static bool check_cgroup_dir_config(struct lxc_conf *conf) --{ -- const char *monitor_dir = conf->cgroup_meta.monitor_dir, -- *container_dir = conf->cgroup_meta.container_dir, -- *namespace_dir = conf->cgroup_meta.namespace_dir; -- -- /* none of the new options are set, all is fine */ -- if (!monitor_dir && !container_dir && !namespace_dir) -- return true; -- -- /* some are set, make sure lxc.cgroup.dir is not also set*/ -- if (conf->cgroup_meta.dir) -- return log_error_errno(false, EINVAL, -- "lxc.cgroup.dir conflicts with lxc.cgroup.dir.payload/monitor"); -- -- /* make sure both monitor and payload are set */ -- if (!monitor_dir || !container_dir) -- return log_error_errno(false, EINVAL, -- "lxc.cgroup.dir.payload and lxc.cgroup.dir.monitor must both be set"); -- -- /* namespace_dir may be empty */ -- return true; - } - - __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, -@@ -1293,7 +1332,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, - int idx = 0; - int i; - size_t len; -- char *suffix = NULL; -+ char *suffix; - struct lxc_conf *conf; - - if (!ops) -@@ -1310,13 +1349,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, - - conf = handler->conf; - -- if (!check_cgroup_dir_config(conf)) -- return false; -- -- if (conf->cgroup_meta.monitor_dir) { -- cgroup_tree = NULL; -- monitor_cgroup = strdup(conf->cgroup_meta.monitor_dir); -- } else if (conf->cgroup_meta.dir) { -+ if (conf->cgroup_meta.dir) { - cgroup_tree = conf->cgroup_meta.dir; - monitor_cgroup = must_concat(&len, conf->cgroup_meta.dir, "/", - DEFAULT_MONITOR_CGROUP_PREFIX, -@@ -1340,36 +1373,252 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, - if (!monitor_cgroup) - return ret_set_errno(false, ENOMEM); - -- if (!conf->cgroup_meta.monitor_dir) { -- suffix = monitor_cgroup + len - CGROUP_CREATE_RETRY_LEN; -- *suffix = '\0'; -- } -+ suffix = monitor_cgroup + len - 
CGROUP_CREATE_RETRY_LEN; -+ *suffix = '\0'; - do { -- if (idx && suffix) -+ if (idx) - sprintf(suffix, "-%d", idx); - - for (i = 0; ops->hierarchies[i]; i++) { -- if (cgroup_tree_create(ops, handler->conf, -- ops->hierarchies[i], cgroup_tree, -- monitor_cgroup, false, NULL)) -+ if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, monitor_cgroup, false)) - continue; - -- DEBUG("Failed to create cgroup \"%s\"", ops->hierarchies[i]->monitor_full_path ?: "(null)"); -+ ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->monitor_full_path ?: "(null)"); - for (int j = 0; j < i; j++) - cgroup_tree_leaf_remove(ops->hierarchies[j], false); - - idx++; - break; - } -- } while (ops->hierarchies[i] && idx > 0 && idx < 1000 && suffix); -+ } while (ops->hierarchies[i] && idx > 0 && idx < 1000); - -- if (idx == 1000 || (!suffix && idx != 0)) -- return log_error_errno(false, ERANGE, "Failed to create monitor cgroup"); -+ if (idx == 1000) -+ return ret_set_errno(false, ERANGE); - - ops->monitor_cgroup = move_ptr(monitor_cgroup); - return log_info(true, "The monitor process uses \"%s\" as cgroup", ops->monitor_cgroup); - } -+#endif -+ -+#ifdef HAVE_ISULAD -+ -+static bool isulad_copy_parent_file(char *path, char *file) -+{ -+ int ret; -+ int len = 0; -+ char *value = NULL; -+ char *current = NULL; -+ char *fpath = NULL; -+ char *lastslash = NULL; -+ char oldv; -+ -+ fpath = must_make_path(path, file, NULL); -+ current = read_file(fpath); -+ -+ if (current == NULL) { -+ SYSERROR("Failed to read file \"%s\"", fpath); -+ free(fpath); -+ return false; -+ } -+ -+ if (strcmp(current, "\n") != 0) { -+ free(fpath); -+ free(current); -+ return true; -+ } -+ -+ free(fpath); -+ free(current); -+ -+ lastslash = strrchr(path, '/'); -+ if (lastslash == NULL) { -+ ERROR("Failed to detect \"/\" in \"%s\"", path); -+ return false; -+ } -+ oldv = *lastslash; -+ *lastslash = '\0'; -+ fpath = must_make_path(path, file, NULL); -+ *lastslash = oldv; -+ len = lxc_read_from_file(fpath, NULL, 0); 
-+ if (len <= 0) -+ goto on_error; -+ -+ value = must_realloc(NULL, len + 1); -+ ret = lxc_read_from_file(fpath, value, len); -+ if (ret != len) -+ goto on_error; -+ free(fpath); -+ -+ fpath = must_make_path(path, file, NULL); -+ ret = lxc_write_to_file(fpath, value, len, false, 0666); -+ if (ret < 0) -+ SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath); -+ free(fpath); -+ free(value); -+ return ret >= 0; -+ -+on_error: -+ SYSERROR("Failed to read file \"%s\"", fpath); -+ free(fpath); -+ free(value); -+ return false; -+} -+ -+static bool build_sub_cpuset_cgroup_dir(char *cgpath) -+{ -+ int ret; -+ -+ ret = mkdir_p(cgpath, 0755); -+ if (ret < 0) { -+ if (errno != EEXIST) { -+ SYSERROR("Failed to create directory \"%s\"", cgpath); -+ return false; -+ } -+ } -+ -+ /* copy parent's settings */ -+ if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) { -+ SYSERROR("Failed to copy \"cpuset.cpus\" settings"); -+ return false; -+ } -+ -+ /* copy parent's settings */ -+ if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) { -+ SYSERROR("Failed to copy \"cpuset.mems\" settings"); -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname) -+{ -+ char *cgpath, *slash; -+ bool sub_mk_success = false; -+ -+ if (!string_in_list(h->controllers, "cpuset")) -+ return true; -+ -+ cgname += strspn(cgname, "/"); -+ -+ slash = strchr(cgname, '/'); -+ -+ if (slash != NULL) { -+ while (slash) { -+ *slash = '\0'; -+ cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); -+ sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath); -+ free(cgpath); -+ *slash = '/'; -+ if (!sub_mk_success) { -+ return false; -+ } -+ slash = strchr(slash + 1, '/'); -+ } -+ } -+ -+ cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); -+ sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath); -+ free(cgpath); -+ if (!sub_mk_success) { -+ return false; -+ } -+ -+ return true; -+} 
-+ -+static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode) -+{ -+ const char *tmp = dir; -+ const char *orig = dir; -+ -+ do { -+ int ret; -+ size_t cur_len; -+ char *makeme; -+ -+ dir = tmp + strspn(tmp, "/"); -+ tmp = dir + strcspn(dir, "/"); -+ -+ errno = ENOMEM; -+ cur_len = dir - orig; -+ makeme = strndup(orig, cur_len); -+ if (!makeme) -+ return -1; -+ -+ ret = mkdir(makeme, mode); -+ if (ret < 0) { -+ if (errno != EEXIST) { -+ SYSERROR("Failed to create directory \"%s\"", makeme); -+ free(makeme); -+ return -1; -+ } -+ } -+ free(makeme); -+ -+ } while (tmp != dir); -+ -+ return 0; -+} - -+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd) -+{ -+ int ret; -+ __do_free char *path = NULL; -+ -+ path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); -+ -+ if (file_exists(path)) { // it must not already exist -+ ERROR("Cgroup path \"%s\" already exist.", path); -+ lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.", -+ __FILE__, __LINE__, path); -+ return false; -+ } -+ -+ if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) { -+ ERROR("Failed to handle legacy cpuset controller"); -+ return false; -+ } -+ -+ ret = isulad_mkdir_eexist_on_last(path, 0755); -+ if (ret < 0) { -+ ERROR("Failed to create cgroup \"%s\"", path); -+ return false; -+ } -+ -+ h->cgfd_con = lxc_open_dirfd(path); -+ if (h->cgfd_con < 0) -+ return log_error_errno(false, errno, "Failed to open %s", path); -+ -+ if (h->container_full_path == NULL) { -+ h->container_full_path = move_ptr(path); -+ } -+ -+ return true; -+} -+ -+/* isulad: create hierarchies path, if fail, return the error */ -+__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ int i; -+ char *container_cgroup = ops->container_cgroup; -+ -+ if (!container_cgroup) { -+ ERROR("cgfsng_create container_cgroup is invalid"); -+ return false; -+ } -+ -+ for (i = 0; 
ops->hierarchies[i]; i++) { -+ if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, ops->errfd)) { -+ SYSERROR("Failed to create %s", ops->hierarchies[i]->container_full_path); -+ return false; -+ } -+ } -+ -+ return true; -+} -+#else - /* - * Try to create the same cgroup in all hierarchies. Start with cgroup_pattern; - * next cgroup_pattern-1, -2, ..., -999. -@@ -1377,14 +1626,12 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, - __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, - struct lxc_handler *handler) - { -- __do_free char *container_cgroup = NULL, -- *__cgroup_tree = NULL, -- *limiting_cgroup = NULL; -+ __do_free char *container_cgroup = NULL, *__cgroup_tree = NULL; - const char *cgroup_tree; - int idx = 0; - int i; - size_t len; -- char *suffix = NULL; -+ char *suffix; - struct lxc_conf *conf; - - if (!ops) -@@ -1401,25 +1648,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, - - conf = handler->conf; - -- if (!check_cgroup_dir_config(conf)) -- return false; -- -- if (conf->cgroup_meta.container_dir) { -- cgroup_tree = NULL; -- -- limiting_cgroup = strdup(conf->cgroup_meta.container_dir); -- if (!limiting_cgroup) -- return ret_set_errno(false, ENOMEM); -- -- if (conf->cgroup_meta.namespace_dir) { -- container_cgroup = must_make_path(limiting_cgroup, -- conf->cgroup_meta.namespace_dir, -- NULL); -- } else { -- /* explicit paths but without isolation */ -- container_cgroup = move_ptr(limiting_cgroup); -- } -- } else if (conf->cgroup_meta.dir) { -+ if (conf->cgroup_meta.dir) { - cgroup_tree = conf->cgroup_meta.dir; - container_cgroup = must_concat(&len, cgroup_tree, "/", - DEFAULT_PAYLOAD_CGROUP_PREFIX, -@@ -1443,38 +1672,41 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, - if (!container_cgroup) - return ret_set_errno(false, ENOMEM); - -- if (!conf->cgroup_meta.container_dir) { -- suffix = container_cgroup + len - 
CGROUP_CREATE_RETRY_LEN; -- *suffix = '\0'; -- } -+ suffix = container_cgroup + len - CGROUP_CREATE_RETRY_LEN; -+ *suffix = '\0'; - do { -- if (idx && suffix) -+ if (idx) - sprintf(suffix, "-%d", idx); - - for (i = 0; ops->hierarchies[i]; i++) { -- if (cgroup_tree_create(ops, handler->conf, -- ops->hierarchies[i], cgroup_tree, -- container_cgroup, true, -- limiting_cgroup)) -+ if (cgroup_tree_create(ops->hierarchies[i], cgroup_tree, container_cgroup, true)) - continue; - -- DEBUG("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path ?: "(null)"); -+ ERROR("Failed to create cgroup \"%s\"", ops->hierarchies[i]->container_full_path ?: "(null)"); - for (int j = 0; j < i; j++) - cgroup_tree_leaf_remove(ops->hierarchies[j], true); - - idx++; - break; - } -- } while (ops->hierarchies[i] && idx > 0 && idx < 1000 && suffix); -+ } while (ops->hierarchies[i] && idx > 0 && idx < 1000); - -- if (idx == 1000 || (!suffix && idx != 0)) -- return log_error_errno(false, ERANGE, "Failed to create container cgroup"); -+ if (idx == 1000) -+ return ret_set_errno(false, ERANGE); - - ops->container_cgroup = move_ptr(container_cgroup); - INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup); - return true; - } -+#endif - -+#ifdef HAVE_ISULAD -+__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ return true; -+} -+#else - __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, - struct lxc_handler *handler) - { -@@ -1526,7 +1758,58 @@ __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, - - return true; - } -+#endif - -+#ifdef HAVE_ISULAD -+__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ int len; -+ char pidstr[INTTYPE_TO_STRLEN(pid_t)]; -+ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!ops->hierarchies) -+ return true; -+ -+ if (!ops->container_cgroup) -+ return ret_set_errno(false, 
ENOENT); -+ -+ if (!handler || !handler->conf) -+ return ret_set_errno(false, EINVAL); -+ -+ len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid); -+ -+ for (int i = 0; ops->hierarchies[i]; i++) { -+ int ret; -+ char *fullpath; -+ int retry_count = 0; -+ int max_retry = 10; -+ -+ fullpath = must_make_path(ops->hierarchies[i]->container_full_path, -+ "cgroup.procs", NULL); -+retry: -+ ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666); -+ if (ret != 0) { -+ if (retry_count < max_retry) { -+ SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", fullpath, retry_count); -+ (void)isulad_cg_legacy_handle_cpuset_hierarchy(ops->hierarchies[i], ops->container_cgroup); -+ (void)isulad_mkdir_eexist_on_last(ops->hierarchies[i]->container_full_path, 0755); -+ usleep(100 * 1000); /* 100 millisecond */ -+ retry_count++; -+ goto retry; -+ } -+ SYSERROR("Failed to enter cgroup \"%s\"", fullpath); -+ free(fullpath); -+ return false; -+ } -+ free(fullpath); -+ } -+ -+ return true; -+} -+#else - __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, - struct lxc_handler *handler) - { -@@ -1558,6 +1841,7 @@ __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, - - return true; - } -+#endif - - static int fchowmodat(int dirfd, const char *path, uid_t chown_uid, - gid_t chown_gid, mode_t chmod_mode) -@@ -1805,6 +2089,196 @@ static inline int cg_mount_cgroup_full(int type, struct hierarchy *h, - return __cg_mount_direct(type, h, controllerpath); - } - -+#ifdef HAVE_ISULAD -+__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, -+ struct lxc_handler *handler, -+ const char *root, int type) -+{ -+ int i, ret; -+ char *tmpfspath = NULL; -+ char *systemdpath = NULL; -+ char *unifiedpath = NULL; -+ bool has_cgns = false, retval = false, wants_force_mount = false; -+ char **merged = NULL; -+ -+ if ((type & LXC_AUTO_CGROUP_MASK) == 0) -+ return true; -+ -+ if (type & LXC_AUTO_CGROUP_FORCE) { -+ type &= ~LXC_AUTO_CGROUP_FORCE; -+ 
wants_force_mount = true; -+ } -+ -+ if (!wants_force_mount) { -+ if (!lxc_list_empty(&handler->conf->keepcaps)) -+ wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps); -+ else -+ wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps); -+ } -+ -+ has_cgns = cgns_supported(); -+ if (has_cgns && !wants_force_mount) -+ return true; -+ -+ if (type == LXC_AUTO_CGROUP_NOSPEC) -+ type = LXC_AUTO_CGROUP_MIXED; -+ else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC) -+ type = LXC_AUTO_CGROUP_FULL_MIXED; -+ -+ /* Mount tmpfs */ -+ tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL); -+ if (mkdir_p(tmpfspath, 0755) < 0) { -+ ERROR("Failed to create directory: %s", tmpfspath); -+ goto on_error; -+ } -+ ret = safe_mount(NULL, tmpfspath, "tmpfs", -+ MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, -+ "size=10240k,mode=755", root); -+ if (ret < 0) -+ goto on_error; -+ -+ for (i = 0; ops->hierarchies[i]; i++) { -+ char *controllerpath = NULL; -+ char *path2 = NULL; -+ struct hierarchy *h = ops->hierarchies[i]; -+ char *controller = strrchr(h->mountpoint, '/'); -+ -+ if (!controller) -+ continue; -+ controller++; -+ -+ // isulad: symlink subcgroup -+ if (strchr(controller, ',') != NULL) { -+ int pret; -+ pret = lxc_append_string(&merged, controller); -+ if (pret < 0) -+ goto on_error; -+ } -+ -+ controllerpath = must_make_path(tmpfspath, controller, NULL); -+ if (dir_exists(controllerpath)) { -+ free(controllerpath); -+ continue; -+ } -+ -+ ret = mkdir(controllerpath, 0755); -+ if (ret < 0) { -+ SYSERROR("Error creating cgroup path: %s", controllerpath); -+ free(controllerpath); -+ goto on_error; -+ } -+ -+ if (has_cgns && wants_force_mount) { -+ /* If cgroup namespaces are supported but the container -+ * will not have CAP_SYS_ADMIN after it has started we -+ * need to mount the cgroups manually. 
-+ */ -+ ret = cg_mount_in_cgroup_namespace(type, h, controllerpath); -+ free(controllerpath); -+ if (ret < 0) -+ goto on_error; -+ -+ continue; -+ } -+ -+ ret = cg_mount_cgroup_full(type, h, controllerpath); -+ if (ret < 0) { -+ free(controllerpath); -+ goto on_error; -+ } -+ -+ if (!cg_mount_needs_subdirs(type)) { -+ free(controllerpath); -+ continue; -+ } -+ -+ // isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container, -+ // isulad: ignore h->container_base_path so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container -+ path2 = must_make_path(controllerpath, NULL); -+ ret = mkdir_p(path2, 0755); -+ if (ret < 0) { -+ free(controllerpath); -+ free(path2); -+ goto on_error; -+ } -+ -+ ret = cg_legacy_mount_controllers(type, h, controllerpath, -+ path2, ops->container_cgroup); -+ free(controllerpath); -+ free(path2); -+ if (ret < 0) -+ goto on_error; -+ } -+ -+ // isulad: symlink subcgroup -+ if (merged) { -+ char **mc = NULL; -+ for (mc = merged; *mc; mc++) { -+ char *token = NULL; -+ char *copy = must_copy_string(*mc); -+ lxc_iterate_parts(token, copy, ",") { -+ int mret; -+ char *link; -+ link = must_make_path(tmpfspath, token, NULL); -+ mret = symlink(*mc, link); -+ if (mret < 0 && errno != EEXIST) { -+ SYSERROR("Failed to create link %s for target %s", link, *mc); -+ free(copy); -+ free(link); -+ goto on_error; -+ } -+ free(link); -+ } -+ free(copy); -+ } -+ } -+ -+ -+ // isulad: remount /sys/fs/cgroup to readonly -+ if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) { -+ ret = mount(tmpfspath, tmpfspath, "bind", -+ MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL); -+ if (ret < 0) { -+ SYSERROR("Failed to remount /sys/fs/cgroup."); -+ goto on_error; -+ } -+ } -+ -+ // isulad: remount /sys/fs/cgroup/systemd to readwrite for system container -+ if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0) { -+ // isulad: don't use 
the unified hierarchy for the systemd cgroup -+ unifiedpath = must_make_path(root, "/sys/fs/cgroup/unified", NULL); -+ if (dir_exists(unifiedpath)) { -+ ret = umount2(unifiedpath, MNT_DETACH); -+ if (ret < 0) { -+ SYSERROR("Failed to umount /sys/fs/cgroup/unified."); -+ goto on_error; -+ } -+ } -+ -+ systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL); -+ ret = mount(systemdpath, systemdpath, "bind", -+ MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_BIND|MS_REMOUNT, NULL); -+ if (ret < 0) { -+ SYSERROR("Failed to remount /sys/fs/cgroup/systemd."); -+ goto on_error; -+ } -+ } -+ -+ retval = true; -+ -+on_error: -+ free(tmpfspath); -+ if (systemdpath != NULL) { -+ free(systemdpath); -+ } -+ if (unifiedpath != NULL) { -+ free(unifiedpath); -+ } -+ lxc_free_array((void **)merged, free); -+ return retval; -+} -+#else - __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, - struct lxc_handler *handler, - const char *root, int type) -@@ -1830,24 +2304,11 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, - wants_force_mount = true; - } - -- if (!wants_force_mount) { -+ if (!wants_force_mount){ - if (!lxc_list_empty(&handler->conf->keepcaps)) - wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps); - else - wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps); -- -- /* -- * Most recent distro versions currently have init system that -- * do support cgroup2 but do not mount it by default unless -- * explicitly told so even if the host is cgroup2 only. That -- * means they often will fail to boot. Fix this by pre-mounting -- * cgroup2 by default. We will likely need to be doing this a -- * few years until all distros have switched over to cgroup2 at -- * which point we can safely assume that their init systems -- * will mount it themselves. 
-- */ -- if (pure_unified_layout(ops)) -- wants_force_mount = true; - } - - has_cgns = cgns_supported(); -@@ -1930,6 +2391,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, - - return true; - } -+#endif - - /* Only root needs to escape to the cgroup of its init. */ - __cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops, -@@ -2046,14 +2508,78 @@ static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata, - return LXC_MAINLOOP_CONTINUE; - } - --static int cg_unified_freeze_do(struct cgroup_ops *ops, int timeout, -- const char *state_string, -- int state_num, -- const char *epoll_error, -- const char *wait_error) -+static int cg_unified_freeze(struct cgroup_ops *ops, int timeout) -+{ -+ __do_close int fd = -EBADF; -+ call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL; -+ int ret; -+ struct lxc_epoll_descr descr; -+ struct hierarchy *h; -+ -+ h = ops->unified; -+ if (!h) -+ return ret_set_errno(-1, ENOENT); -+ -+ if (!h->container_full_path) -+ return ret_set_errno(-1, EEXIST); -+ -+ if (timeout != 0) { -+ __do_free char *events_file = NULL; -+ -+ events_file = must_make_path(h->container_full_path, "cgroup.events", NULL); -+ fd = open(events_file, O_RDONLY | O_CLOEXEC); -+ if (fd < 0) -+ return log_error_errno(-1, errno, "Failed to open cgroup.events file"); -+ -+ ret = lxc_mainloop_open(&descr); -+ if (ret) -+ return log_error_errno(-1, errno, "Failed to create epoll instance to wait for container freeze"); -+ -+ /* automatically cleaned up now */ -+ descr_ptr = &descr; -+ -+ ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){1})); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop"); -+ } -+ -+ ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", "1", 1); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to open cgroup.freeze file"); -+ -+ if (timeout != 0 && lxc_mainloop(&descr, 
timeout)) -+ return log_error_errno(-1, errno, "Failed to wait for container to be frozen"); -+ -+ return 0; -+} -+ -+__cgfsng_ops static int cgfsng_freeze(struct cgroup_ops *ops, int timeout) -+{ -+ if (!ops->hierarchies) -+ return ret_set_errno(-1, ENOENT); -+ -+ if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED) -+ return cg_legacy_freeze(ops); -+ -+ return cg_unified_freeze(ops, timeout); -+} -+ -+static int cg_legacy_unfreeze(struct cgroup_ops *ops) -+{ -+ struct hierarchy *h; -+ -+ h = get_hierarchy(ops, "freezer"); -+ if (!h) -+ return ret_set_errno(-1, ENOENT); -+ -+ return lxc_write_openat(h->container_full_path, "freezer.state", -+ "THAWED", STRLITERALLEN("THAWED")); -+} -+ -+static int cg_unified_unfreeze(struct cgroup_ops *ops, int timeout) - { - __do_close int fd = -EBADF; -- call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL; -+ call_cleaner(lxc_mainloop_close)struct lxc_epoll_descr *descr_ptr = NULL; - int ret; - struct lxc_epoll_descr descr; - struct hierarchy *h; -@@ -2075,63 +2601,26 @@ static int cg_unified_freeze_do(struct cgroup_ops *ops, int timeout, - - ret = lxc_mainloop_open(&descr); - if (ret) -- return log_error_errno(-1, errno, "%s", epoll_error); -+ return log_error_errno(-1, errno, "Failed to create epoll instance to wait for container unfreeze"); - - /* automatically cleaned up now */ - descr_ptr = &descr; - -- ret = lxc_mainloop_add_handler_events(&descr, fd, EPOLLPRI, freezer_cgroup_events_cb, INT_TO_PTR(state_num)); -+ ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){0})); - if (ret < 0) - return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop"); - } - -- ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", state_string, 1); -+ ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", "0", 1); - if (ret < 0) - return log_error_errno(-1, errno, "Failed to open cgroup.freeze file"); - - if (timeout != 0 && lxc_mainloop(&descr, 
timeout)) -- return log_error_errno(-1, errno, "%s", wait_error); -+ return log_error_errno(-1, errno, "Failed to wait for container to be unfrozen"); - - return 0; - } - --static int cg_unified_freeze(struct cgroup_ops *ops, int timeout) --{ -- return cg_unified_freeze_do(ops, timeout, "1", 1, -- "Failed to create epoll instance to wait for container freeze", -- "Failed to wait for container to be frozen"); --} -- --__cgfsng_ops static int cgfsng_freeze(struct cgroup_ops *ops, int timeout) --{ -- if (!ops->hierarchies) -- return ret_set_errno(-1, ENOENT); -- -- if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED) -- return cg_legacy_freeze(ops); -- -- return cg_unified_freeze(ops, timeout); --} -- --static int cg_legacy_unfreeze(struct cgroup_ops *ops) --{ -- struct hierarchy *h; -- -- h = get_hierarchy(ops, "freezer"); -- if (!h) -- return ret_set_errno(-1, ENOENT); -- -- return lxc_write_openat(h->container_full_path, "freezer.state", -- "THAWED", STRLITERALLEN("THAWED")); --} -- --static int cg_unified_unfreeze(struct cgroup_ops *ops, int timeout) --{ -- return cg_unified_freeze_do(ops, timeout, "0", 0, -- "Failed to create epoll instance to wait for container unfreeze", -- "Failed to wait for container to be unfrozen"); --} -- - __cgfsng_ops static int cgfsng_unfreeze(struct cgroup_ops *ops, int timeout) - { - if (!ops->hierarchies) -@@ -2143,8 +2632,8 @@ __cgfsng_ops static int cgfsng_unfreeze(struct cgroup_ops *ops, int timeout) - return cg_unified_unfreeze(ops, timeout); - } - --static const char *cgfsng_get_cgroup_do(struct cgroup_ops *ops, -- const char *controller, bool limiting) -+__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, -+ const char *controller) - { - struct hierarchy *h; - -@@ -2153,27 +2642,33 @@ static const char *cgfsng_get_cgroup_do(struct cgroup_ops *ops, - return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"", - controller ? 
controller : "(null)"); - -- if (limiting) -- return h->container_limit_path -- ? h->container_limit_path + strlen(h->mountpoint) -- : NULL; -+#ifdef HAVE_ISULAD -+ if (!h->container_full_path) -+ h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL); -+#endif - - return h->container_full_path - ? h->container_full_path + strlen(h->mountpoint) - : NULL; - } - --__cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, -+#ifdef HAVE_ISULAD -+__cgfsng_ops static const char *cgfsng_get_cgroup_full_path(struct cgroup_ops *ops, - const char *controller) - { -- return cgfsng_get_cgroup_do(ops, controller, false); --} -+ struct hierarchy *h; - --__cgfsng_ops static const char *cgfsng_get_limiting_cgroup(struct cgroup_ops *ops, -- const char *controller) --{ -- return cgfsng_get_cgroup_do(ops, controller, true); -+ h = get_hierarchy(ops, controller); -+ if (!h) -+ return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"", -+ controller ? controller : "(null)"); -+ -+ if (!h->container_full_path) -+ h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL); -+ -+ return h->container_full_path; - } -+#endif - - /* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path, - * which must be freed by the caller. 
-@@ -2481,6 +2976,44 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, - return true; - } - -+#ifdef HAVE_ISULAD -+__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, -+ char *value, size_t len, const char *name, -+ const char *lxcpath) -+{ -+ int ret = -1; -+ size_t controller_len; -+ char *controller, *p, *path; -+ struct hierarchy *h; -+ -+ controller_len = strlen(filename); -+ controller = alloca(controller_len + 1); -+ (void)strlcpy(controller, filename, controller_len + 1); -+ -+ p = strchr(controller, '.'); -+ if (p) -+ *p = '\0'; -+ -+ const char *ori_path = ops->get_cgroup(ops, controller); -+ if (ori_path == NULL) { -+ ERROR("Failed to get cgroup path:%s", controller); -+ return -1; -+ } -+ path = safe_strdup(ori_path); -+ -+ h = get_hierarchy(ops, controller); -+ if (h) { -+ char *fullpath; -+ -+ fullpath = build_full_cgpath_from_monitorpath(h, path, filename); -+ ret = lxc_read_from_file(fullpath, value, len); -+ free(fullpath); -+ } -+ free(path); -+ -+ return ret; -+} -+#else - /* Called externally (i.e. from 'lxc-cgroup') to query cgroup limits. Here we - * don't have a cgroup_data set up, so we ask the running container through the - * commands API for the cgroup path. 
-@@ -2503,7 +3036,7 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, - if (p) - *p = '\0'; - -- path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller); -+ path = lxc_cmd_get_cgroup_path(name, lxcpath, controller); - /* not running */ - if (!path) - return -1; -@@ -2518,6 +3051,7 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, - - return ret; - } -+#endif - - static int device_cgroup_parse_access(struct device_item *device, const char *val) - { -@@ -2544,7 +3078,7 @@ static int device_cgroup_parse_access(struct device_item *device, const char *va - return 0; - } - --static int device_cgroup_rule_parse(struct device_item *device, const char *key, -+int device_cgroup_rule_parse(struct device_item *device, const char *key, - const char *val) - { - int count, ret; -@@ -2631,6 +3165,44 @@ static int device_cgroup_rule_parse(struct device_item *device, const char *key, - return device_cgroup_parse_access(device, ++val); - } - -+#ifdef HAVE_ISULAD -+__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, -+ const char *filename, const char *value, -+ const char *name, const char *lxcpath) -+{ -+ int ret = -1; -+ size_t controller_len; -+ char *controller, *p, *path; -+ struct hierarchy *h; -+ -+ controller_len = strlen(filename); -+ controller = alloca(controller_len + 1); -+ (void)strlcpy(controller, filename, controller_len + 1); -+ -+ p = strchr(controller, '.'); -+ if (p) -+ *p = '\0'; -+ -+ const char *ori_path = ops->get_cgroup(ops, controller); -+ if (ori_path == NULL) { -+ ERROR("Failed to get cgroup path:%s", controller); -+ return -1; -+ } -+ path = safe_strdup(ori_path); -+ -+ h = get_hierarchy(ops, controller); -+ if (h) { -+ char *fullpath; -+ -+ fullpath = build_full_cgpath_from_monitorpath(h, path, filename); -+ ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); -+ free(fullpath); -+ } -+ free(path); -+ -+ return ret; -+} -+#else - /* Called externally 
(i.e. from 'lxc-cgroup') to set new cgroup limits. Here we - * don't have a cgroup_data set up, so we ask the running container through the - * commands API for the cgroup path. -@@ -2668,7 +3240,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, - return 0; - } - -- path = lxc_cmd_get_limiting_cgroup_path(name, lxcpath, controller); -+ path = lxc_cmd_get_cgroup_path(name, lxcpath, controller); - /* not running */ - if (!path) - return -1; -@@ -2683,6 +3255,7 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, - - return ret; - } -+#endif - - /* take devices cgroup line - * /dev/foo rwx -@@ -2726,9 +3299,6 @@ static int device_cgroup_rule_parse_devpath(struct device_item *device, - return ret_set_errno(-1, EINVAL); - } - -- if (!mode) -- return ret_errno(EINVAL); -- - if (device_cgroup_parse_access(device, mode) < 0) - return -1; - -@@ -2777,11 +3347,12 @@ static int convert_devpath(const char *invalue, char *dest) - return 0; - } - -+#ifndef HAVE_ISULAD - /* Called from setup_limits - here we have the container's cgroup_data because - * we created the cgroups. - */ - static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, -- const char *value, bool is_cpuset) -+ const char *value) - { - __do_free char *controller = NULL; - char *p; -@@ -2807,12 +3378,116 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, - if (!h) - return log_error_errno(-ENOENT, ENOENT, "Failed to setup limits for the \"%s\" controller. The controller seems to be unused by \"cgfsng\" cgroup driver or not enabled on the cgroup hierarchy", controller); - -- if (is_cpuset) { -- int ret = lxc_write_openat(h->container_full_path, filename, value, strlen(value)); -- if (ret) -+ return lxc_write_openat(h->container_full_path, filename, value, strlen(value)); -+} -+#endif -+ -+#ifdef HAVE_ISULAD -+/* Called from setup_limits - here we have the container's cgroup_data because -+ * we created the cgroups. 
-+ */ -+static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename, -+ char *value, size_t len) -+{ -+ char *fullpath = NULL; -+ char *p = NULL; -+ struct hierarchy *h = NULL; -+ int ret = 0; -+ char *controller = NULL; -+ -+ len = strlen(filename); -+ if (SIZE_MAX - 1 < len) { -+ errno = EINVAL; -+ return -1; -+ } -+ controller = calloc(1, len + 1); -+ if (controller == NULL) { -+ errno = ENOMEM; -+ return -1; -+ } -+ (void)strlcpy(controller, filename, len + 1); -+ -+ p = strchr(controller, '.'); -+ if (p) -+ *p = '\0'; -+ -+ -+ h = get_hierarchy(ops, controller); -+ if (!h) { -+ ERROR("Failed to setup limits for the \"%s\" controller. " -+ "The controller seems to be unused by \"cgfsng\" cgroup " -+ "driver or not enabled on the cgroup hierarchy", -+ controller); -+ errno = ENOENT; -+ free(controller); -+ return -ENOENT; -+ } -+ -+ fullpath = must_make_path(h->container_full_path, filename, NULL); -+ ret = lxc_read_from_file(fullpath, value, len); -+ free(fullpath); -+ free(controller); -+ return ret; -+} -+ -+static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, -+ const char *value) -+{ -+ size_t len; -+ char *fullpath, *p; -+ /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */ -+ char converted_value[50]; -+ struct hierarchy *h; -+ int ret = 0; -+ char *controller = NULL; -+ int retry_count = 0; -+ int max_retry = 10; -+ char *container_cgroup = ops->container_cgroup; -+ -+ len = strlen(filename); -+ controller = alloca(len + 1); -+ (void)strlcpy(controller, filename, len + 1); -+ -+ p = strchr(controller, '.'); -+ if (p) -+ *p = '\0'; -+ -+ if (strcmp("devices.allow", filename) == 0 && value[0] == '/') { -+ ret = convert_devpath(value, converted_value); -+ if (ret < 0) - return ret; -+ value = converted_value; - } -- return lxc_write_openat(h->container_limit_path, filename, value, strlen(value)); -+ -+ h = get_hierarchy(ops, controller); -+ if (!h) { -+ ERROR("Failed to setup limits for the \"%s\" 
controller. " -+ "The controller seems to be unused by \"cgfsng\" cgroup " -+ "driver or not enabled on the cgroup hierarchy", -+ controller); -+ errno = ENOENT; -+ return -ENOENT; -+ } -+ -+ fullpath = must_make_path(h->container_full_path, filename, NULL); -+ -+retry: -+ ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); -+ if (ret != 0) { -+ if (retry_count < max_retry) { -+ SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath); -+ (void)isulad_cg_legacy_handle_cpuset_hierarchy(h, container_cgroup); -+ (void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755); -+ usleep(100 * 1000); /* 100 millisecond */ -+ retry_count++; -+ goto retry; -+ } -+ lxc_write_error_message(ops->errfd, -+ "%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".", -+ __FILE__, __LINE__, value, fullpath, strerror(errno)); -+ } -+ free(fullpath); -+ return ret; - } - - __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, -@@ -2824,6 +3499,8 @@ __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, - struct lxc_list *iterator, *next; - struct lxc_cgroup *cg; - bool ret = false; -+ char value[21 + 1] = { 0 }; -+ long long int readvalue, setvalue; - - if (!ops) - return ret_set_errno(false, ENOENT); -@@ -2838,8 +3515,99 @@ __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, - if (!ops->hierarchies) - return ret_set_errno(false, EINVAL); - -- if (pure_unified_layout(ops)) -- return log_warn_errno(true, EINVAL, "Ignoring legacy cgroup limits on pure cgroup2 system"); -+ sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings); -+ if (!sorted_cgroup_settings) -+ return false; -+ -+ lxc_list_for_each(iterator, sorted_cgroup_settings) { -+ cg = iterator->elem; -+ -+ if (do_devices == !strncmp("devices", cg->subsystem, 7)) { -+ const char *cgvalue = cg->value; -+ if (strcmp(cg->subsystem, "files.limit") == 0) { -+ if 
(lxc_safe_long_long(cgvalue, &setvalue) != 0) { -+ SYSERROR("Invalid integer value %s", cgvalue); -+ goto out; -+ } -+ if (setvalue <= 0) { -+ cgvalue = "max"; -+ } -+ } -+ if (isulad_cg_legacy_set_data(ops, cg->subsystem, cgvalue)) { -+ if (do_devices && (errno == EACCES || errno == EPERM)) { -+ SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue); -+ continue; -+ } -+ SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue); -+ goto out; -+ } -+ DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cgvalue); -+ } -+ -+ // isulad: check cpu shares -+ if (strcmp(cg->subsystem, "cpu.shares") == 0) { -+ if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) { -+ SYSERROR("Error get %s", cg->subsystem); -+ goto out; -+ } -+ trim(value); -+ if (lxc_safe_long_long(cg->value, &setvalue) != 0) { -+ SYSERROR("Invalid value %s", cg->value); -+ goto out; -+ } -+ if (lxc_safe_long_long(value, &readvalue) != 0) { -+ SYSERROR("Invalid value %s", value); -+ goto out; -+ } -+ if (setvalue > readvalue) { -+ ERROR("The maximum allowed cpu-shares is %s", value); -+ lxc_write_error_message(ops->errfd, -+ "%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".", -+ __FILE__, __LINE__, value); -+ goto out; -+ } else if (setvalue < readvalue) { -+ ERROR("The minimum allowed cpu-shares is %s", value); -+ lxc_write_error_message(ops->errfd, -+ "%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".", -+ __FILE__, __LINE__, value); -+ goto out; -+ } -+ } -+ } -+ -+ ret = true; -+ INFO("Limits for the legacy cgroup hierarchies have been setup"); -+out: -+ lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) { -+ lxc_list_del(iterator); -+ free(iterator); -+ } -+ -+ return ret; -+} -+#else -+__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, -+ struct lxc_conf *conf, -+ bool do_devices) -+{ -+ __do_free struct lxc_list 
*sorted_cgroup_settings = NULL; -+ struct lxc_list *cgroup_settings = &conf->cgroup; -+ struct lxc_list *iterator, *next; -+ struct lxc_cgroup *cg; -+ bool ret = false; -+ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!conf) -+ return ret_set_errno(false, EINVAL); -+ -+ cgroup_settings = &conf->cgroup; -+ if (lxc_list_empty(cgroup_settings)) -+ return true; -+ -+ if (!ops->hierarchies) -+ return ret_set_errno(false, EINVAL); - - sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings); - if (!sorted_cgroup_settings) -@@ -2849,7 +3617,7 @@ __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, - cg = iterator->elem; - - if (do_devices == !strncmp("devices", cg->subsystem, 7)) { -- if (cg_legacy_set_data(ops, cg->subsystem, cg->value, strncmp("cpuset", cg->subsystem, 6) == 0)) { -+ if (cg_legacy_set_data(ops, cg->subsystem, cg->value)) { - if (do_devices && (errno == EACCES || errno == EPERM)) { - SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cg->value); - continue; -@@ -2871,6 +3639,7 @@ out: - - return ret; - } -+#endif - - /* - * Some of the parsing logic comes from the original cgroup device v1 -@@ -2918,12 +3687,9 @@ __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops, - return ret_set_errno(false, EINVAL); - conf = handler->conf; - -- cgroup_settings = &conf->cgroup2; -- if (lxc_list_empty(cgroup_settings)) -+ if (lxc_list_empty(&conf->cgroup2)) - return true; -- -- if (!pure_unified_layout(ops)) -- return log_warn_errno(true, EINVAL, "Ignoring cgroup2 limits on legacy cgroup system"); -+ cgroup_settings = &conf->cgroup2; - - if (!ops->unified) - return false; -@@ -2937,7 +3703,7 @@ __cgfsng_ops static bool cgfsng_setup_limits(struct cgroup_ops *ops, - ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem, - cg->value); - } else { -- ret = lxc_write_openat(h->container_limit_path, -+ ret = lxc_write_openat(h->container_full_path, - cg->subsystem, cg->value, - strlen(cg->value)); - if (ret 
< 0) -@@ -3013,7 +3779,7 @@ __cgfsng_ops bool cgfsng_devices_activate(struct cgroup_ops *ops, - return log_error_errno(false, ENOMEM, "Failed to finalize bpf program"); - - ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE, -- unified->container_limit_path, -+ unified->container_full_path, - BPF_F_ALLOW_MULTI); - if (ret) - return log_error_errno(false, ENOMEM, "Failed to attach bpf program"); -@@ -3085,6 +3851,12 @@ bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup) - return true; - } - -+#ifdef HAVE_ISULAD -+__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) -+{ -+ return true; -+} -+#else - __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) - { - if (!ops) -@@ -3092,6 +3864,7 @@ __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) - - return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup); - } -+#endif - - __cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops) - { -@@ -3144,7 +3917,7 @@ static void cg_unified_delegate(char ***delegate) - return; - } - -- lxc_iterate_parts(token, buf, " \t\n") { -+ lxc_iterate_parts (token, buf, " \t\n") { - /* - * We always need to chown this for both cgroup and - * cgroup2. -@@ -3192,7 +3965,6 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg - __do_free char *base_cgroup = NULL, *mountpoint = NULL; - __do_free_string_list char **controller_list = NULL; - int type; -- bool writeable; - struct hierarchy *new; - - type = get_cgroup_version(line); -@@ -3242,6 +4014,23 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg - - trim(base_cgroup); - prune_init_scope(base_cgroup); -+#ifdef HAVE_ISULAD -+ /* isulad: do not test writeable, if we run isulad in docker without cgroup namespace. 
-+ * the base_cgroup will be docker/XXX.., mountpoint+base_cgroup may be not exist */ -+ -+ /* -+ * reason:base cgroup may be started with /system.slice when cg_hybrid_init -+ * read /proc/1/cgroup on host, and cgroup init will set all containers -+ * cgroup path under /sys/fs/cgroup//system.slice/xxx/lxc -+ * directory, this is not consistent with docker. The default cgroup path -+ * should be under /sys/fs/cgroup//lxc directory. -+ */ -+ -+ if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') { -+ base_cgroup[1] = '\0'; -+ } -+#else -+ bool writeable; - if (type == CGROUP2_SUPER_MAGIC) - writeable = test_writeable_v2(mountpoint, base_cgroup); - else -@@ -3250,7 +4039,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg - TRACE("The %s group is not writeable", base_cgroup); - continue; - } -- -+#endif - if (type == CGROUP2_SUPER_MAGIC) { - char *cgv2_ctrl_path; - -@@ -3403,7 +4192,45 @@ static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf) - return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map)); - } - --__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops) -+#ifdef HAVE_ISULAD -+__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf) -+{ -+ const char *cgroup_pattern; -+ const char *cgroup_tree; -+ __do_free char *container_cgroup = NULL, *__cgroup_tree = NULL; -+ size_t len; -+ -+ if (!ops) -+ return ret_set_errno(-1, ENOENT); -+ -+ /* copy system-wide cgroup information */ -+ cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern"); -+ if (cgroup_pattern && strcmp(cgroup_pattern, "") != 0) -+ ops->cgroup_pattern = must_copy_string(cgroup_pattern); -+ -+ if (conf->cgroup_meta.dir) { -+ cgroup_tree = conf->cgroup_meta.dir; -+ container_cgroup = must_concat(&len, cgroup_tree, "/", conf->name, NULL); -+ } else if (ops->cgroup_pattern) { -+ __cgroup_tree = lxc_string_replace("%n", conf->name, ops->cgroup_pattern); -+ if (!__cgroup_tree) -+ return 
ret_set_errno(-1, ENOMEM); -+ -+ cgroup_tree = __cgroup_tree; -+ container_cgroup = must_concat(&len, cgroup_tree, NULL); -+ } else { -+ cgroup_tree = NULL; -+ container_cgroup = must_concat(&len, conf->name, NULL); -+ } -+ if (!container_cgroup) -+ return ret_set_errno(-1, ENOMEM); -+ -+ ops->container_cgroup = move_ptr(container_cgroup); -+ -+ return 0; -+} -+#else -+__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf) - { - const char *cgroup_pattern; - -@@ -3417,6 +4244,7 @@ __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops) - - return 0; - } -+#endif - - struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) - { -@@ -3433,7 +4261,12 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) - return NULL; - - cgfsng_ops->data_init = cgfsng_data_init; -+#ifdef HAVE_ISULAD -+ cgfsng_ops->errfd = conf ? conf->errpipe[1] : -1; -+ cgfsng_ops->payload_destroy = isulad_cgfsng_payload_destroy; -+#else - cgfsng_ops->payload_destroy = cgfsng_payload_destroy; -+#endif - cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy; - cgfsng_ops->monitor_create = cgfsng_monitor_create; - cgfsng_ops->monitor_enter = cgfsng_monitor_enter; -@@ -3446,6 +4279,9 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) - cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies; - cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies; - cgfsng_ops->get_cgroup = cgfsng_get_cgroup; -+#ifdef HAVE_ISULAD -+ cgfsng_ops->get_cgroup_full_path = cgfsng_get_cgroup_full_path; -+#endif - cgfsng_ops->get = cgfsng_get; - cgfsng_ops->set = cgfsng_set; - cgfsng_ops->freeze = cgfsng_freeze; -@@ -3458,7 +4294,6 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) - cgfsng_ops->chown = cgfsng_chown; - cgfsng_ops->mount = cgfsng_mount; - cgfsng_ops->devices_activate = cgfsng_devices_activate; -- cgfsng_ops->get_limiting_cgroup = cgfsng_get_limiting_cgroup; - - return move_ptr(cgfsng_ops); - } -diff --git a/src/lxc/cgroups/cgroup.c 
b/src/lxc/cgroups/cgroup.c -index 7c94fd83b..ad46d5c99 100644 ---- a/src/lxc/cgroups/cgroup.c -+++ b/src/lxc/cgroups/cgroup.c -@@ -31,7 +31,7 @@ struct cgroup_ops *cgroup_init(struct lxc_conf *conf) - if (!cgroup_ops) - return log_error_errno(NULL, errno, "Failed to initialize cgroup driver"); - -- if (cgroup_ops->data_init(cgroup_ops)) { -+ if (cgroup_ops->data_init(cgroup_ops, conf)) { - cgroup_exit(cgroup_ops); - return log_error_errno(NULL, errno, - "Failed to initialize cgroup data"); -diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h -index c5bf7941a..a9048c44a 100644 ---- a/src/lxc/cgroups/cgroup.h -+++ b/src/lxc/cgroups/cgroup.h -@@ -54,11 +54,7 @@ typedef enum { - * init's cgroup (if root). - * - * @container_full_path -- * - The full path to the container's cgroup. -- * -- * @container_limit_path -- * - The full path to the container's limiting cgroup. May simply point to -- * container_full_path. -+ * - The full path to the containers cgroup. - * - * @monitor_full_path - * - The full path to the monitor's cgroup. -@@ -81,18 +77,15 @@ struct hierarchy { - char *mountpoint; - char *container_base_path; - char *container_full_path; -- char *container_limit_path; - char *monitor_full_path; - int version; - - /* cgroup2 only */ - unsigned int bpf_device_controller:1; - -- /* container cgroup fd */ -- int cgfd_con; -- /* limiting cgroup fd (may be equal to cgfd_con if not separated) */ -- int cgfd_limit; - /* monitor cgroup fd */ -+ int cgfd_con; -+ /* container cgroup fd */ - int cgfd_mon; - }; - -@@ -109,6 +102,10 @@ struct cgroup_ops { - char *container_cgroup; - char *monitor_cgroup; - -+#ifdef HAVE_ISULAD -+ int errfd; -+#endif -+ - /* @hierarchies - * - A NULL-terminated array of struct hierarchy, one per legacy - * hierarchy. No duplicates. 
First sufficient, writeable mounted -@@ -146,14 +143,21 @@ struct cgroup_ops { - */ - cgroup_layout_t cgroup_layout; - -- int (*data_init)(struct cgroup_ops *ops); -+ int (*data_init)(struct cgroup_ops *ops, struct lxc_conf *conf); -+#ifdef HAVE_ISULAD -+ bool (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); -+#else - void (*payload_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); -+#endif - void (*monitor_destroy)(struct cgroup_ops *ops, struct lxc_handler *handler); - bool (*monitor_create)(struct cgroup_ops *ops, struct lxc_handler *handler); - bool (*monitor_enter)(struct cgroup_ops *ops, struct lxc_handler *handler); - bool (*payload_create)(struct cgroup_ops *ops, struct lxc_handler *handler); - bool (*payload_enter)(struct cgroup_ops *ops, struct lxc_handler *handler); - const char *(*get_cgroup)(struct cgroup_ops *ops, const char *controller); -+#ifdef HAVE_ISULAD -+ const char *(*get_cgroup_full_path)(struct cgroup_ops *ops, const char *controller); -+#endif - bool (*escape)(const struct cgroup_ops *ops, struct lxc_conf *conf); - int (*num_hierarchies)(struct cgroup_ops *ops); - bool (*get_hierarchies)(struct cgroup_ops *ops, int n, char ***out); -@@ -176,7 +180,6 @@ struct cgroup_ops { - bool (*monitor_delegate_controllers)(struct cgroup_ops *ops); - bool (*payload_delegate_controllers)(struct cgroup_ops *ops); - void (*payload_finalize)(struct cgroup_ops *ops); -- const char *(*get_limiting_cgroup)(struct cgroup_ops *ops, const char *controller); - }; - - extern struct cgroup_ops *cgroup_init(struct lxc_conf *conf); -diff --git a/src/lxc/cgroups/cgroup2_devices.c b/src/lxc/cgroups/cgroup2_devices.c -index 04ba7b332..4efb28fbd 100644 ---- a/src/lxc/cgroups/cgroup2_devices.c -+++ b/src/lxc/cgroups/cgroup2_devices.c -@@ -167,7 +167,7 @@ struct bpf_program *bpf_program_new(uint32_t prog_type) - { - __do_free struct bpf_program *prog = NULL; - -- prog = zalloc(sizeof(struct bpf_program)); -+ prog = calloc(1, sizeof(struct 
bpf_program)); - if (!prog) - return NULL; - -@@ -183,6 +183,9 @@ struct bpf_program *bpf_program_new(uint32_t prog_type) - - int bpf_program_init(struct bpf_program *prog) - { -+ if (!prog) -+ return ret_set_errno(-1, EINVAL); -+ - const struct bpf_insn pre_insn[] = { - /* load device type to r2 */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, access_type)), -@@ -199,17 +202,19 @@ int bpf_program_init(struct bpf_program *prog) - BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, minor)), - }; - -- if (!prog) -- return ret_set_errno(-1, EINVAL); -- - return bpf_program_add_instructions(prog, pre_insn, ARRAY_SIZE(pre_insn)); - } - - int bpf_program_append_device(struct bpf_program *prog, struct device_item *device) - { -+ int ret; - int jump_nr = 1; -- int access_mask, device_type, ret; -- struct bpf_insn bpf_access_decision[2]; -+ struct bpf_insn bpf_access_decision[] = { -+ BPF_MOV64_IMM(BPF_REG_0, device->allow), -+ BPF_EXIT_INSN(), -+ }; -+ int access_mask; -+ int device_type; - - if (!prog || !device) - return ret_set_errno(-1, EINVAL); -@@ -280,8 +285,6 @@ int bpf_program_append_device(struct bpf_program *prog, struct device_item *devi - return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); - } - -- bpf_access_decision[0] = BPF_MOV64_IMM(BPF_REG_0, device->allow); -- bpf_access_decision[1] = BPF_EXIT_INSN(); - ret = bpf_program_add_instructions(prog, bpf_access_decision, - ARRAY_SIZE(bpf_access_decision)); - if (ret) -@@ -292,7 +295,10 @@ int bpf_program_append_device(struct bpf_program *prog, struct device_item *devi - - int bpf_program_finalize(struct bpf_program *prog) - { -- struct bpf_insn ins[2]; -+ struct bpf_insn ins[] = { -+ BPF_MOV64_IMM(BPF_REG_0, prog->device_list_type), -+ BPF_EXIT_INSN(), -+ }; - - if (!prog) - return ret_set_errno(-1, EINVAL); -@@ -301,9 +307,6 @@ int bpf_program_finalize(struct bpf_program *prog) - prog->device_list_type == 
LXC_BPF_DEVICE_CGROUP_BLACKLIST - ? "blacklist" - : "whitelist"); -- -- ins[0] = BPF_MOV64_IMM(BPF_REG_0, prog->device_list_type); -- ins[1] = BPF_EXIT_INSN(); - return bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); - } - -@@ -337,12 +340,12 @@ static int bpf_program_load_kernel(struct bpf_program *prog, char *log_buf, - int bpf_program_cgroup_attach(struct bpf_program *prog, int type, - const char *path, uint32_t flags) - { -- __do_close int fd = -EBADF; - __do_free char *copy = NULL; -+ __do_close int fd = -EBADF; - union bpf_attr attr; - int ret; - -- if (!path || !prog) -+ if (!prog) - return ret_set_errno(-1, EINVAL); - - if (flags & ~(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)) -@@ -392,8 +395,8 @@ int bpf_program_cgroup_attach(struct bpf_program *prog, int type, - - int bpf_program_cgroup_detach(struct bpf_program *prog) - { -- __do_close int fd = -EBADF; - int ret; -+ __do_close int fd = -EBADF; - - if (!prog) - return 0; -@@ -441,9 +444,6 @@ int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device) - __do_free struct device_item *new_device = NULL; - struct lxc_list *it; - -- if (!conf || !device) -- return ret_errno(EINVAL); -- - lxc_list_for_each(it, &conf->devices) { - struct device_item *cur = it->elem; - -@@ -502,11 +502,12 @@ int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device) - - bool bpf_devices_cgroup_supported(void) - { -- __do_bpf_program_free struct bpf_program *prog = NULL; - const struct bpf_insn dummy[] = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }; -+ -+ __do_bpf_program_free struct bpf_program *prog = NULL; - int ret; - - if (geteuid() != 0) -@@ -514,7 +515,7 @@ bool bpf_devices_cgroup_supported(void) - "The bpf device cgroup requires real root"); - - prog = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE); -- if (!prog) -+ if (prog < 0) - return log_trace(false, "Failed to allocate new bpf device cgroup program"); - - ret = bpf_program_add_instructions(prog, dummy, 
ARRAY_SIZE(dummy)); -diff --git a/src/lxc/cmd/lxc-update-config.in b/src/lxc/cmd/lxc-update-config.in -index 0a03f06d0..95187d405 100644 ---- a/src/lxc/cmd/lxc-update-config.in -+++ b/src/lxc/cmd/lxc-update-config.in -@@ -74,7 +74,7 @@ sed -i \ - -e 's/\([[:blank:]*]\|#*\)\(lxc\.stopsignal\)\([[:blank:]*]\|=\)/\1lxc\.signal\.stop\3/g' \ - -e 's/\([[:blank:]*]\|#*\)\(lxc\.syslog\)\([[:blank:]*]\|=\)/\1lxc\.log\.syslog\3/g' \ - -e 's/\([[:blank:]*]\|#*\)\(lxc\.loglevel\)\([[:blank:]*]\|=\)/\1lxc\.log\.level\3/g' \ ---e 's/\([[:blank:]*]\|#*\)\(lxc\.logfile\)\([[:blank:]*]\|=\)/\1lxc\.log\.file\3/g' \ -+-e 's/\([[:blank:]*]\|#*\)\(lxc\.logfile\)\([[:blank:]*]\|=\)/1lxc\.log\.file\3/g' \ - -e 's/\([[:blank:]*]\|#*\)\(lxc\.init_cmd\)\([[:blank:]*]\|=\)/\1lxc\.init\.cmd\3/g' \ - -e 's/\([[:blank:]*]\|#*\)\(lxc\.init_uid\)\([[:blank:]*]\|=\)/\1lxc\.init\.uid\3/g' \ - -e 's/\([[:blank:]*]\|#*\)\(lxc\.init_gid\)\([[:blank:]*]\|=\)/\1lxc\.init\.gid\3/g' \ -diff --git a/src/lxc/cmd/lxc_init.c b/src/lxc/cmd/lxc_init.c -index a03631f1a..a52793343 100644 ---- a/src/lxc/cmd/lxc_init.c -+++ b/src/lxc/cmd/lxc_init.c -@@ -28,7 +28,7 @@ - #include "initutils.h" - #include "memory_utils.h" - #include "parse.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "string_utils.h" - - /* option keys for long only options */ -diff --git a/src/lxc/cmd/lxc_monitord.c b/src/lxc/cmd/lxc_monitord.c -index bcb289ca6..3ec7a756d 100644 ---- a/src/lxc/cmd/lxc_monitord.c -+++ b/src/lxc/cmd/lxc_monitord.c -@@ -28,7 +28,7 @@ - #include "log.h" - #include "mainloop.h" - #include "monitor.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "utils.h" - - #define CLIENTFDS_CHUNK 64 -diff --git a/src/lxc/cmd/lxc_user_nic.c b/src/lxc/cmd/lxc_user_nic.c -index 4160565f3..fd3455903 100644 ---- a/src/lxc/cmd/lxc_user_nic.c -+++ b/src/lxc/cmd/lxc_user_nic.c -@@ -36,7 +36,7 @@ - #include "memory_utils.h" - #include "network.h" - #include "parse.h" --#include "process_utils.h" 
-+#include "raw_syscalls.h" - #include "string_utils.h" - #include "syscall_wrappers.h" - #include "utils.h" -@@ -133,14 +133,26 @@ static char *get_username(void) - return strdup(pwent.pw_name); - } - -+static void free_groupnames(char **groupnames) -+{ -+ int i; -+ -+ if (!groupnames) -+ return; -+ -+ for (i = 0; groupnames[i]; i++) -+ free(groupnames[i]); -+ -+ free(groupnames); -+} - - static char **get_groupnames(void) - { - __do_free char *buf = NULL; - __do_free gid_t *group_ids = NULL; -- __do_free_string_list char **groupnames = NULL; - int ngroups; - int ret, i; -+ char **groupnames; - struct group grent; - struct group *grentp = NULL; - size_t bufsize; -@@ -149,10 +161,9 @@ static char **get_groupnames(void) - if (ngroups < 0) { - CMD_SYSERROR("Failed to get number of groups the user belongs to\n"); - return NULL; -- } -- -- if (ngroups == 0) -+ } else if (ngroups == 0) { - return NULL; -+ } - - group_ids = malloc(sizeof(gid_t) * ngroups); - if (!group_ids) { -@@ -166,53 +177,66 @@ static char **get_groupnames(void) - return NULL; - } - -- groupnames = zalloc(sizeof(char *) * (ngroups + 1)); -+ groupnames = malloc(sizeof(char *) * (ngroups + 1)); - if (!groupnames) { - CMD_SYSERROR("Failed to allocate memory while getting group names\n"); - return NULL; - } - -+ memset(groupnames, 0, sizeof(char *) * (ngroups + 1)); -+ - bufsize = sysconf(_SC_GETGR_R_SIZE_MAX); - if (bufsize == -1) - bufsize = 1024; - - buf = malloc(bufsize); - if (!buf) { -+ free_groupnames(groupnames); - CMD_SYSERROR("Failed to allocate memory while getting group names\n"); - return NULL; - } - - for (i = 0; i < ngroups; i++) { - while ((ret = getgrgid_r(group_ids[i], &grent, buf, bufsize, &grentp)) == ERANGE) { -- char *new_buf; -- - bufsize <<= 1; - if (bufsize > MAX_GRBUF_SIZE) { -- usernic_error("Failed to get group members: %u\n", group_ids[i]); -+ usernic_error("Failed to get group members: %u\n", -+ group_ids[i]); -+ free(buf); -+ free(group_ids); -+ free_groupnames(groupnames); 
- return NULL; - } -- -- new_buf = realloc(buf, bufsize); -+ char *new_buf = realloc(buf, bufsize); - if (!new_buf) { -- usernic_error("Failed to allocate memory while getting group names: %s\n", -+ usernic_error("Failed to allocate memory while getting group " -+ "names: %s\n", - strerror(errno)); -+ free(buf); -+ free(group_ids); -+ free_groupnames(groupnames); - return NULL; - } - buf = new_buf; - } -+ if (!grentp) { -+ if (ret == 0) -+ usernic_error("%s", "Could not find matched group record\n"); - -- /* If a group is not found, just ignore it. */ -- if (!grentp) -- continue; -+ CMD_SYSERROR("Failed to get group name: %u\n", group_ids[i]); -+ free_groupnames(groupnames); -+ return NULL; -+ } - - groupnames[i] = strdup(grent.gr_name); - if (!groupnames[i]) { - usernic_error("Failed to copy group name \"%s\"", grent.gr_name); -+ free_groupnames(groupnames); - return NULL; - } - } - -- return move_ptr(groupnames); -+ return groupnames; - } - - static bool name_is_in_groupnames(char *name, char **groupnames) -@@ -301,9 +325,9 @@ static int get_alloted(char *me, char *intype, char *link, - { - __do_free char *line = NULL; - __do_fclose FILE *fin = NULL; -- __do_free_string_list char **groups = NULL; - int n, ret; - char name[100], type[100], br[100]; -+ char **groups; - int count = 0; - size_t len = 0; - -@@ -355,6 +379,8 @@ static int get_alloted(char *me, char *intype, char *link, - count += n; - } - -+ free_groupnames(groups); -+ - /* Now return the total number of nics that this user can create. 
*/ - return count; - } -diff --git a/src/lxc/cmd/lxc_usernsexec.c b/src/lxc/cmd/lxc_usernsexec.c -index aee7448ce..6441fb3c8 100644 ---- a/src/lxc/cmd/lxc_usernsexec.c -+++ b/src/lxc/cmd/lxc_usernsexec.c -@@ -61,7 +61,7 @@ static void opentty(const char *tty, int which) - - fd = open(tty, O_RDWR | O_NONBLOCK); - if (fd < 0) { -- CMD_SYSINFO("Failed to open tty"); -+ CMD_SYSERROR("Failed to open tty"); - return; - } - -@@ -87,13 +87,13 @@ static int do_child(void *vargv) - int ret; - char **argv = (char **)vargv; - -- if (!lxc_setgroups(0, NULL)) -- return -1; -- - /* Assume we want to become root */ - if (!lxc_switch_uid_gid(0, 0)) - return -1; - -+ if (!lxc_setgroups(0, NULL)) -+ return -1; -+ - ret = unshare(CLONE_NEWNS); - if (ret < 0) { - CMD_SYSERROR("Failed to unshare mount namespace"); -@@ -103,7 +103,7 @@ static int do_child(void *vargv) - if (detect_shared_rootfs()) { - ret = mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL); - if (ret < 0) { -- CMD_SYSINFO("Failed to recursively turn root mount tree into dependent mount"); -+ CMD_SYSINFO("Failed to make \"/\" rslave"); - return -1; - } - } -diff --git a/src/lxc/commands.c b/src/lxc/commands.c -index b6ae101fc..37354e87c 100644 ---- a/src/lxc/commands.c -+++ b/src/lxc/commands.c -@@ -75,8 +75,8 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) - [LXC_CMD_GET_CONFIG_ITEM] = "get_config_item", - [LXC_CMD_GET_NAME] = "get_name", - [LXC_CMD_GET_LXCPATH] = "get_lxcpath", -- [LXC_CMD_ADD_STATE_CLIENT] = "add_state_client", -- [LXC_CMD_CONSOLE_LOG] = "console_log", -+ [LXC_CMD_ADD_STATE_CLIENT] = "add_state_client", -+ [LXC_CMD_CONSOLE_LOG] = "console_log", - [LXC_CMD_SERVE_STATE_CLIENTS] = "serve_state_clients", - [LXC_CMD_SECCOMP_NOTIFY_ADD_LISTENER] = "seccomp_notify_add_listener", - [LXC_CMD_ADD_BPF_DEVICE_CGROUP] = "add_bpf_device_cgroup", -@@ -84,8 +84,10 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) - [LXC_CMD_UNFREEZE] = "unfreeze", - [LXC_CMD_GET_CGROUP2_FD] = "get_cgroup2_fd", - [LXC_CMD_GET_INIT_PIDFD] 
= "get_init_pidfd", -- [LXC_CMD_GET_LIMITING_CGROUP] = "get_limiting_cgroup", -- [LXC_CMD_GET_LIMITING_CGROUP2_FD] = "get_limiting_cgroup2_fd", -+#ifdef HAVE_ISULAD -+ [LXC_CMD_SET_TERMINAL_FIFOS] = "set_terminal_fifos", -+ [LXC_CMD_SET_TERMINAL_WINCH] = "set_terminal_winch", -+#endif - }; - - if (cmd >= LXC_CMD_MAX) -@@ -108,7 +110,7 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) - * stored directly in data and datalen will be 0. - * - * As a special case, the response for LXC_CMD_CONSOLE is created -- * here as it contains an fd for the ptmx pty passed through the -+ * here as it contains an fd for the master pty passed through the - * unix socket. - */ - static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) -@@ -117,7 +119,15 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) - int ret; - struct lxc_cmd_rsp *rsp = &cmd->rsp; - -+#ifdef HAVE_ISULAD -+ /*isulad: add timeout 1s to avoid long block due to [lxc monitor] error*/ -+ ret = lxc_abstract_unix_recv_fds_timeout(sock, &fd_rsp, 1, rsp, sizeof(*rsp), 1000 * 1000); -+ if (ret < 0 && (errno == ECONNRESET || errno == EAGAIN || errno == EWOULDBLOCK)) { -+ errno = ECONNRESET; /*isulad set errno ECONNRESET when timeout */ -+ } -+#else - ret = lxc_abstract_unix_recv_fds(sock, &fd_rsp, 1, rsp, sizeof(*rsp)); -+#endif - if (ret < 0) - return log_warn_errno(-1, - errno, "Failed to receive response for command \"%s\"", -@@ -139,14 +149,12 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) - ENOMEM, "Failed to receive response for command \"%s\"", - lxc_cmd_str(cmd->req.cmd)); - -- rspdata->ptmxfd = move_fd(fd_rsp); -+ rspdata->masterfd = move_fd(fd_rsp); - rspdata->ttynum = PTR_TO_INT(rsp->data); - rsp->data = rspdata; - } - -- if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD || -- cmd->req.cmd == LXC_CMD_GET_LIMITING_CGROUP2_FD) -- { -+ if (cmd->req.cmd == LXC_CMD_GET_CGROUP2_FD) { - int cgroup2_fd = move_fd(fd_rsp); - rsp->data = INT_TO_PTR(cgroup2_fd); - } -@@ -487,14 +495,25 @@ static 
int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, - return 0; - } - --static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath, -- const char *subsystem, -- lxc_cmd_t command) -+/* -+ * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a -+ * particular subsystem. This is the cgroup path relative to the root -+ * of the cgroup filesystem. -+ * -+ * @name : name of container to connect to -+ * @lxcpath : the lxcpath in which the container is running -+ * @subsystem : the subsystem being asked about -+ * -+ * Returns the path on success, NULL on failure. The caller must free() the -+ * returned path. -+ */ -+char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, -+ const char *subsystem) - { - int ret, stopped; - struct lxc_cmd_rr cmd = { - .req = { -- .cmd = command, -+ .cmd = LXC_CMD_GET_CGROUP, - .data = subsystem, - .datalen = 0, - }, -@@ -509,21 +528,8 @@ static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath, - if (ret < 0) - return NULL; - -- if (ret == 0) { -- if (command == LXC_CMD_GET_LIMITING_CGROUP) { -- /* -- * This may indicate that the container was started -- * under an ealier version before -- * `cgroup_advanced_isolation` as implemented, there -- * it sees an unknown command and just closes the -- * socket, sending us an EOF. -- */ -- return lxc_cmd_get_cgroup_path_do(name, lxcpath, -- subsystem, -- LXC_CMD_GET_CGROUP); -- } -+ if (ret == 0) - return NULL; -- } - - if (cmd.rsp.ret < 0 || cmd.rsp.datalen < 0) - return NULL; -@@ -531,72 +537,24 @@ static char *lxc_cmd_get_cgroup_path_do(const char *name, const char *lxcpath, - return cmd.rsp.data; - } - --/* -- * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a -- * particular subsystem. This is the cgroup path relative to the root -- * of the cgroup filesystem. 
-- * -- * @name : name of container to connect to -- * @lxcpath : the lxcpath in which the container is running -- * @subsystem : the subsystem being asked about -- * -- * Returns the path on success, NULL on failure. The caller must free() the -- * returned path. -- */ --char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, -- const char *subsystem) --{ -- return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem, -- LXC_CMD_GET_CGROUP); --} -- --/* -- * lxc_cmd_get_limiting_cgroup_path: Calculate a container's limiting cgroup -- * path for a particular subsystem. This is the cgroup path relative to the -- * root of the cgroup filesystem. This may be the same as the path returned by -- * lxc_cmd_get_cgroup_path if the container doesn't have a limiting path prefix -- * set. -- * -- * @name : name of container to connect to -- * @lxcpath : the lxcpath in which the container is running -- * @subsystem : the subsystem being asked about -- * -- * Returns the path on success, NULL on failure. The caller must free() the -- * returned path. 
-- */ --char *lxc_cmd_get_limiting_cgroup_path(const char *name, const char *lxcpath, -- const char *subsystem) --{ -- return lxc_cmd_get_cgroup_path_do(name, lxcpath, subsystem, -- LXC_CMD_GET_LIMITING_CGROUP); --} -- --static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req, -- struct lxc_handler *handler, -- struct lxc_epoll_descr *descr, -- bool limiting_cgroup) -+static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, -+ struct lxc_handler *handler, -+ struct lxc_epoll_descr *descr) - { - int ret; - const char *path; -- const void *reqdata; - struct lxc_cmd_rsp rsp; - struct cgroup_ops *cgroup_ops = handler->cgroup_ops; -- const char *(*get_fn)(struct cgroup_ops *ops, const char *controller); - - if (req->datalen > 0) { - ret = validate_string_request(fd, req); - if (ret != 0) - return ret; -- reqdata = req->data; -+ -+ path = cgroup_ops->get_cgroup(cgroup_ops, req->data); - } else { -- reqdata = NULL; -+ path = cgroup_ops->get_cgroup(cgroup_ops, NULL); - } -- -- get_fn = (limiting_cgroup ? 
cgroup_ops->get_cgroup -- : cgroup_ops->get_limiting_cgroup); -- -- path = get_fn(cgroup_ops, reqdata); -- - if (!path) - return -1; - -@@ -611,20 +569,6 @@ static int lxc_cmd_get_cgroup_callback_do(int fd, struct lxc_cmd_req *req, - return 0; - } - --static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, -- struct lxc_handler *handler, -- struct lxc_epoll_descr *descr) --{ -- return lxc_cmd_get_cgroup_callback_do(fd, req, handler, descr, false); --} -- --static int lxc_cmd_get_limiting_cgroup_callback(int fd, struct lxc_cmd_req *req, -- struct lxc_handler *handler, -- struct lxc_epoll_descr *descr) --{ -- return ret_errno(ENOSYS); --} -- - /* - * lxc_cmd_get_config_item: Get config item the running container - * -@@ -844,7 +788,7 @@ static int lxc_cmd_terminal_winch_callback(int fd, struct lxc_cmd_req *req, - * @name : name of container to connect to - * @ttynum : in: the tty to open or -1 for next available - * : out: the tty allocated -- * @fd : out: file descriptor for ptmx side of pty -+ * @fd : out: file descriptor for master side of pty - * @lxcpath : the lxcpath in which the container is running - * - * Returns fd holding tty allocated on success, < 0 on failure -@@ -871,11 +815,11 @@ int lxc_cmd_console(const char *name, int *ttynum, int *fd, const char *lxcpath) - if (ret == 0) - return log_error(-1, "tty number %d invalid, busy or all ttys busy", *ttynum); - -- if (rspdata->ptmxfd < 0) -+ if (rspdata->masterfd < 0) - return log_error(-1, "Unable to allocate fd for tty %d", rspdata->ttynum); - - ret = cmd.rsp.ret; /* socket fd */ -- *fd = rspdata->ptmxfd; -+ *fd = rspdata->masterfd; - *ttynum = rspdata->ttynum; - - return log_info(ret, "Alloced fd %d for tty %d via socket %d", *fd, rspdata->ttynum, ret); -@@ -885,17 +829,17 @@ static int lxc_cmd_console_callback(int fd, struct lxc_cmd_req *req, - struct lxc_handler *handler, - struct lxc_epoll_descr *descr) - { -- int ptmxfd, ret; -+ int masterfd, ret; - struct lxc_cmd_rsp rsp; - int 
ttynum = PTR_TO_INT(req->data); - -- ptmxfd = lxc_terminal_allocate(handler->conf, fd, &ttynum); -- if (ptmxfd < 0) -+ masterfd = lxc_terminal_allocate(handler->conf, fd, &ttynum); -+ if (masterfd < 0) - return LXC_CMD_REAP_CLIENT_FD; - - memset(&rsp, 0, sizeof(rsp)); - rsp.data = INT_TO_PTR(ttynum); -- ret = lxc_abstract_unix_send_fds(fd, &ptmxfd, 1, &rsp, sizeof(rsp)); -+ ret = lxc_abstract_unix_send_fds(fd, &masterfd, 1, &rsp, sizeof(rsp)); - if (ret < 0) { - lxc_terminal_free(handler->conf, fd); - return log_error_errno(LXC_CMD_REAP_CLIENT_FD, errno, -@@ -1434,47 +1378,146 @@ int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath) - return PTR_TO_INT(cmd.rsp.data); - } - --static int lxc_cmd_get_cgroup2_fd_callback_do(int fd, struct lxc_cmd_req *req, -- struct lxc_handler *handler, -- struct lxc_epoll_descr *descr, -- bool limiting_cgroup) -+static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req, -+ struct lxc_handler *handler, -+ struct lxc_epoll_descr *descr) - { - struct lxc_cmd_rsp rsp = { - .ret = -EINVAL, - }; - struct cgroup_ops *ops = handler->cgroup_ops; -- int ret, send_fd; -+ int ret; - - if (!pure_unified_layout(ops) || !ops->unified) - return lxc_cmd_rsp_send(fd, &rsp); - -- send_fd = limiting_cgroup ? ops->unified->cgfd_limit -- : ops->unified->cgfd_con; -- - rsp.ret = 0; -- ret = lxc_abstract_unix_send_fds(fd, &send_fd, 1, &rsp, sizeof(rsp)); -+ ret = lxc_abstract_unix_send_fds(fd, &ops->unified->cgfd_con, 1, &rsp, -+ sizeof(rsp)); - if (ret < 0) - return log_error(LXC_CMD_REAP_CLIENT_FD, "Failed to send cgroup2 fd"); - - return 0; - } - --static int lxc_cmd_get_cgroup2_fd_callback(int fd, struct lxc_cmd_req *req, -- struct lxc_handler *handler, -- struct lxc_epoll_descr *descr) -+#ifdef HAVE_ISULAD -+/* -+ * isulad: lxc_cmd_set_terminal_fifos: Set the fifos used for the container as terminal input/output -+ * -+ * @hashed_sock_name: hashed socket name -+ * -+ * Returns 0 when success, else when fail. 
-+ */ -+int lxc_cmd_set_terminal_fifos(const char *name, const char *lxcpath, const char *in_fifo, -+ const char *out_fifo, const char *err_fifo) - { -- return lxc_cmd_get_cgroup2_fd_callback_do(fd, req, handler, descr, -- false); -+ int ret = 0, stopped = 0; -+ int len = 0; -+ char *tmp = NULL; -+ const char *split = "&&&&", *none_fifo_name = "none"; -+ const char *cmd_in_fifo = in_fifo ? in_fifo : none_fifo_name; -+ const char *cmd_out_fifo = out_fifo ? out_fifo : none_fifo_name; -+ const char *cmd_err_fifo = err_fifo ? err_fifo : none_fifo_name; -+ -+ if (len + strlen(cmd_in_fifo) + strlen(split) + strlen(cmd_out_fifo) + -+ strlen(split) + strlen(cmd_err_fifo) == SIZE_MAX) -+ return -1; -+ len += strlen(cmd_in_fifo) + strlen(split) + strlen(cmd_out_fifo) + strlen(split) + strlen(cmd_err_fifo) + 1; -+ tmp = malloc(len); -+ if (tmp == NULL) -+ return -1; -+ ret = snprintf(tmp, len, "%s%s%s%s%s", cmd_in_fifo, split, cmd_out_fifo, split, cmd_err_fifo); -+ if (ret < 0 || ret >= len) { -+ ERROR("Failed to snprintf in fifo of command"); -+ free(tmp); -+ return -1; -+ } -+ -+ struct lxc_cmd_rr cmd = { -+ .req = { -+ .cmd = LXC_CMD_SET_TERMINAL_FIFOS, -+ .datalen = strlen(tmp)+1, -+ .data = tmp, -+ }, -+ }; -+ -+ ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); -+ if (ret < 0) { -+ ERROR("Failed to send command to container"); -+ free(tmp); -+ return -1; -+ } -+ -+ if (cmd.rsp.ret != 0) { -+ ERROR("Command response error:%d", cmd.rsp.ret); -+ free(tmp); -+ return -1; -+ } -+ -+ free(tmp); -+ return 0; - } - --static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd, -- struct lxc_cmd_req *req, -- struct lxc_handler *handler, -- struct lxc_epoll_descr *descr) -+static int lxc_cmd_set_terminal_fifos_callback(int fd, struct lxc_cmd_req *req, -+ struct lxc_handler *handler, struct lxc_epoll_descr *descr) - { -- return ret_errno(ENOSYS); -+ struct lxc_cmd_rsp rsp; -+ memset(&rsp, 0, sizeof(rsp)); -+ -+ rsp.ret = lxc_terminal_add_fifos(handler->conf, req->data);; -+ -+ 
return lxc_cmd_rsp_send(fd, &rsp); -+} -+ -+struct lxc_cmd_set_terminal_winch_request { -+ unsigned int height; -+ unsigned int width; -+}; -+ -+int lxc_cmd_set_terminal_winch(const char *name, const char *lxcpath, unsigned int height, unsigned int width) -+{ -+ int ret = 0, stopped = 0; -+ struct lxc_cmd_set_terminal_winch_request data = { 0 }; -+ -+ data.height = height; -+ data.width = width; -+ -+ struct lxc_cmd_rr cmd = { -+ .req = { -+ .cmd = LXC_CMD_SET_TERMINAL_WINCH, -+ .datalen = sizeof(struct lxc_cmd_set_terminal_winch_request), -+ .data = &data, -+ }, -+ }; -+ -+ ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); -+ if (ret < 0) { -+ ERROR("Failed to send command to container"); -+ return -1; -+ } -+ -+ if (cmd.rsp.ret != 0) { -+ ERROR("Command response error:%d", cmd.rsp.ret); -+ return -1; -+ } -+ return 0; - } - -+static int lxc_cmd_set_terminal_winch_callback(int fd, struct lxc_cmd_req *req, -+ struct lxc_handler *handler, struct lxc_epoll_descr *descr) -+{ -+ struct lxc_cmd_rsp rsp; -+ struct lxc_cmd_set_terminal_winch_request *data = (struct lxc_cmd_set_terminal_winch_request *)(req->data); -+ memset(&rsp, 0, sizeof(rsp)); -+ -+ rsp.ret = lxc_set_terminal_winsz(&handler->conf->console, data->height, data->width);; -+ -+ return lxc_cmd_rsp_send(fd, &rsp); -+ -+} -+ -+#endif -+ - static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, - struct lxc_handler *handler, - struct lxc_epoll_descr *descr) -@@ -1502,12 +1545,14 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, - [LXC_CMD_UNFREEZE] = lxc_cmd_unfreeze_callback, - [LXC_CMD_GET_CGROUP2_FD] = lxc_cmd_get_cgroup2_fd_callback, - [LXC_CMD_GET_INIT_PIDFD] = lxc_cmd_get_init_pidfd_callback, -- [LXC_CMD_GET_LIMITING_CGROUP] = lxc_cmd_get_limiting_cgroup_callback, -- [LXC_CMD_GET_LIMITING_CGROUP2_FD] = lxc_cmd_get_limiting_cgroup2_fd_callback, -+#ifdef HAVE_ISULAD -+ [LXC_CMD_SET_TERMINAL_FIFOS] = lxc_cmd_set_terminal_fifos_callback, -+ [LXC_CMD_SET_TERMINAL_WINCH] = 
lxc_cmd_set_terminal_winch_callback, -+#endif - }; - - if (req->cmd >= LXC_CMD_MAX) -- return log_trace_errno(-1, EINVAL, "Invalid command id %d", req->cmd); -+ return log_error_errno(-1, ENOENT, "Undefined command id %d", req->cmd); - - return cb[req->cmd](fd, req, handler, descr); - } -diff --git a/src/lxc/commands.h b/src/lxc/commands.h -index 3624a1497..aa8289d7a 100644 ---- a/src/lxc/commands.h -+++ b/src/lxc/commands.h -@@ -38,8 +38,10 @@ typedef enum { - LXC_CMD_UNFREEZE, - LXC_CMD_GET_CGROUP2_FD, - LXC_CMD_GET_INIT_PIDFD, -- LXC_CMD_GET_LIMITING_CGROUP, -- LXC_CMD_GET_LIMITING_CGROUP2_FD, -+#ifdef HAVE_ISULAD -+ LXC_CMD_SET_TERMINAL_FIFOS, -+ LXC_CMD_SET_TERMINAL_WINCH, -+#endif - LXC_CMD_MAX, - } lxc_cmd_t; - -@@ -61,7 +63,7 @@ struct lxc_cmd_rr { - }; - - struct lxc_cmd_console_rsp_data { -- int ptmxfd; -+ int masterfd; - int ttynum; - }; - -@@ -131,9 +133,11 @@ extern int lxc_cmd_add_bpf_device_cgroup(const char *name, const char *lxcpath, - extern int lxc_cmd_freeze(const char *name, const char *lxcpath, int timeout); - extern int lxc_cmd_unfreeze(const char *name, const char *lxcpath, int timeout); - extern int lxc_cmd_get_cgroup2_fd(const char *name, const char *lxcpath); --extern char *lxc_cmd_get_limiting_cgroup_path(const char *name, -- const char *lxcpath, -- const char *subsystem); --extern int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath); -+ -+#ifdef HAVE_ISULAD -+extern int lxc_cmd_set_terminal_fifos(const char *name, const char *lxcpath, -+ const char *in_fifo, const char *out_fifo, const char *err_fifo); -+extern int lxc_cmd_set_terminal_winch(const char *name, const char *lxcpath, unsigned int height, unsigned int width); -+#endif - - #endif /* __commands_h */ -diff --git a/src/lxc/commands_utils.c b/src/lxc/commands_utils.c -index 2af722ca1..2f2670d74 100644 ---- a/src/lxc/commands_utils.c -+++ b/src/lxc/commands_utils.c -@@ -62,14 +62,11 @@ int lxc_cmd_sock_get_state(const char *name, const char *lxcpath, - - ret 
= lxc_cmd_add_state_client(name, lxcpath, states, &state_client_fd); - if (ret < 0) -- return ret_errno(EINVAL); -+ return -1; - - if (ret < MAX_STATE) - return ret; - -- if (state_client_fd < 0) -- return ret_errno(EBADF); -- - return lxc_cmd_sock_rcv_state(state_client_fd, timeout); - } - -diff --git a/src/lxc/compiler.h b/src/lxc/compiler.h -index 114fb81ba..92cd9fd14 100644 ---- a/src/lxc/compiler.h -+++ b/src/lxc/compiler.h -@@ -57,22 +57,4 @@ - - #define __cgfsng_ops - --/* access attribute */ --#define __access_r(x, y) --#define __access_w(x, y) --#define __access_rw(x, y) -- --#ifdef __has_attribute --#if __has_attribute(access) --#undef __access_r --#define __access_r(x, y) __attribute__((access(read_only, x, y))) -- --#undef __access_w --#define __access_w(x, y) __attribute__((access(write_only, x, y))) -- --#undef __access_rw --#define __access_rw(x, y) __attribute__((access(read_write, x, y))) --#endif --#endif -- - #endif /* __LXC_COMPILER_H */ -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 00789961c..0744c19b3 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -33,6 +33,11 @@ - #include - #include - -+#ifdef HAVE_ISULAD -+#include -+#include "sync.h" -+#endif -+ - #include "af_unix.h" - #include "caps.h" - #include "cgroup.h" -@@ -51,15 +56,18 @@ - #include "namespace.h" - #include "network.h" - #include "parse.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "ringbuf.h" - #include "start.h" - #include "storage.h" - #include "storage/overlay.h" - #include "syscall_wrappers.h" - #include "terminal.h" -+#include "loop.h" - #include "utils.h" - #include "uuid.h" -+#include "path.h" -+#include "utils.h" - - #ifdef MAJOR_IN_MKDEV - #include -@@ -118,7 +126,14 @@ char *lxchook_names[NUM_LXC_HOOKS] = { - "post-stop", - "clone", - "destroy", -+#ifdef HAVE_ISULAD -+ "start-host", -+ "oci-prestart", -+ "oci-poststart", -+ "oci-poststop" -+#else - "start-host" -+#endif - }; - - struct mount_opt { -@@ -637,8 +652,13 @@ static int 
lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha - { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL }, - { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL }, - { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, -+ #ifdef HAVE_ISULAD -+ { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, -+ { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY|MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, -+ #else - { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL }, - { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL }, -+ #endif - { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL }, - { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL }, - { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL }, -@@ -670,6 +690,13 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha - if (!destination) - return -1; - -+#ifdef HAVE_ISULAD -+ if (mkdir_p(destination, 0755) < 0) { -+ SYSERROR("Failed to create mount target '%s'", destination); -+ return log_error(-1, "Failed to mkdir destination %s", destination); -+ } -+#endif -+ - mflags = add_required_remount_flags(source, destination, - default_mounts[i].flags); - r = safe_mount(source, destination, default_mounts[i].fstype, -@@ -901,13 +928,11 @@ static int lxc_setup_ttys(struct lxc_conf *conf) - return 0; - } - --define_cleanup_function(struct lxc_tty_info *, lxc_delete_tty); -- - int lxc_allocate_ttys(struct lxc_conf *conf) - { -- struct lxc_terminal_info *tty_new = NULL; -+ __do_free struct lxc_terminal_info *tty_new = NULL; - int ret; -- 
call_cleaner(lxc_delete_tty) struct lxc_tty_info *ttys = &conf->ttys; -+ struct lxc_tty_info *ttys = &conf->ttys; - - /* no tty in the configuration */ - if (ttys->max == 0) -@@ -921,39 +946,41 @@ int lxc_allocate_ttys(struct lxc_conf *conf) - for (size_t i = 0; i < ttys->max; i++) { - struct lxc_terminal_info *tty = &ttys->tty[i]; - -- tty->ptmx = -EBADF; -- tty->pts = -EBADF; -- ret = openpty(&tty->ptmx, &tty->pts, NULL, NULL, NULL); -+ tty->master = -EBADF; -+ tty->slave = -EBADF; -+ ret = openpty(&tty->master, &tty->slave, NULL, NULL, NULL); - if (ret < 0) { - ttys->max = i; -+ lxc_delete_tty(ttys); - return log_error_errno(-ENOTTY, ENOTTY, "Failed to create tty %zu", i); - } - -- ret = ttyname_r(tty->pts, tty->name, sizeof(tty->name)); -+ ret = ttyname_r(tty->slave, tty->name, sizeof(tty->name)); - if (ret < 0) { - ttys->max = i; -- return log_error_errno(-ENOTTY, ENOTTY, "Failed to retrieve name of tty %zu pts", i); -+ lxc_delete_tty(ttys); -+ return log_error_errno(-ENOTTY, ENOTTY, "Failed to retrieve name of tty %zu slave", i); - } - -- DEBUG("Created tty \"%s\" with ptmx fd %d and pts fd %d", -- tty->name, tty->ptmx, tty->pts); -+ DEBUG("Created tty \"%s\" with master fd %d and slave fd %d", -+ tty->name, tty->master, tty->slave); - - /* Prevent leaking the file descriptors to the container */ -- ret = fd_cloexec(tty->ptmx, true); -+ ret = fd_cloexec(tty->master, true); - if (ret < 0) -- SYSWARN("Failed to set FD_CLOEXEC flag on ptmx fd %d of tty device \"%s\"", -- tty->ptmx, tty->name); -+ SYSWARN("Failed to set FD_CLOEXEC flag on master fd %d of tty device \"%s\"", -+ tty->master, tty->name); - -- ret = fd_cloexec(tty->pts, true); -+ ret = fd_cloexec(tty->slave, true); - if (ret < 0) -- SYSWARN("Failed to set FD_CLOEXEC flag on pts fd %d of tty device \"%s\"", -- tty->pts, tty->name); -+ SYSWARN("Failed to set FD_CLOEXEC flag on slave fd %d of tty device \"%s\"", -+ tty->slave, tty->name); - - tty->busy = -1; - } - - INFO("Finished creating %zu tty 
devices", ttys->max); -- move_ptr(ttys); -+ ttys->tty = move_ptr(tty_new); - return 0; - } - -@@ -964,8 +991,8 @@ void lxc_delete_tty(struct lxc_tty_info *ttys) - - for (int i = 0; i < ttys->max; i++) { - struct lxc_terminal_info *tty = &ttys->tty[i]; -- close_prot_errno_disarm(tty->ptmx); -- close_prot_errno_disarm(tty->pts); -+ close_prot_errno_disarm(tty->master); -+ close_prot_errno_disarm(tty->slave); - } - - free_disarm(ttys->tty); -@@ -986,15 +1013,15 @@ static int lxc_send_ttys_to_parent(struct lxc_handler *handler) - int ttyfds[2]; - struct lxc_terminal_info *tty = &ttys->tty[i]; - -- ttyfds[0] = tty->ptmx; -- ttyfds[1] = tty->pts; -+ ttyfds[0] = tty->master; -+ ttyfds[1] = tty->slave; - - ret = lxc_abstract_unix_send_fds(sock, ttyfds, 2, NULL, 0); - if (ret < 0) - break; - -- TRACE("Sent tty \"%s\" with ptmx fd %d and pts fd %d to parent", -- tty->name, tty->ptmx, tty->pts); -+ TRACE("Sent tty \"%s\" with master fd %d and slave fd %d to parent", -+ tty->name, tty->master, tty->slave); - } - - if (ret < 0) -@@ -1047,8 +1074,13 @@ on_error: - /* Just create a path for /dev under $lxcpath/$name and in rootfs If we hit an - * error, log it but don't fail yet. - */ -+#ifdef HAVE_ISULAD -+static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, -+ int autodevtmpfssize, const char *lxcpath, char *systemd) -+#else - static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, - int autodevtmpfssize, const char *lxcpath) -+#endif - { - __do_free char *path = NULL; - int ret; -@@ -1061,6 +1093,7 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, - /* $(rootfs->mount) + "/dev/pts" + '\0' */ - clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9; - path = must_realloc(NULL, clen); -+ - sprintf(mount_options, "size=%d,mode=755", (autodevtmpfssize != 0) ? 
autodevtmpfssize : 500000); - DEBUG("Using mount options: %s", mount_options); - -@@ -1076,6 +1109,23 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, - goto reset_umask; - } - -+#ifdef HAVE_ISULAD -+ if (systemd != NULL && !strcmp(systemd, "true")) { -+ ret = mount(path, path, "", MS_BIND, NULL); -+ if (ret < 0) { -+ SYSERROR("Failed to bind mount path \"%s\"", path); -+ goto reset_umask; -+ } -+ } else { -+ ret = safe_mount("none", path, "tmpfs", 0, mount_options, -+ rootfs->path ? rootfs->mount : NULL); -+ if (ret < 0) { -+ SYSERROR("Failed to mount tmpfs on \"%s\"", path); -+ goto reset_umask; -+ } -+ TRACE("Mounted tmpfs on \"%s\"", path); -+ } -+#else - ret = safe_mount("none", path, "tmpfs", 0, mount_options, - rootfs->path ? rootfs->mount : NULL ); - if (ret < 0) { -@@ -1083,6 +1133,7 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, - goto reset_umask; - } - TRACE("Mounted tmpfs on \"%s\"", path); -+#endif - - ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? 
rootfs->mount : ""); - if (ret < 0 || (size_t)ret >= clen) { -@@ -1223,16 +1274,118 @@ static int lxc_fill_autodev(const struct lxc_rootfs *rootfs) - return 0; - } - -+static void null_endofword(char *word) -+{ -+ while (*word && *word != ' ' && *word != '\t') -+ word++; -+ *word = '\0'; -+} -+ -+/* skip @nfields spaces in @src */ -+static char *get_field(char *src, int nfields) -+{ -+ int i; -+ char *p = src; -+ -+ for (i = 0; i < nfields; i++) { -+ while (*p && *p != ' ' && *p != '\t') -+ p++; -+ -+ if (!*p) -+ break; -+ -+ p++; -+ } -+ -+ return p; -+} -+ -+#ifdef HAVE_ISULAD -+static int rootfs_parent_mount_private(char *rootfs) -+{ -+ /* walk /proc/self/mountinfo and change parent of rootfs to private */ -+ FILE *f = fopen("/proc/self/mountinfo", "r"); -+ char *line = NULL; -+ char *parent = NULL, *options = NULL; -+ size_t len = 0; -+ int ret = 0; -+ -+ if (!f) { -+ SYSERROR("Failed to open /proc/self/mountinfo to make parent of rootfs to private"); -+ return -1; -+ } -+ -+ while (getline(&line, &len, f) != -1) { -+ char *target = NULL; -+ char *opts = NULL; -+ char *tmptarget = NULL; -+ target = get_field(line, 4); -+ if (!target) -+ continue; -+ tmptarget = safe_strdup(target); -+ null_endofword(tmptarget); -+ if (!strstr(rootfs, tmptarget)) { -+ free(tmptarget); -+ continue; -+ } -+ if (!parent || strlen(tmptarget) > strlen(parent)) { -+ free(parent); -+ parent = tmptarget; -+ } else { -+ free(tmptarget); -+ continue; -+ } -+ opts = get_field(target, 2); -+ if (!opts) -+ continue; -+ null_endofword(opts); -+ free(options); -+ options = safe_strdup(opts); -+ } -+ -+ if (!parent || !options) { -+ ERROR("Could not find parent mount of %s", rootfs); -+ ret = -1; -+ } else { -+ if (strstr(options, "shared")) { -+ if (mount(NULL, parent, NULL, MS_PRIVATE, NULL)) { -+ SYSERROR("Failed to make %s private", parent); -+ ret = -1; -+ } -+ DEBUG("Mounted parent %s of rootfs %s to private", parent, rootfs); -+ } -+ } -+ free(parent); -+ free(options); -+ fclose(f); -+ 
free(line); -+ return ret; -+} -+#endif -+ - static int lxc_mount_rootfs(struct lxc_conf *conf) - { - int ret; - struct lxc_storage *bdev; -- const struct lxc_rootfs *rootfs = &conf->rootfs; -+ struct lxc_rootfs *rootfs = &conf->rootfs; -+ -+#ifdef HAVE_ISULAD -+ unsigned long flags, mntflags, pflags; -+ char *mntdata = NULL; -+#endif - - if (!rootfs->path) { - ret = mount("", "/", NULL, MS_SLAVE | MS_REC, 0); - if (ret < 0) -- return log_error_errno(-1, errno, "Failed to recursively turn root mount tree into dependent mount"); -+ return log_error_errno(-1, errno, "Failed to remount \"/\" MS_REC | MS_SLAVE"); -+#ifdef HAVE_ISULAD -+ if (!access(rootfs->mount, F_OK)) { -+ rootfs->path = safe_strdup("/"); -+ if (mount("/", rootfs->mount, NULL, MS_BIND, 0)) { -+ return log_error_errno(-1, errno, "Failed to mount \"/\" to %s", rootfs->mount); -+ } -+ } -+#endif - - return 0; - } -@@ -1242,6 +1395,44 @@ static int lxc_mount_rootfs(struct lxc_conf *conf) - return log_error_errno(-1, errno, "Failed to access to \"%s\". Check it is present", - rootfs->mount); - -+#ifdef HAVE_ISULAD -+ // Support mount propagations of rootfs -+ // Get rootfs mnt propagation options, such as slave or shared -+ if (parse_mntopts(conf->rootfs.options, &mntflags, &pflags, &mntdata) < 0) { -+ free(mntdata); -+ return -1; -+ } -+ free(mntdata); -+ -+ flags = MS_SLAVE | MS_REC; -+ if (pflags) -+ flags = pflags; -+ -+ /* Mount propagation inside container can not greater than host. -+ * So we must change propagation of root according to flags, default is rslave. -+ * That means shared propagation inside container is disabled by default. -+ */ -+ ret = mount("", "/", NULL, flags, NULL); -+ if (ret < 0) { -+ return log_error_errno(-1, errno, "Failed to make / to propagation flags %lu.", flags); -+ } -+ -+ /* Make parent mount private to make sure following bind mount does -+ * not propagate in other namespaces. Also it will help with kernel -+ * check pass in pivot_root. 
(IS_SHARED(new_mnt->mnt_parent)) -+ */ -+ ret = rootfs_parent_mount_private(conf->rootfs.mount); -+ if (ret != 0) { -+ return log_error(-1, "Failed to make parent of rootfs %s to private.", conf->rootfs.mount); -+ } -+ -+ ret = mount(conf->rootfs.mount, conf->rootfs.mount, "bind", MS_BIND | MS_REC, NULL); -+ if (ret < 0) { -+ SYSERROR("Failed to mount rootfs %s", conf->rootfs.mount); -+ return -1; -+ } -+#endif -+ - bdev = storage_init(conf); - if (!bdev) - return log_error(-1, "Failed to mount rootfs \"%s\" onto \"%s\" with options \"%s\"", -@@ -1262,6 +1453,158 @@ static int lxc_mount_rootfs(struct lxc_conf *conf) - return 0; - } - -+#ifdef HAVE_ISULAD -+// maskPath masks the top of the specified path inside a container to avoid -+// security issues from processes reading information from non-namespace aware -+// mounts ( proc/kcore ). -+static bool mask_path(const char *path) -+{ -+ int ret; -+ -+ if (!path) -+ return true; -+ -+ ret = mount("/dev/null", path, "", MS_BIND, ""); -+ if (ret < 0 && errno != ENOENT) { -+ if (errno == ENOTDIR) { -+ ret = mount("tmpfs", path, "tmpfs", MS_RDONLY, ""); -+ if (ret < 0) -+ goto error; -+ return true; -+ } -+ goto error; -+ } -+ return true; -+ -+error: -+ SYSERROR("Failed to mask path \"%s\": %s", path, strerror(errno)); -+ return false; -+} -+ -+#ifdef HAVE_ISULAD -+static bool remount_readwrite(const char *path) -+{ -+ int ret, i; -+ -+ if (!path) -+ return true; -+ -+ for (i = 0; i < 5; i++) { -+ ret = mount("", path, "", MS_REMOUNT, ""); -+ if (ret < 0 && errno != ENOENT) { -+ if (errno == EINVAL) { -+ // Probably not a mountpoint, use bind-mount -+ ret = mount(path, path, "", MS_BIND, ""); -+ if (ret < 0) -+ goto on_error; -+ ret = mount(path, path, "", MS_BIND | MS_REMOUNT | MS_REC | \ -+ MS_NOEXEC | MS_NOSUID | MS_NODEV, ""); -+ if (ret < 0) -+ goto on_error; -+ } else if (errno == EBUSY) { -+ DEBUG("Try to mount \"%s\" to readonly after 100ms.", path); -+ usleep(100 * 1000); -+ continue; -+ } else { -+ goto 
on_error; -+ } -+ } -+ return true; -+ } -+ -+on_error: -+ SYSERROR("Unable to mount \"%s\" to readwrite", path); -+ return false; -+} -+ -+static int remount_proc_sys_mount_entries(struct lxc_list *mount_list, bool lsm_aa_allow_nesting) -+{ -+ char buf[4096]; -+ FILE *file; -+ struct mntent mntent; -+ -+ file = make_anonymous_mount_file(mount_list, lsm_aa_allow_nesting); -+ if (!file) -+ return -1; -+ -+ while (getmntent_r(file, &mntent, buf, sizeof(buf))) { -+ if (strstr(mntent.mnt_dir, "proc/sys") == NULL) { -+ continue; -+ } -+ -+ if (!remount_readwrite((const char*)mntent.mnt_dir)) { -+ fclose(file); -+ return -1; -+ } -+ } -+ -+ fclose(file); -+ return 0; -+} -+#endif -+ -+// remount_readonly will bind over the top of an existing path and ensure that it is read-only. -+static bool remount_readonly(const char *path) -+{ -+ int ret, i; -+ -+ if (!path) -+ return true; -+ -+ for (i = 0; i < 5; i++) { -+ ret = mount("", path, "", MS_REMOUNT | MS_RDONLY, ""); -+ if (ret < 0 && errno != ENOENT) { -+ if (errno == EINVAL) { -+ // Probably not a mountpoint, use bind-mount -+ ret = mount(path, path, "", MS_BIND, ""); -+ if (ret < 0) -+ goto on_error; -+ ret = mount(path, path, "", MS_BIND | MS_REMOUNT | MS_RDONLY | MS_REC | \ -+ MS_NOEXEC | MS_NOSUID | MS_NODEV, ""); -+ if (ret < 0) -+ goto on_error; -+ } else if (errno == EBUSY) { -+ DEBUG("Try to mount \"%s\" to readonly after 100ms.", path); -+ usleep(100 * 1000); -+ continue; -+ } else { -+ goto on_error; -+ } -+ } -+ return true; -+ } -+ -+on_error: -+ SYSERROR("Unable to mount \"%s\" to readonly", path); -+ return false; -+} -+ -+// isulad: setup rootfs masked paths -+static int setup_rootfs_maskedpaths(struct lxc_list *maskedpaths) -+{ -+ struct lxc_list *it; -+ -+ lxc_list_for_each(it, maskedpaths) { -+ if (!mask_path((char *)it->elem)) -+ return -1; -+ } -+ -+ return 0; -+} -+// isulad: setup rootfs ro paths -+static int setup_rootfs_ropaths(struct lxc_list *ropaths) -+{ -+ struct lxc_list *it; -+ -+ 
lxc_list_for_each(it, ropaths) { -+ if (!remount_readonly((char *)it->elem)) -+ return -1; -+ } -+ -+ return 0; -+} -+#endif -+ - int lxc_chroot(const struct lxc_rootfs *rootfs) - { - __do_free char *nroot = NULL; -@@ -1409,12 +1752,12 @@ static int lxc_pivot_root(const char *rootfs) - if (ret < 0) - return log_error_errno(-1, errno, "Failed to enter old root directory"); - -- /* Make oldroot a depedent mount to make sure our umounts don't propagate to the -+ /* Make oldroot rslave to make sure our umounts don't propagate to the - * host. - */ - ret = mount("", ".", "", MS_SLAVE | MS_REC, NULL); - if (ret < 0) -- return log_error_errno(-1, errno, "Failed to recursively turn old root mount tree into dependent mount"); -+ return log_error_errno(-1, errno, "Failed to make oldroot rslave"); - - ret = umount2(".", MNT_DETACH); - if (ret < 0) -@@ -1575,28 +1918,21 @@ static int setup_personality(int persona) - return 0; - } - --static inline bool wants_console(const struct lxc_terminal *terminal) --{ -- return !terminal->path || strcmp(terminal->path, "none"); --} -- - static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs, -- const struct lxc_terminal *console, -- int pts_mnt_fd) -+ const struct lxc_terminal *console) - { - int ret; - char path[PATH_MAX]; - char *rootfs_path = rootfs->path ? rootfs->mount : ""; - -- if (!wants_console(console)) -+ if (console->path && !strcmp(console->path, "none")) - return 0; - - ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs_path); - if (ret < 0 || (size_t)ret >= sizeof(path)) - return -1; - -- /* -- * When we are asked to setup a console we remove any previous -+ /* When we are asked to setup a console we remove any previous - * /dev/console bind-mounts. 
- */ - if (file_exists(path)) { -@@ -1607,49 +1943,39 @@ static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs, - DEBUG("Cleared all (%d) mounts from \"%s\"", ret, path); - } - -- /* -- * For unprivileged containers autodev or automounts will already have -+ /* For unprivileged containers autodev or automounts will already have - * taken care of creating /dev/console. - */ - ret = mknod(path, S_IFREG | 0000, 0); - if (ret < 0 && errno != EEXIST) - return log_error_errno(-errno, errno, "Failed to create console"); - -- ret = fchmod(console->pts, S_IXUSR | S_IXGRP); -+#ifdef HAVE_ISULAD -+ if (console->slave > 0) { -+#endif -+ ret = fchmod(console->slave, S_IXUSR | S_IXGRP); - if (ret < 0) - return log_error_errno(-errno, errno, "Failed to set mode \"0%o\" to \"%s\"", S_IXUSR | S_IXGRP, console->name); - -- if (pts_mnt_fd >= 0) { -- ret = move_mount(pts_mnt_fd, "", -EBADF, path, MOVE_MOUNT_F_EMPTY_PATH); -- if (!ret) { -- DEBUG("Moved mount \"%s\" onto \"%s\"", console->name, path); -- goto finish; -- } -- -- if (ret && errno != ENOSYS) -- return log_error_errno(-1, errno, -- "Failed to mount %d(%s) on \"%s\"", -- pts_mnt_fd, console->name, path); -- } -- - ret = safe_mount(console->name, path, "none", MS_BIND, 0, rootfs_path); - if (ret < 0) -- return log_error_errno(-1, errno, "Failed to mount %d(%s) on \"%s\"", pts_mnt_fd, console->name, path); -- --finish: -- DEBUG("Mounted pts device %d(%s) onto \"%s\"", pts_mnt_fd, console->name, path); -+ return log_error_errno(-1, errno, "Failed to mount \"%s\" on \"%s\"", console->name, path); -+#ifdef HAVE_ISULAD -+ } -+#endif -+ DEBUG("Mounted pts device \"%s\" onto \"%s\"", console->name, path); - return 0; - } - - static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs, - const struct lxc_terminal *console, -- char *ttydir, int pts_mnt_fd) -+ char *ttydir) - { - int ret; - char path[PATH_MAX], lxcpath[PATH_MAX]; - char *rootfs_path = rootfs->path ? 
rootfs->mount : ""; - -- if (!wants_console(console)) -+ if (console->path && !strcmp(console->path, "none")) - return 0; - - /* create rootfs/dev/ directory */ -@@ -1686,30 +2012,22 @@ static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs, - if (ret < 0 && errno != EEXIST) - return log_error_errno(-errno, errno, "Failed to create console"); - -- ret = fchmod(console->pts, S_IXUSR | S_IXGRP); -+#ifdef HAVE_ISULAD -+ if (console->slave > 0) { -+#endif -+ ret = fchmod(console->slave, S_IXUSR | S_IXGRP); - if (ret < 0) - return log_error_errno(-errno, errno, "Failed to set mode \"0%o\" to \"%s\"", S_IXUSR | S_IXGRP, console->name); - - /* bind mount console->name to '/dev//console' */ -- if (pts_mnt_fd >= 0) { -- ret = move_mount(pts_mnt_fd, "", -EBADF, lxcpath, MOVE_MOUNT_F_EMPTY_PATH); -- if (!ret) { -- DEBUG("Moved mount \"%s\" onto \"%s\"", console->name, lxcpath); -- goto finish; -- } -- -- if (ret && errno != ENOSYS) -- return log_error_errno(-1, errno, -- "Failed to mount %d(%s) on \"%s\"", -- pts_mnt_fd, console->name, lxcpath); -- } -- - ret = safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs_path); - if (ret < 0) -- return log_error_errno(-1, errno, "Failed to mount %d(%s) on \"%s\"", pts_mnt_fd, console->name, lxcpath); -+ return log_error_errno(-1, errno, "Failed to mount \"%s\" on \"%s\"", console->name, lxcpath); - DEBUG("Mounted \"%s\" onto \"%s\"", console->name, lxcpath); -+#ifdef HAVE_ISULAD -+ } -+#endif - --finish: - /* bind mount '/dev//console' to '/dev/console' */ - ret = safe_mount(lxcpath, path, "none", MS_BIND, 0, rootfs_path); - if (ret < 0) -@@ -1721,16 +2039,51 @@ finish: - } - - static int lxc_setup_console(const struct lxc_rootfs *rootfs, -- const struct lxc_terminal *console, char *ttydir, -- int pts_mnt_fd) -+ const struct lxc_terminal *console, char *ttydir) - { - - if (!ttydir) -- return lxc_setup_dev_console(rootfs, console, pts_mnt_fd); -+ return lxc_setup_dev_console(rootfs, console); - -- return 
lxc_setup_ttydir_console(rootfs, console, ttydir, pts_mnt_fd); -+ return lxc_setup_ttydir_console(rootfs, console, ttydir); - } -+#ifdef HAVE_ISULAD -+static void parse_mntopt(char *opt, unsigned long *mflags, unsigned long *pflags, char **data, size_t size) -+{ -+ struct mount_opt *mo; -+ -+ /* If opt is found in mount_opt, set or clear flags. -+ * Otherwise append it to data. */ -+ -+ for (mo = &mount_opt[0]; mo->name != NULL; mo++) { -+ if (strncmp(opt, mo->name, strlen(mo->name)) == 0) { -+ if (mo->clear) -+ *mflags &= ~mo->flag; -+ else -+ *mflags |= mo->flag; -+ return; -+ } -+ } -+ -+ /* If opt is found in propagation_opt, set or clear flags. */ -+ for (mo = &propagation_opt[0]; mo->name != NULL; mo++) { -+ if (strncmp(opt, mo->name, strlen(mo->name)) != 0) -+ continue; -+ -+ if (mo->clear) -+ *pflags &= ~mo->flag; -+ else -+ *pflags |= mo->flag; -+ -+ return; -+ } -+ -+ if (strlen(*data)) -+ (void)strlcat(*data, ",", size); - -+ (void)strlcat(*data, opt, size); -+} -+#else - static int parse_mntopt(char *opt, unsigned long *flags, char **data, size_t size) - { - ssize_t ret; -@@ -1767,7 +2120,43 @@ static int parse_mntopt(char *opt, unsigned long *flags, char **data, size_t siz - - return 0; - } -+#endif -+ -+#ifdef HAVE_ISULAD -+int parse_mntopts(const char *mntopts, unsigned long *mntflags, unsigned long *pflags, char **mntdata) -+{ -+ char *data, *p, *s; -+ size_t size; -+ -+ *mntdata = NULL; -+ *mntflags = 0L; -+ *pflags = 0L; - -+ if (!mntopts) -+ return 0; -+ -+ s = safe_strdup(mntopts); -+ -+ size = strlen(s) + 1; -+ data = malloc(size); -+ if (!data) { -+ free(s); -+ return -1; -+ } -+ *data = 0; -+ -+ lxc_iterate_parts(p, s, ",") -+ parse_mntopt(p, mntflags, pflags, &data, size); -+ -+ if (*data) -+ *mntdata = data; -+ else -+ free(data); -+ free(s); -+ -+ return 0; -+} -+#else - int parse_mntopts(const char *mntopts, unsigned long *mntflags, char **mntdata) - { - __do_free char *mntopts_new = NULL, *mntopts_dup = NULL; -@@ -1798,6 +2187,7 @@ int 
parse_mntopts(const char *mntopts, unsigned long *mntflags, char **mntdata) - - return 0; - } -+#endif - - static void parse_propagationopt(char *opt, unsigned long *flags) - { -@@ -1836,43 +2226,17 @@ int parse_propagationopts(const char *mntopts, unsigned long *pflags) - return 0; - } - --static void null_endofword(char *word) -+static int mount_entry(const char *fsname, const char *target, -+ const char *fstype, unsigned long mountflags, -+ unsigned long pflags, const char *data, bool optional, -+ bool dev, bool relative, const char *rootfs) - { -- while (*word && *word != ' ' && *word != '\t') -- word++; -- *word = '\0'; --} -- --/* skip @nfields spaces in @src */ --static char *get_field(char *src, int nfields) --{ -- int i; -- char *p = src; -- -- for (i = 0; i < nfields; i++) { -- while (*p && *p != ' ' && *p != '\t') -- p++; -- -- if (!*p) -- break; -- -- p++; -- } -- -- return p; --} -- --static int mount_entry(const char *fsname, const char *target, -- const char *fstype, unsigned long mountflags, -- unsigned long pflags, const char *data, bool optional, -- bool dev, bool relative, const char *rootfs) --{ -- int ret; -- char srcbuf[PATH_MAX]; -- const char *srcpath = fsname; --#ifdef HAVE_STATVFS -- struct statvfs sb; --#endif -+ int ret; -+ char srcbuf[PATH_MAX]; -+ const char *srcpath = fsname; -+#ifdef HAVE_STATVFS -+ struct statvfs sb; -+#endif - - if (relative) { - ret = snprintf(srcbuf, sizeof(srcbuf), "%s/%s", rootfs ? rootfs : "/", fsname ? 
fsname : ""); -@@ -2010,8 +2374,15 @@ static int mount_entry_create_dir_file(const struct mntent *mntent, - - if (hasmntopt(mntent, "create=dir")) { - ret = mkdir_p(path, 0755); -+#ifdef HAVE_ISULAD -+ if (ret < 0 && errno != EEXIST) { -+ lxc_write_error_message(rootfs->errfd, "%s:%d: mkdir %s: %s.", __FILE__, __LINE__, path, strerror(errno)); -+ return log_error_errno(-1, errno, "Failed to create directory \"%s\"", path); -+ } -+#else - if (ret < 0 && errno != EEXIST) - return log_error_errno(-1, errno, "Failed to create directory \"%s\"", path); -+#endif - } - - if (!hasmntopt(mntent, "create=file")) -@@ -2028,16 +2399,184 @@ static int mount_entry_create_dir_file(const struct mntent *mntent, - p2 = dirname(p1); - - ret = mkdir_p(p2, 0755); -+#ifdef HAVE_ISULAD -+ if (ret < 0 && errno != EEXIST) { -+ lxc_write_error_message(rootfs->errfd, "%s:%d: mkdir %s: %s.", __FILE__, __LINE__, path, strerror(errno)); -+ return log_error_errno(-1, errno, "Failed to create directory \"%s\"", path); -+ } -+#else - if (ret < 0 && errno != EEXIST) - return log_error_errno(-1, errno, "Failed to create directory \"%s\"", path); -+#endif - - ret = mknod(path, S_IFREG | 0000, 0); -+#ifdef HAVE_ISULAD -+ if (ret < 0 && errno != EEXIST) { -+ lxc_write_error_message(rootfs->errfd, "%s:%d: open %s: %s.", __FILE__, __LINE__, path, strerror(errno)); -+ return -errno; -+ } -+#else - if (ret < 0 && errno != EEXIST) - return -errno; -+#endif -+ -+ return 0; -+} -+ -+#ifdef HAVE_ISULAD -+static int mount_entry_with_loop_dev(const char *src, const char *dest, const char *fstype, -+ char *mnt_opts, const char *rootfs) -+{ -+ int srcfd = -1, destfd, ret, saved_errno; -+ char srcbuf[50], destbuf[50]; // only needs enough for /proc/self/fd/ -+ const char *mntsrc = src; -+ int max_retry = 5; -+ struct lxc_storage loop; -+ -+ if (!rootfs) -+ rootfs = ""; -+ -+ /* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */ -+ if (src && src[0] != '/') { -+ INFO("this is a relative 
mount"); -+ srcfd = open_without_symlink(src, NULL); -+ if (srcfd < 0) -+ return srcfd; -+ ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd); -+ if (ret < 0 || ret > sizeof(srcbuf)) { -+ close(srcfd); -+ ERROR("Failed to print string"); -+ return -EINVAL; -+ } -+ mntsrc = srcbuf; -+ } -+ -+ destfd = open_without_symlink(dest, rootfs); -+ if (destfd < 0) { -+ if (srcfd != -1) { -+ saved_errno = errno; -+ close(srcfd); -+ errno = saved_errno; -+ } -+ return destfd; -+ } -+ -+ ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd); -+ if (ret < 0 || ret > sizeof(destbuf)) { -+ if (srcfd != -1) -+ close(srcfd); -+ close(destfd); -+ ERROR("Out of memory"); -+ return -EINVAL; -+ } -+ -+retry: -+ loop.src = (char *)mntsrc; -+ loop.dest = destbuf; -+ loop.mntopts = mnt_opts; -+ loop.type = "loop"; -+ loop.lofd = -1; -+ ret = loop_mount(&loop); -+ if (ret < 0) { -+ /* If loop is used by other program, mount may fail. So -+ * we do retry to ensure mount ok */ -+ if (max_retry > 0) { -+ max_retry--; -+ DEBUG("mount entry with loop dev failed, retry mount." -+ "retry count left %d", max_retry); -+ goto retry; -+ } -+ } -+ if (loop.lofd != -1) -+ close(loop.lofd); -+ if (srcfd != -1) -+ close(srcfd); -+ close(destfd); -+ if (ret < 0) { -+ SYSERROR("Failed to mount %s onto %s", src, dest); -+ return ret; -+ } - - return 0; - } - -+/* isulad: checkMountDestination checks to ensure that the mount destination is not over the top of /proc. -+ * dest is required to be an abs path and have any symlinks resolved before calling this function. */ -+static int check_mount_destination(const char *rootfs, const char *dest) -+{ -+ const char *invalid_destinations[] = { -+ "/proc", -+ NULL -+ }; -+ // White list, it should be sub directories of invalid destinations -+ const char *valid_destinations[] = { -+ // These entries can be bind mounted by files emulated by fuse, -+ // so commands like top, free displays stats in container. 
-+ "/proc/cpuinfo", -+ "/proc/diskstats", -+ "/proc/meminfo", -+ "/proc/stat", -+ "/proc/swaps", -+ "/proc/uptime", -+ "/proc/net/dev", -+ NULL -+ }; -+ const char **valid = NULL; -+ const char **invalid = NULL; -+ -+ for(valid = valid_destinations; *valid != NULL; valid++) { -+ char *fullpath = NULL; -+ char *relpath = NULL; -+ const char *parts[3] = { -+ rootfs, -+ *valid, -+ NULL -+ }; -+ fullpath = lxc_string_join("/", parts, false); -+ if (!fullpath) { -+ ERROR("Out of memory"); -+ return -1; -+ } -+ relpath = path_relative(fullpath, dest); -+ free(fullpath); -+ if (!relpath) -+ return -1; -+ if (!strcmp(relpath, ".")) { -+ free(relpath); -+ return 0; -+ } -+ free(relpath); -+ } -+ -+ for(invalid = invalid_destinations; *invalid != NULL; invalid++) { -+ char *fullpath = NULL; -+ char *relpath = NULL; -+ const char *parts[3] = { -+ rootfs, -+ *invalid, -+ NULL -+ }; -+ fullpath = lxc_string_join("/", parts, false); -+ if (!fullpath) { -+ ERROR("Out of memory"); -+ return -1; -+ } -+ relpath = path_relative(fullpath, dest); -+ free(fullpath); -+ if (!relpath) -+ return -1; -+ if (!strcmp(relpath, ".") || strncmp(relpath, "..", 2)) { -+ ERROR("%s cannot be mounted because it is located inside %s", dest, *invalid); -+ free(relpath); -+ return -1; -+ } -+ free(relpath); -+ } -+ -+ return 0; -+} -+#endif -+ - /* rootfs, lxc_name, and lxc_path can be NULL when the container is created - * without a rootfs. 
*/ - static inline int mount_entry_on_generic(struct mntent *mntent, -@@ -2051,6 +2590,11 @@ static inline int mount_entry_on_generic(struct mntent *mntent, - char *rootfs_path = NULL; - int ret; - bool dev, optional, relative; -+ const char *dest = path; -+ -+#ifdef HAVE_ISULAD -+ char *rpath = NULL; -+#endif - - optional = hasmntopt(mntent, "optional") != NULL; - dev = hasmntopt(mntent, "dev") != NULL; -@@ -2059,9 +2603,38 @@ static inline int mount_entry_on_generic(struct mntent *mntent, - if (rootfs && rootfs->path) - rootfs_path = rootfs->mount; - -- ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name, -+#ifdef HAVE_ISULAD -+ // isulad: ensure that the destination of the bind mount is resolved of symlinks at mount time because -+ // any previous mounts can invalidate the next mount's destination. -+ // this can happen when a user specifies mounts within other mounts to cause breakouts or other -+ // evil stuff to try to escape the container's rootfs. -+ if (rootfs_path) { -+ rpath = follow_symlink_in_scope(path, rootfs_path); -+ if (!rpath) { -+ ERROR("Failed to get real path of '%s' in scope '%s'.", path, rootfs_path); -+ lxc_write_error_message(rootfs->errfd, "%s:%d: failed to get real path of '%s' in scope '%s'.", -+ __FILE__, __LINE__, path, rootfs_path); -+ return -1; -+ } -+ dest = rpath; -+ -+ ret = check_mount_destination(rootfs_path, dest); -+ if (ret) { -+ ERROR("Mount destination is invalid: '%s'", dest); -+ lxc_write_error_message(rootfs->errfd, "%s:%d: mount destination is invalid: '%s'.", -+ __FILE__, __LINE__, dest); -+ free(rpath); -+ return -1; -+ } -+ } -+#endif -+ -+ ret = mount_entry_create_dir_file(mntent, dest, rootfs, lxc_name, - lxc_path); - if (ret < 0) { -+#ifdef HAVE_ISULAD -+ free(rpath); -+#endif - if (optional) - return 0; - -@@ -2069,6 +2642,29 @@ static inline int mount_entry_on_generic(struct mntent *mntent, - } - cull_mntent_opt(mntent); - -+#ifdef HAVE_ISULAD -+ ret = parse_mntopts(mntent->mnt_opts, &mntflags, 
&pflags, &mntdata); -+ if (ret < 0) { -+ free(rpath); -+ return -1; -+ } -+ -+ // support squashfs -+ if (strcmp(mntent->mnt_type, "squashfs") == 0) { -+ ret = mount_entry_with_loop_dev(mntent->mnt_fsname, dest, mntent->mnt_type, -+ mntent->mnt_opts, rootfs_path); -+ } else { -+ ret = mount_entry(mntent->mnt_fsname, dest, mntent->mnt_type, mntflags, -+ pflags, mntdata, optional, dev, relative, rootfs_path); -+ } -+ -+ if (ret < 0) { -+ lxc_write_error_message(rootfs->errfd, "%s:%d: failed to mount %s as type %s.", -+ __FILE__, __LINE__, mntent->mnt_fsname, mntent->mnt_type); -+ } -+ -+ free(rpath); -+#else - ret = parse_propagationopts(mntent->mnt_opts, &pflags); - if (ret < 0) - return -1; -@@ -2077,8 +2673,9 @@ static inline int mount_entry_on_generic(struct mntent *mntent, - if (ret < 0) - return ret; - -- ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags, -- pflags, mntdata, optional, dev, relative, rootfs_path); -+ ret = mount_entry(mntent->mnt_fsname, dest, mntent->mnt_type, mntflags, -+ pflags, mntdata, optional, dev, relative, rootfs_path); -+#endif - - return ret; - } -@@ -2169,6 +2766,28 @@ static int mount_file_entries(const struct lxc_conf *conf, - while (getmntent_r(file, &mntent, buf, sizeof(buf))) { - int ret; - -+#ifdef HAVE_ISULAD -+ //isulad, system contaienr, skip "proc/sys/xxx" path -+ if (conf->systemd != NULL && strcmp(conf->systemd, "true") == 0) { -+ if (strstr(mntent.mnt_dir, "proc/sys") != NULL) { -+ continue; -+ } -+ } -+ -+ /* Note: Workaround for volume file path with space*/ -+ mntent.mnt_fsname = lxc_string_replace(SPACE_MAGIC_STR, " ", mntent.mnt_fsname); -+ if(!mntent.mnt_fsname) { -+ SYSERROR("memory allocation error"); -+ return -1; -+ } -+ mntent.mnt_dir = lxc_string_replace(SPACE_MAGIC_STR, " ", mntent.mnt_dir); -+ if(!mntent.mnt_dir) { -+ SYSERROR("memory allocation error"); -+ free(mntent.mnt_fsname); -+ return -1; -+ } -+#endif -+ - if (!rootfs->path) - ret = mount_entry_on_systemfs(&mntent); - else if 
(mntent.mnt_dir[0] != '/') -@@ -2177,6 +2796,14 @@ static int mount_file_entries(const struct lxc_conf *conf, - else - ret = mount_entry_on_absolute_rootfs(&mntent, rootfs, - lxc_name, lxc_path); -+ -+#ifdef HAVE_ISULAD -+ free(mntent.mnt_fsname); -+ mntent.mnt_fsname = NULL; -+ free(mntent.mnt_dir); -+ mntent.mnt_dir = NULL; -+#endif -+ - if (ret < 0) - return -1; - } -@@ -2299,6 +2926,51 @@ static int setup_mount_entries(const struct lxc_conf *conf, - return mount_file_entries(conf, rootfs, f, lxc_name, lxc_path); - } - -+#ifdef HAVE_ISULAD -+static bool have_dev_bind_mount_entry(FILE *file) -+{ -+ bool have_bind_dev = false; -+ char buf[PATH_MAX]; -+ struct mntent mntent; -+ -+ while (getmntent_r(file, &mntent, buf, sizeof(buf))) { -+ mntent.mnt_dir = lxc_string_replace(SPACE_MAGIC_STR, " ", mntent.mnt_dir); -+ if(!mntent.mnt_dir) { -+ SYSERROR("memory allocation error"); -+ continue; -+ } -+ -+ if (strcmp(mntent.mnt_dir, "dev") == 0 && strcmp(mntent.mnt_type, "bind") == 0) { -+ have_bind_dev = true; -+ } -+ -+ free(mntent.mnt_dir); -+ mntent.mnt_dir = NULL; -+ -+ if (have_bind_dev) -+ return true; -+ } -+ -+ return false; -+} -+ -+// returns true if /dev needs to be set up. 
-+static bool need_setup_dev(const struct lxc_conf *conf, struct lxc_list *mount) -+{ -+ __do_fclose FILE *f = NULL; -+ -+ f = make_anonymous_mount_file(mount, conf->lsm_aa_allow_nesting); -+ if (!f) -+ return true; -+ -+ if (have_dev_bind_mount_entry(f)) { -+ return false; -+ } else { -+ return true; -+ } -+} -+#endif -+ - static int parse_cap(const char *cap) - { - size_t i; -@@ -2395,6 +3067,16 @@ static int dropcaps_except(struct lxc_list *caps) - lxc_list_for_each (iterator, caps) { - keep_entry = iterator->elem; - -+#ifdef HAVE_ISULAD -+ /* Do not keep any cap*/ -+ if (strcmp(keep_entry, "ISULAD_KEEP_NONE") == 0) { -+ DEBUG("Do not keep any capability"); -+ for(i = 0; i < numcaps; i++) { -+ caplist[i] = 0; -+ } -+ break; -+ } -+#endif - capid = parse_cap(keep_entry); - if (capid == -2) - continue; -@@ -2443,7 +3125,11 @@ static int parse_resource(const char *res) - return resid; - } - -+#ifdef HAVE_ISULAD -+int setup_resource_limits(struct lxc_list *limits, pid_t pid, int errfd) -+#else - int setup_resource_limits(struct lxc_list *limits, pid_t pid) -+#endif - { - int resid; - struct lxc_list *it; -@@ -2457,8 +3143,17 @@ int setup_resource_limits(struct lxc_list *limits, pid_t pid) - return log_error(-1, "Unknown resource %s", lim->resource); - - #if HAVE_PRLIMIT || HAVE_PRLIMIT64 -+#if HAVE_ISULAD -+ if (prlimit(pid, resid, &lim->limit, NULL) != 0) { -+ lxc_write_error_message(errfd, "%s:%d: Failed to set limit %s %lu %lu: %s.", -+ __FILE__, __LINE__, lim->resource, -+ lim->limit.rlim_cur, lim->limit.rlim_max, strerror(errno)); -+ return log_error_errno(-1, errno, "Failed to set limit %s", lim->resource); -+ } -+#else - if (prlimit(pid, resid, &lim->limit, NULL) != 0) - return log_error_errno(-1, errno, "Failed to set limit %s", lim->resource); -+#endif - - TRACE("Setup \"%s\" limit", lim->resource); - #else -@@ -2546,10 +3241,10 @@ struct lxc_conf *lxc_conf_init(void) - new->console.path = NULL; - new->console.peer = -1; - new->console.proxy.busy = -1; -- 
new->console.proxy.ptmx = -1; -- new->console.proxy.pts = -1; -- new->console.ptmx = -1; -- new->console.pts = -1; -+ new->console.proxy.master = -1; -+ new->console.proxy.slave = -1; -+ new->console.master = -1; -+ new->console.slave = -1; - new->console.name[0] = '\0'; - memset(&new->console.ringbuf, 0, sizeof(struct lxc_ringbuf)); - new->maincmd_fd = -1; -@@ -2601,6 +3296,27 @@ struct lxc_conf *lxc_conf_init(void) - memset(&new->ns_share, 0, sizeof(char *) * LXC_NS_MAX); - seccomp_conf_init(new); - -+#ifdef HAVE_ISULAD -+ lxc_list_init(&new->populate_devs); -+ lxc_list_init(&new->rootfs.maskedpaths); -+ lxc_list_init(&new->rootfs.ropaths); -+ new->exit_fd = -1; -+ new->umask = 0027; /*default umask 0027*/ -+ new->console.init_fifo[0] = NULL; -+ new->console.init_fifo[1] = NULL; -+ new->console.init_fifo[2] = NULL; -+ new->console.pipes[0][0] = -1; -+ new->console.pipes[0][1] = -1; -+ new->console.pipes[1][0] = -1; -+ new->console.pipes[1][1] = -1; -+ new->console.pipes[2][0] = -1; -+ new->console.pipes[2][1] = -1; -+ lxc_list_init(&new->console.fifos); -+ new->errmsg = NULL; -+ new->errpipe[0] = -1; -+ new->errpipe[1] = -1; -+#endif -+ - return new; - } - -@@ -2716,10 +3432,19 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid) - struct id_map *map; - struct lxc_list *iterator; - enum idtype type; -+ /* strlen("new@idmap") = 9 -+ * + -+ * strlen(" ") = 1 -+ * + -+ * INTTYPE_TO_STRLEN(uint32_t) -+ * + -+ * strlen(" ") = 1 -+ * -+ * We add some additional space to make sure that we really have -+ * LXC_IDMAPLEN bytes available for our the {g,u]id mapping. 
-+ */ - int ret = 0, gidmap = 0, uidmap = 0; -- char mapbuf[STRLITERALLEN("new@idmap") + STRLITERALLEN(" ") + -- INTTYPE_TO_STRLEN(pid_t) + STRLITERALLEN(" ") + -- LXC_IDMAPLEN] = {0}; -+ char mapbuf[9 + 1 + INTTYPE_TO_STRLEN(uint32_t) + 1 + LXC_IDMAPLEN] = {0}; - bool had_entry = false, use_shadow = false; - int hostuid, hostgid; - -@@ -2828,11 +3553,11 @@ int lxc_map_ids(struct lxc_list *idmap, pid_t pid) - return 0; - } - --/* -- * Return the host uid/gid to which the container root is mapped in val. -+/* Return the host uid/gid to which the container root is mapped in val. - * Return true if id was found, false otherwise. - */ --static id_t get_mapped_rootid(const struct lxc_conf *conf, enum idtype idtype) -+static bool get_mapped_rootid(const struct lxc_conf *conf, enum idtype idtype, -+ unsigned long *val) - { - unsigned nsid; - struct id_map *map; -@@ -2849,13 +3574,11 @@ static id_t get_mapped_rootid(const struct lxc_conf *conf, enum idtype idtype) - continue; - if (map->nsid != nsid) - continue; -- return map->hostid; -+ *val = map->hostid; -+ return true; - } - -- if (idtype == ID_TYPE_UID) -- return LXC_INVALID_UID; -- -- return LXC_INVALID_GID; -+ return false; - } - - int mapped_hostid(unsigned id, const struct lxc_conf *conf, enum idtype idtype) -@@ -2896,6 +3619,129 @@ again: - return freeid; - } - -+int chown_mapped_root_exec_wrapper(void *args) -+{ -+ execvp("lxc-usernsexec", args); -+ return -1; -+} -+ -+/* chown_mapped_root: for an unprivileged user with uid/gid X to -+ * chown a dir to subuid/subgid Y, he needs to run chown as root -+ * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and -+ * nsid Y is mapped to hostuid/hostgid X. That way, the container -+ * root is privileged with respect to hostuid/hostgid X, allowing -+ * him to do the chown. 
-+ */ -+int chown_mapped_root(const char *path, const struct lxc_conf *conf) -+{ -+ uid_t rootuid, rootgid; -+ unsigned long val; -+ int hostuid, hostgid, ret; -+ struct stat sb; -+ char map1[100], map2[100], map3[100], map4[100], map5[100]; -+ char ugid[100]; -+ const char *args1[] = {"lxc-usernsexec", -+ "-m", map1, -+ "-m", map2, -+ "-m", map3, -+ "-m", map5, -+ "--", "chown", ugid, path, -+ NULL}; -+ const char *args2[] = {"lxc-usernsexec", -+ "-m", map1, -+ "-m", map2, -+ "-m", map3, -+ "-m", map4, -+ "-m", map5, -+ "--", "chown", ugid, path, -+ NULL}; -+ char cmd_output[PATH_MAX]; -+ -+ hostuid = geteuid(); -+ hostgid = getegid(); -+ -+ if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) -+ return log_error(-1, "No uid mapping for container root"); -+ rootuid = (uid_t)val; -+ -+ if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) -+ return log_error(-1, "No gid mapping for container root"); -+ rootgid = (gid_t)val; -+ -+ if (hostuid == 0) { -+ if (chown(path, rootuid, rootgid) < 0) -+ return log_error(-1, "Error chowning %s", path); -+ -+ return 0; -+ } -+ -+ /* nothing to do */ -+ if (rootuid == hostuid) -+ return log_info(0, "Container root is our uid; no need to chown"); -+ -+ /* save the current gid of "path" */ -+ if (stat(path, &sb) < 0) -+ return log_error(-1, "Error stat %s", path); -+ -+ /* Update the path argument in case this was overlayfs. */ -+ args1[sizeof(args1) / sizeof(args1[0]) - 2] = path; -+ args2[sizeof(args2) / sizeof(args2[0]) - 2] = path; -+ -+ /* -+ * A file has to be group-owned by a gid mapped into the -+ * container, or the container won't be privileged over it. 
-+ */ -+ DEBUG("trying to chown \"%s\" to %d", path, hostgid); -+ if (sb.st_uid == hostuid && -+ mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 && -+ chown(path, -1, hostgid) < 0) -+ return log_error(-1, "Failed chgrping %s", path); -+ -+ /* "u:0:rootuid:1" */ -+ ret = snprintf(map1, 100, "u:0:%d:1", rootuid); -+ if (ret < 0 || ret >= 100) -+ return log_error(-1, "Error uid printing map string"); -+ -+ /* "u:hostuid:hostuid:1" */ -+ ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid); -+ if (ret < 0 || ret >= 100) -+ return log_error(-1, "Error uid printing map string"); -+ -+ /* "g:0:rootgid:1" */ -+ ret = snprintf(map3, 100, "g:0:%d:1", rootgid); -+ if (ret < 0 || ret >= 100) -+ return log_error(-1, "Error gid printing map string"); -+ -+ /* "g:pathgid:rootgid+pathgid:1" */ -+ ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid, -+ rootgid + (gid_t)sb.st_gid); -+ if (ret < 0 || ret >= 100) -+ return log_error(-1, "Error gid printing map string"); -+ -+ /* "g:hostgid:hostgid:1" */ -+ ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid); -+ if (ret < 0 || ret >= 100) -+ return log_error(-1, "Error gid printing map string"); -+ -+ /* "0:pathgid" (chown) */ -+ ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid); -+ if (ret < 0 || ret >= 100) -+ return log_error(-1, "Error owner printing format string for chown"); -+ -+ if (hostgid == sb.st_gid) -+ ret = run_command(cmd_output, sizeof(cmd_output), -+ chown_mapped_root_exec_wrapper, -+ (void *)args1); -+ else -+ ret = run_command(cmd_output, sizeof(cmd_output), -+ chown_mapped_root_exec_wrapper, -+ (void *)args2); -+ if (ret < 0) -+ ERROR("lxc-usernsexec failed: %s", cmd_output); -+ -+ return ret; -+} -+ - /* NOTE: Must not be called from inside the container namespace! 
*/ - int lxc_create_tmp_proc_mount(struct lxc_conf *conf) - { -@@ -2923,8 +3769,8 @@ void tmp_proc_unmount(struct lxc_conf *lxc_conf) - lxc_conf->tmp_umount_proc = false; - } - --/* Walk /proc/mounts and change any shared entries to dependent mounts. */ --void turn_into_dependent_mounts(void) -+/* Walk /proc/mounts and change any shared entries to slave. */ -+void remount_all_slave(void) - { - __do_free char *line = NULL; - __do_fclose FILE *f = NULL; -@@ -3001,12 +3847,13 @@ again: - null_endofword(target); - ret = mount(NULL, target, NULL, MS_SLAVE, NULL); - if (ret < 0) { -- SYSERROR("Failed to recursively turn old root mount tree into dependent mount. Continuing..."); -+ SYSERROR("Failed to make \"%s\" MS_SLAVE", target); -+ ERROR("Continuing..."); - continue; - } -- TRACE("Recursively turned old root mount tree into dependent mount"); -+ TRACE("Remounted \"%s\" as MS_SLAVE", target); - } -- TRACE("Turned all mount table entries into dependent mount"); -+ TRACE("Remounted all mount table entries as MS_SLAVE"); - } - - static int lxc_execute_bind_init(struct lxc_handler *handler) -@@ -3082,7 +3929,13 @@ int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, const char *name, - return log_trace(0, "Bind mounted container / onto itself"); - } - -- turn_into_dependent_mounts(); -+#ifdef HAVE_ISULAD -+ if (!conf->rootfs.options) { -+ remount_all_slave(); -+ } -+#else -+ remount_all_slave(); -+#endif - - ret = run_lxc_hooks(name, "pre-mount", conf, NULL); - if (ret < 0) -@@ -3123,7 +3976,7 @@ static bool verify_start_hooks(struct lxc_conf *conf) - - static bool execveat_supported(void) - { -- execveat(-1, "", NULL, NULL, AT_EMPTY_PATH); -+ lxc_raw_execveat(-1, "", NULL, NULL, AT_EMPTY_PATH); - if (errno == ENOSYS) - return false; - -@@ -3180,60 +4033,738 @@ static int lxc_setup_boot_id(void) - return 0; - } - --int lxc_setup(struct lxc_handler *handler) -+#ifdef HAVE_ISULAD -+/* isulad: setup devices which will be populated in the container.*/ -+static int 
setup_populate_devs(const struct lxc_rootfs *rootfs, struct lxc_list *devs) - { -- __do_close int pts_mnt_fd = -EBADF; -- int ret; -- const char *lxcpath = handler->lxcpath, *name = handler->name; -- struct lxc_conf *lxc_conf = handler->conf; -- char *keyring_context = NULL; -+ int ret = 0; -+ char *pathdirname = NULL; -+ char path[MAXPATHLEN]; -+ mode_t file_mode = 0; -+ struct lxc_populate_devs *dev_elem = NULL; -+ struct lxc_list *it = NULL; -+ mode_t cur_mask; - -- ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath); -- if (ret < 0) -- return log_error(-1, "Failed to setup rootfs"); -+ INFO("Populating devices into container"); -+ cur_mask = umask(0000); -+ lxc_list_for_each(it, devs) { -+ ret = 0; -+ dev_elem = it->elem; - -- if (handler->nsfd[LXC_NS_UTS] == -EBADF) { -- ret = setup_utsname(lxc_conf->utsname); -- if (ret < 0) -- return log_error(-1, "Failed to setup the utsname %s", name); -- } -+ ret = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->path ? rootfs->mount : "", dev_elem->name); -+ if (ret < 0 || ret >= MAXPATHLEN) { -+ ret = -1; -+ goto reset_umask; -+ } - -- if (!lxc_conf->keyring_disable_session) { -- if (lxc_conf->lsm_se_keyring_context) { -- keyring_context = lxc_conf->lsm_se_keyring_context; -- } else if (lxc_conf->lsm_se_context) { -- keyring_context = lxc_conf->lsm_se_context; -+ /* create any missing directories */ -+ pathdirname = safe_strdup(path); -+ pathdirname = dirname(pathdirname); -+ ret = mkdir_p(pathdirname, 0755); -+ free(pathdirname); -+ if (ret < 0) { -+ WARN("Failed to create target directory"); -+ ret = -1; -+ goto reset_umask; - } - -- ret = lxc_setup_keyring(keyring_context); -- if (ret < 0) -- return -1; -- } -+ if (!strcmp(dev_elem->type, "c")) { -+ file_mode = dev_elem->file_mode | S_IFCHR; -+ } else if (!strcmp(dev_elem->type, "b")) { -+ file_mode = dev_elem->file_mode | S_IFBLK; -+ } else { -+ ERROR("Failed to parse devices type '%s'", dev_elem->type); -+ ret = -1; -+ goto reset_umask; -+ } - -- if 
(handler->ns_clone_flags & CLONE_NEWNET) { -- ret = lxc_setup_network_in_child_namespaces(lxc_conf, -- &lxc_conf->network); -- if (ret < 0) -- return log_error(-1, "Failed to setup network"); -+ DEBUG("Try to mknod '%s':'%d':'%d':'%d'\n", path, -+ file_mode, dev_elem->maj, dev_elem->min); - -- ret = lxc_network_send_name_and_ifindex_to_parent(handler); -- if (ret < 0) -- return log_error(-1, "Failed to send network device names and ifindices to parent"); -+ ret = mknod(path, file_mode, makedev(dev_elem->maj, dev_elem->min)); -+ if (ret && errno != EEXIST) { -+ SYSERROR("Failed to mknod '%s':'%d':'%d':'%d'", dev_elem->name, -+ file_mode, dev_elem->maj, dev_elem->min); -+ -+ char hostpath[MAXPATHLEN]; -+ FILE *pathfile = NULL; -+ -+ // Unprivileged containers cannot create devices, so -+ // try to bind mount the device from the host -+ ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", dev_elem->name); -+ if (ret < 0 || ret >= MAXPATHLEN) { -+ ret = -1; -+ goto reset_umask; -+ } -+ pathfile = lxc_fopen(path, "wb"); -+ if (!pathfile) { -+ SYSERROR("Failed to create device mount target '%s'", path); -+ ret = -1; -+ goto reset_umask; -+ } -+ fclose(pathfile); -+ if (safe_mount(hostpath, path, 0, MS_BIND, NULL, -+ rootfs->path ? 
rootfs->mount : NULL) != 0) { -+ SYSERROR("Failed bind mounting device %s from host into container", -+ dev_elem->name); -+ ret = -1; -+ goto reset_umask; -+ } -+ } -+ if (chown(path, dev_elem->uid, dev_elem->gid) < 0) { -+ ERROR("Error chowning %s", path); -+ ret = -1; -+ goto reset_umask; -+ } -+ ret = 0; - } - -- if (wants_console(&lxc_conf->console)) { -- pts_mnt_fd = open_tree(-EBADF, lxc_conf->console.name, -- OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH); -- if (pts_mnt_fd < 0) -- SYSTRACE("Failed to create detached mount for container's console \"%s\"", -- lxc_conf->console.name); -- else -- TRACE("Created detached mount for container's console \"%s\"", -- lxc_conf->console.name); -- } -+reset_umask: -+ (void)umask(cur_mask); -+ -+ INFO("Populated devices into container /dev"); -+ return ret; -+} -+ -+// isulad: setup rootfs mountopts -+static int setup_rootfs_mountopts(const struct lxc_rootfs *rootfs) -+{ -+ unsigned long mflags, mntflags, pflags; -+ char *mntdata = NULL; -+ -+ if(!rootfs || !rootfs->options) -+ return 0; -+ -+ if (parse_mntopts(rootfs->options, &mntflags, &pflags, &mntdata) < 0) { -+ free(mntdata); -+ return -1; -+ } -+ free(mntdata); -+ -+ if (mntflags & MS_RDONLY) { -+ mflags = add_required_remount_flags("/", NULL, MS_BIND | MS_REC | mntflags | pflags | MS_REMOUNT); -+ DEBUG("remounting / as readonly"); -+ if (mount("/", "/", NULL, mflags, 0) < 0) { -+ SYSERROR("Failed to make / readonly."); -+ return -1; -+ } -+ } -+ return 0; -+} -+ -+struct oci_hook_conf { -+ defs_hook *ocihook; -+ -+ int errfd; -+ int which; -+}; -+ -+static int create_mtab_link() -+{ -+ ssize_t ret; -+ int mret; -+ struct stat sbuf; -+ const char *pathname = "/proc/mounts"; -+ const char *slink = "/etc/mtab"; -+ -+ if (file_exists(slink)) { -+ return 0; -+ } -+ -+ ret = stat(pathname, &sbuf); -+ if (ret < 0) { -+ SYSERROR("Failed to stat %s: %s", pathname, strerror(errno)); -+ return -1; -+ } -+ -+ mret = symlink(pathname, slink); -+ if (mret < 0 && errno != 
EEXIST) { -+ if (errno == EROFS) { -+ WARN("Failed to create link %s for target %s. Read-only filesystem", slink, pathname); -+ } else { -+ SYSERROR("Failed to create \"%s\"", slink); -+ return -1; -+ } -+ } -+ -+ return 0; -+} -+ -+struct wait_conf { -+ pid_t pid; -+ unsigned long long startat; -+ int timeout; -+ int errfd; -+ int which; -+}; -+ -+static char* generate_json_str(const char *name, const char *lxcpath, const char *rootfs) -+{ -+ char *cpid = NULL; -+ char *inmsg = NULL; -+ int rc = 0, ret = 0; -+ size_t size; -+ -+ if (!name || !lxcpath || !rootfs) { -+ ERROR("Invalid arguments"); -+ return NULL; -+ } -+ cpid = getenv("LXC_PID"); -+ if (!cpid) { -+ ERROR("Get container %s pid failed: %s", name, strerror(errno)); -+ cpid = "-1"; -+ } -+ -+ if ((strlen(name) + strlen(cpid) + strlen(rootfs) + strlen(lxcpath) + strlen(name)) > -+ SIZE_MAX - (strlen("{\"ociVersion\":\"\",\"id\":\"\",\"pid\":,\"root\":\"\",\"bundle\":\"\"}") - 1 - 1)) { -+ ERROR("Out of memory"); -+ ret = -1; -+ goto out_free; -+ } -+ -+ // {"ociVersion":"","id":"xxx","pid":777,"root":"xxx","bundle":"xxx"} -+ size = strlen("{\"ociVersion\":\"\",\"id\":\"\",\"pid\":,\"root\":\"\",\"bundle\":\"\"}") + -+ strlen(name) + strlen(cpid) + strlen(rootfs) + strlen(lxcpath) + 1 + strlen(name) + 1; -+ inmsg = malloc(size); -+ if (inmsg == NULL) { -+ ERROR("Out of memory"); -+ ret = -1; -+ goto out_free; -+ } -+ rc = snprintf(inmsg, size, -+ "{\"ociVersion\":\"\",\"id\":\"%s\",\"pid\":%s,\"root\":\"%s\",\"bundle\":\"%s/%s\"}", -+ name, cpid, rootfs, lxcpath, name); -+ if (rc < 0 || rc >= size) { -+ ERROR("Create json string failed"); -+ ret = -1; -+ } -+ -+out_free: -+ if (ret) { -+ free(inmsg); -+ inmsg = NULL; -+ } -+ return inmsg; -+} -+ -+static char **merge_ocihook_env(char **oldenvs, size_t env_len, size_t *merge_env_len) -+{ -+ char **result = NULL; -+ size_t result_len = env_len; -+ size_t i, j; -+ char *tmpenv = NULL; -+ char *lxc_envs[] = {"LD_LIBRARY_PATH", "PATH", "LXC_CGNS_AWARE", 
"LXC_PID", "LXC_ROOTFS_MOUNT", -+ "LXC_CONFIG_FILE", "LXC_CGROUP_PATH", "LXC_ROOTFS_PATH", "LXC_NAME" -+ }; -+ char *lxcenv_buf = NULL; -+ -+ if (result_len > SIZE_MAX - (sizeof(lxc_envs) / sizeof(char *)) - 1) -+ return NULL; -+ result_len += (sizeof(lxc_envs) / sizeof(char *)) + 1; -+ result = malloc(sizeof(char *) * result_len); -+ if (result == NULL) -+ return NULL; -+ memset(result, 0, sizeof(char *) * result_len); -+ -+ for(i = 0; i < env_len; i++) { -+ if (oldenvs[i]) -+ result[i] = safe_strdup(oldenvs[i]); -+ } -+ -+ for(j = 0; j < (sizeof(lxc_envs) / sizeof(char *)); j++) { -+ size_t env_buf_len = 0; -+ tmpenv = getenv(lxc_envs[j]); -+ if (tmpenv && i < (result_len - 1)) { -+ if (strlen(tmpenv) > (SIZE_MAX - 1 - 1 - strlen(lxc_envs[j]))) { -+ lxc_free_array((void **)result, free); -+ return NULL; -+ } -+ env_buf_len = ((strlen(tmpenv) + 1) + strlen(lxc_envs[j])) + 1; -+ lxcenv_buf = malloc(env_buf_len); -+ if (lxcenv_buf == NULL) { -+ lxc_free_array((void **)result, free); -+ return NULL; -+ } -+ if (snprintf(lxcenv_buf, env_buf_len, "%s=%s", lxc_envs[j], tmpenv) < 0) { -+ free(lxcenv_buf); -+ continue; -+ } -+ result[i++] = lxcenv_buf; -+ lxcenv_buf = NULL; -+ } -+ } -+ -+ *merge_env_len = i; -+ return result; -+} -+ -+static struct lxc_popen_FILE *lxc_popen_ocihook(const char *commandpath, char **args, int args_len, -+ char **envs, int env_len, const char *instr) -+{ -+ int ret; -+ struct lxc_popen_FILE *fp = NULL; -+ int pipe_fds[2] = {-1, -1}; -+ int pipe_msg[2] = {-1, -1}; -+ pid_t child_pid; -+ -+ ret = pipe2(pipe_fds, O_CLOEXEC | O_NONBLOCK); -+ if (ret < 0) -+ return NULL; -+ -+ ret = pipe2(pipe_msg, O_CLOEXEC | O_NONBLOCK); -+ if (ret < 0) { -+ ERROR("Pipe msg failure"); -+ close(pipe_fds[0]); -+ close(pipe_fds[1]); -+ return NULL; -+ } -+ -+ child_pid = fork(); -+ if (child_pid < 0) -+ goto on_error; -+ -+ if (child_pid == 0) { -+ close(pipe_msg[1]); -+ if (pipe_msg[0] != STDIN_FILENO) -+ dup2(pipe_msg[0], STDIN_FILENO); -+ else { -+ if 
(fcntl(pipe_msg[0], F_SETFD, 0) != 0) { -+ fprintf(stderr, "Failed to remove FD_CLOEXEC from fd."); -+ exit(127); -+ } -+ } -+ close(pipe_msg[0]); -+ -+ close(pipe_fds[0]); -+ -+ /* duplicate stdout */ -+ if (pipe_fds[1] != STDOUT_FILENO) -+ ret = dup2(pipe_fds[1], STDOUT_FILENO); -+ else -+ ret = fcntl(pipe_fds[1], F_SETFD, 0); -+ if (ret < 0) { -+ close(pipe_fds[1]); -+ _exit(EXIT_FAILURE); -+ } -+ -+ /* duplicate stderr */ -+ if (pipe_fds[1] != STDERR_FILENO) -+ ret = dup2(pipe_fds[1], STDERR_FILENO); -+ else -+ ret = fcntl(pipe_fds[1], F_SETFD, 0); -+ close(pipe_fds[1]); -+ if (ret < 0) -+ _exit(EXIT_FAILURE); -+ -+ if (lxc_check_inherited(NULL, true, NULL, 0) != 0) { -+ fprintf(stderr, "check inherited fd failed"); -+ exit(127); -+ } -+ -+ /* -+ * Unblock signals. -+ * This is the main/only reason -+ * why we do our lousy popen() emulation. -+ */ -+ { -+ sigset_t mask; -+ sigfillset(&mask); -+ sigprocmask(SIG_UNBLOCK, &mask, NULL); -+ } -+ -+ if (env_len > 0) -+ execvpe(commandpath, args, envs); -+ else -+ execvp(commandpath, args); -+ fprintf(stderr, "fork/exec %s: %s", commandpath, strerror(errno)); -+ exit(127); -+ } -+ -+ /* parent */ -+ -+ close(pipe_fds[1]); -+ pipe_fds[1] = -1; -+ -+ close(pipe_msg[0]); -+ pipe_msg[0]= -1; -+ if (instr) { -+ size_t len = strlen(instr); -+ if (lxc_write_nointr(pipe_msg[1], instr, len) != len) { -+ WARN("Write instr: %s failed", instr); -+ } -+ } -+ close(pipe_msg[1]); -+ pipe_msg[1]= -1; -+ -+ fp = calloc(1, sizeof(*fp)); -+ if (!fp) { -+ ERROR("Failed to allocate memory"); -+ goto on_error; -+ } -+ -+ fp->child_pid = child_pid; -+ fp->pipe = pipe_fds[0]; -+ -+ return fp; -+ -+on_error: -+ -+ if (pipe_fds[0] >= 0) -+ close(pipe_fds[0]); -+ -+ if (pipe_fds[1] >= 0) -+ close(pipe_fds[1]); -+ -+ if (pipe_msg[0] >= 0) -+ close(pipe_msg[0]); -+ -+ if (pipe_msg[1] >= 0) -+ close(pipe_msg[1]); -+ -+ if (fp) -+ free(fp); -+ -+ return NULL; -+} -+ -+void* wait_ocihook_timeout(void *arg) -+{ -+ bool alive = false; -+ struct 
wait_conf *conf = (struct wait_conf *)arg; -+ -+ if (!conf || conf->timeout < 1) -+ goto out; -+ -+ sleep(conf->timeout); -+ -+ alive = lxc_process_alive(conf->pid, conf->startat); -+ -+ if (alive) { -+ ERROR("%s:%d: running %s hook caused \"hook ran past specified timeout of %.1fs\"", -+ __FILE__, __LINE__, lxchook_names[conf->which], -+ (double)conf->timeout); -+ -+ lxc_write_error_message(conf->errfd, "%s:%d: running %s hook caused \"hook ran past specified timeout of %.1fs\".", -+ __FILE__, __LINE__, lxchook_names[conf->which], -+ (double)conf->timeout); -+ -+ if (kill(conf->pid, SIGKILL) && errno != ESRCH) { -+ ERROR("Send kill signal failed"); -+ goto out; -+ } -+ } -+ -+out: -+ free(conf); -+ return ((void *)0); -+} -+ -+static int run_ocihook_buffer(struct oci_hook_conf *oconf, const char *inmsg) -+{ -+ struct lxc_popen_FILE *f; -+ char output[LXC_LOG_BUFFER_SIZE] = {0}; -+ int ret; -+ pthread_t ptid; -+ int err; -+ struct wait_conf *conf = NULL; -+ pthread_attr_t attr; -+ char *buffer = oconf->ocihook->path; -+ char *err_args_msg = NULL; -+ char *err_envs_msg = NULL; -+ char **hookenvs = NULL; -+ size_t hookenvs_len = 0; -+ -+ hookenvs = merge_ocihook_env(oconf->ocihook->env, oconf->ocihook->env_len, &hookenvs_len); -+ if (!hookenvs) { -+ ERROR("Out of memory."); -+ return -1; -+ } -+ -+ f = lxc_popen_ocihook(buffer, oconf->ocihook->args, oconf->ocihook->args_len, hookenvs, hookenvs_len, inmsg); -+ lxc_free_array((void **)hookenvs, free); -+ if (!f) { -+ SYSERROR("Failed to popen() %s.", buffer); -+ return -1; -+ } -+ -+ conf = malloc(sizeof(struct wait_conf)); -+ if (conf == NULL) { -+ SYSERROR("Failed to malloc."); -+ goto on_error; -+ } -+ -+ memset(conf, 0x00, sizeof(struct wait_conf)); -+ -+ conf->pid = f->child_pid; -+ conf->startat = lxc_get_process_startat(conf->pid); -+ -+ INFO("hook_conf timeout %d", oconf->ocihook->timeout); -+ if(oconf->ocihook->timeout > 0) -+ conf->timeout = oconf->ocihook->timeout; -+ else { -+ conf->timeout = 30; -+ 
INFO("Set hook timeout 30s"); -+ } -+ conf->errfd = oconf->errfd; -+ conf->which = oconf->which; -+ -+ pthread_attr_init(&attr); -+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); -+ err = pthread_create(&ptid, &attr, wait_ocihook_timeout, conf); -+ if (err != 0) { -+ ERROR("Create wait timeout thread failed"); -+ free(conf); -+ goto on_error; -+ } -+ -+ ret = lxc_wait_for_pid_status(f->child_pid); -+ -+ lxc_read_nointr(f->pipe, output, sizeof(output) - 1); -+ close(f->pipe); -+ free(f); -+ -+ if (ret == -1) { -+ SYSERROR("Script exited with error."); -+ goto print_hook; -+ } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) { -+ ERROR("Script exited with status %d. output: %s", WEXITSTATUS(ret), output); -+ lxc_write_error_message(oconf->errfd, "%s:%d: running %s hook caused \"error running hook: exit status %d, output: %s\".", -+ __FILE__, __LINE__, -+ (oconf->which >= NUM_LXC_HOOKS) ? "invalid type" : lxchook_names[oconf->which], -+ WEXITSTATUS(ret), output); -+ -+ goto print_hook; -+ } else if (WIFSIGNALED(ret)) { -+ ERROR("Script terminated by signal %d.", WTERMSIG(ret)); -+ lxc_write_error_message(oconf->errfd, "%s:%d: running %s hook caused \"error running hook: Script terminated by signal %d\".", -+ __FILE__, __LINE__, -+ (oconf->which >= NUM_LXC_HOOKS) ? "invalid type" : lxchook_names[oconf->which], -+ WTERMSIG(ret)); -+ -+ goto print_hook; -+ } -+ -+ return 0; -+ -+on_error: -+ if (f) { -+ if (f->pipe >= 0) -+ close(f->pipe); -+ free(f); -+ } -+ -+print_hook: -+ if (oconf->ocihook->args) -+ err_args_msg = lxc_string_join(" ", (const char **)oconf->ocihook->args, false); -+ if (oconf->ocihook->env) -+ err_envs_msg = lxc_string_join(" ", (const char **)oconf->ocihook->env, false); -+ ERROR("Hook script command: \"%s\", args: \"%s\", envs: \"%s\", timeout: %d.", -+ buffer, err_args_msg ? err_args_msg : "", -+ err_envs_msg ? 
err_envs_msg : "", oconf->ocihook->timeout); -+ -+ free(err_args_msg); -+ free(err_envs_msg); -+ return -1; -+} -+ -+static int run_ocihook_script_argv(const char *name, const char *section, -+ struct oci_hook_conf *oconf, -+ const char *lxcpath, const char *rootfs) -+{ -+ int ret; -+ const char *script = oconf->ocihook->path; -+ char *inmsg = NULL; -+ -+ INFO("Executing script \"%s\" for container \"%s\", config section \"%s\".", -+ script, name, section); -+ -+ inmsg = generate_json_str(name, lxcpath, rootfs); -+ if (!inmsg) { -+ return -1; -+ } -+ -+ ret = run_ocihook_buffer(oconf, inmsg); -+ free(inmsg); -+ inmsg = NULL; -+ return ret; -+} -+ -+static char *get_root_path(const char *path, const char *backend) -+{ -+ char *ret = NULL; -+ char *tmp = NULL; -+ -+ if (!path) { -+ ret = safe_strdup("/"); -+ return ret; -+ } -+ if (!backend) { -+ goto default_out; -+ } -+ -+ if (strcmp(backend, "aufs") == 0 || -+ strcmp(backend, "overlayfs") == 0 || -+ strcmp(backend, "loop") == 0) { -+ tmp = strrchr(path, ':'); -+ if (tmp == NULL) { -+ ERROR("Invalid root path format"); -+ return NULL; -+ } -+ tmp++; -+ ret = safe_strdup(tmp); -+ return ret; -+ } -+ -+default_out: -+ ret = safe_strdup(path); -+ return ret; -+} -+ -+static int do_run_oci_hooks(const char *name, const char *lxcpath, struct lxc_conf *lc, int which, int errfd) -+{ -+ struct oci_hook_conf work_conf = {0}; -+ size_t i; -+ int ret = 0; -+ int nret = 0; -+ char *rootpath = NULL; -+ -+ if (!lc) { -+ return -1; -+ } -+ if (!lc->ocihooks) { -+ return 0; -+ } -+ -+ rootpath = get_root_path(lc->rootfs.path, lc->rootfs.bdev_type); -+ if (!rootpath) { -+ ERROR("Get container %s rootpath failed.", name); -+ return -1; -+ } -+ -+ work_conf.errfd = errfd; -+ work_conf.which = which; -+ switch (which) { -+ case OCI_HOOK_PRESTART: -+ for (i = 0; i < lc->ocihooks->prestart_len; i++) { -+ work_conf.ocihook = lc->ocihooks->prestart[i]; -+ ret = run_ocihook_script_argv(name, "lxc", &work_conf, lxcpath, rootpath); -+ if 
(ret != 0) -+ break; -+ } -+ break; -+ case OCI_HOOK_POSTSTART: -+ for (i = 0; i < lc->ocihooks->poststart_len; i++) { -+ work_conf.ocihook = lc->ocihooks->poststart[i]; -+ nret = run_ocihook_script_argv(name, "lxc", &work_conf, lxcpath, rootpath); -+ if (nret != 0) -+ WARN("running poststart hook %zu failed, ContainerId: %s", i, name); -+ } -+ break; -+ case OCI_HOOK_POSTSTOP: -+ for (i = 0; i < lc->ocihooks->poststop_len; i++) { -+ work_conf.ocihook = lc->ocihooks->poststop[i]; -+ nret = run_ocihook_script_argv(name, "lxc", &work_conf, lxcpath, rootpath); -+ if (nret != 0) -+ WARN("running poststart hook %zu failed, ContainerId: %s", i, name); -+ } -+ break; -+ default: -+ ret = -1; -+ } -+ if (rootpath) -+ free(rootpath); -+ return ret; -+} -+ -+int run_oci_hooks(const char *name, const char *hookname, struct lxc_conf *conf, const char *lxcpath) -+{ -+ int which = -1; -+ -+ if (strcmp(hookname, "oci-prestart") == 0) { -+ which = OCI_HOOK_PRESTART; -+ if (!lxcpath) { -+ ERROR("oci hook require lxcpath"); -+ return -1; -+ } -+ return do_run_oci_hooks(name, lxcpath, conf, which, conf->errpipe[1]); -+ } else if (strcmp(hookname, "oci-poststart") == 0) { -+ which = OCI_HOOK_POSTSTART; -+ if (!lxcpath) { -+ ERROR("oci hook require lxcpath"); -+ return -1; -+ } -+ return do_run_oci_hooks(name, lxcpath, conf, which, conf->errpipe[1]); -+ } else if (strcmp(hookname, "oci-poststop") == 0) { -+ which = OCI_HOOK_POSTSTOP; -+ if (!lxcpath) { -+ ERROR("oci hook require lxcpath"); -+ return -1; -+ } -+ return do_run_oci_hooks(name, lxcpath, conf, which, conf->errpipe[1]); -+ } else -+ return -1; -+ -+ return 0; -+} -+#endif -+ -+int lxc_setup(struct lxc_handler *handler) -+{ -+ int ret; -+ const char *lxcpath = handler->lxcpath, *name = handler->name; -+ struct lxc_conf *lxc_conf = handler->conf; -+ char *keyring_context = NULL; -+#ifdef HAVE_ISULAD -+ bool setup_dev = true; -+#endif -+ -+ ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath); -+#ifdef HAVE_ISULAD -+ if 
(ret < 0) { -+ lxc_write_error_message(lxc_conf->errpipe[1], "%s:%d: failed to setup rootfs %s.", -+ __FILE__, __LINE__, lxc_conf->rootfs.path); -+ return log_error(-1, "Failed to setup rootfs"); -+ } -+#else -+ if (ret < 0) -+ return log_error(-1, "Failed to setup rootfs"); -+#endif -+ -+ if (handler->nsfd[LXC_NS_UTS] == -EBADF) { -+ ret = setup_utsname(lxc_conf->utsname); -+ if (ret < 0) -+ return log_error(-1, "Failed to setup the utsname %s", name); -+ } -+ -+ if (!lxc_conf->keyring_disable_session) { -+ if (lxc_conf->lsm_se_keyring_context) { -+ keyring_context = lxc_conf->lsm_se_keyring_context; -+ } else if (lxc_conf->lsm_se_context) { -+ keyring_context = lxc_conf->lsm_se_context; -+ } -+ -+ ret = lxc_setup_keyring(keyring_context); -+ if (ret < 0) -+ return -1; -+ } -+ -+ if (handler->ns_clone_flags & CLONE_NEWNET) { -+ ret = lxc_setup_network_in_child_namespaces(lxc_conf, -+ &lxc_conf->network); -+ if (ret < 0) -+ return log_error(-1, "Failed to setup network"); -+ -+ ret = lxc_network_send_name_and_ifindex_to_parent(handler); -+ if (ret < 0) -+ return log_error(-1, "Failed to send network device names and ifindices to parent"); -+ } - - if (lxc_conf->autodev > 0) { -+#ifdef HAVE_ISULAD -+ ret = mount_autodev(name, &lxc_conf->rootfs, lxc_conf->autodevtmpfssize, lxcpath, lxc_conf->systemd); -+#else - ret = mount_autodev(name, &lxc_conf->rootfs, lxc_conf->autodevtmpfssize, lxcpath); -+#endif - if (ret < 0) - return log_error(-1, "Failed to mount \"/dev\""); - } -@@ -3254,6 +4785,9 @@ int lxc_setup(struct lxc_handler *handler) - &lxc_conf->mount_list, name, lxcpath); - if (ret < 0) - return log_error(-1, "Failed to setup mount entries"); -+#ifdef HAVE_ISULAD -+ setup_dev = need_setup_dev(lxc_conf, &lxc_conf->mount_list); -+#endif - } - - if (lxc_conf->is_execute) { -@@ -3300,16 +4834,21 @@ int lxc_setup(struct lxc_handler *handler) - return log_error(-1, "Failed to populate \"/dev\""); - } - -+#ifdef HAVE_ISULAD -+ /* isulad: setup devices which will be 
populated in the container. */ -+ if (!lxc_list_empty(&lxc_conf->populate_devs) && setup_dev) { -+ if (setup_populate_devs(&lxc_conf->rootfs, &lxc_conf->populate_devs) != 0) { -+ return log_error(-1, "Failed to setup devices in the container"); -+ } -+ } -+#endif -+ - /* Make sure any start hooks are in the container */ - if (!verify_start_hooks(lxc_conf)) - return log_error(-1, "Failed to verify start hooks"); - -- ret = lxc_create_tmp_proc_mount(lxc_conf); -- if (ret < 0) -- return log_error(-1, "Failed to \"/proc\" LSMs"); -- - ret = lxc_setup_console(&lxc_conf->rootfs, &lxc_conf->console, -- lxc_conf->ttys.dir, pts_mnt_fd); -+ lxc_conf->ttys.dir); - if (ret < 0) - return log_error(-1, "Failed to setup console"); - -@@ -3317,50 +4856,201 @@ int lxc_setup(struct lxc_handler *handler) - if (ret < 0) - return log_error(-1, "Failed to setup \"/dev\" symlinks"); - -+ ret = lxc_create_tmp_proc_mount(lxc_conf); -+ if (ret < 0) -+ return log_error(-1, "Failed to \"/proc\" LSMs"); -+ -+#ifdef HAVE_ISULAD -+ /* Ask father to run oci prestart hooks and wait for him to finish. */ -+ if (lxc_sync_barrier_parent(handler, LXC_SYNC_OCI_PRESTART_HOOK)) { -+ return log_error(-1, "Failed to sync parent to start host hook"); -+ } -+#endif - ret = lxc_setup_rootfs_switch_root(&lxc_conf->rootfs); - if (ret < 0) - return log_error(-1, "Failed to pivot root into rootfs"); - -- /* Setting the boot-id is best-effort for now. */ -- if (lxc_conf->autodev > 0) -- (void)lxc_setup_boot_id(); -+ /* Setting the boot-id is best-effort for now. 
*/ -+ if (lxc_conf->autodev > 0) -+ (void)lxc_setup_boot_id(); -+ -+#ifdef HAVE_ISULAD -+ if (setup_rootfs_mountopts(&lxc_conf->rootfs)) { -+ return log_error(-1, "failed to set rootfs for '%s'", name); -+ } -+ if (lxc_conf->rootfs.path != NULL && setup_dev) { -+ ret = lxc_setup_devpts(lxc_conf); -+ if (ret < 0) { -+ return log_error(-1, "Failed to setup new devpts instance for '%s'", name); -+ } -+ } -+#else -+ ret = lxc_setup_devpts(lxc_conf); -+ if (ret < 0) -+ return log_error(-1, "Failed to setup new devpts instance"); -+#endif -+ -+ ret = lxc_create_ttys(handler); -+ if (ret < 0) -+ return -1; -+ -+#ifdef HAVE_ISULAD -+ /*isulad: set system umask */ -+ umask(lxc_conf->umask); -+#endif -+ -+ ret = setup_personality(lxc_conf->personality); -+ if (ret < 0) -+ return log_error(-1, "Failed to set personality"); -+ -+ /* Set sysctl value to a path under /proc/sys as determined from the -+ * key. For e.g. net.ipv4.ip_forward translated to -+ * /proc/sys/net/ipv4/ip_forward. -+ */ -+ if (!lxc_list_empty(&lxc_conf->sysctls)) { -+ ret = setup_sysctl_parameters(&lxc_conf->sysctls); -+ if (ret < 0) -+ return log_error(-1, "Failed to setup sysctl parameters"); -+ } -+ -+#ifdef HAVE_ISULAD -+ // isulad: setup rootfs masked paths -+ if (!lxc_list_empty(&lxc_conf->rootfs.maskedpaths)) { -+ if (setup_rootfs_maskedpaths(&lxc_conf->rootfs.maskedpaths)) { -+ return log_error(-1, "failed to setup maskedpaths"); -+ } -+ } -+ -+ // isulad: setup rootfs ro paths -+ if (!lxc_list_empty(&lxc_conf->rootfs.ropaths)) { -+ if (setup_rootfs_ropaths(&lxc_conf->rootfs.ropaths)) { -+ return log_error(-1, "failed to setup readonlypaths"); -+ } -+ } -+ -+ //isulad: system container, remount /proc/sys/xxx by mount_list -+ if (lxc_conf->systemd != NULL && strcmp(lxc_conf->systemd, "true") == 0) { -+ if (!lxc_list_empty(&lxc_conf->mount_list)) { -+ if (remount_proc_sys_mount_entries(&lxc_conf->mount_list, -+ lxc_conf->lsm_aa_allow_nesting)) { -+ return log_error(-1, "failed to remount /proc/sys"); 
-+ } -+ } -+ } -+ -+ // isulad: create link /etc/mtab for /proc/mounts -+ if (create_mtab_link() != 0) { -+ return log_error(-1, "failed to create link /etc/mtab for target /proc/mounts"); -+ } -+#endif -+ -+ if (!lxc_list_empty(&lxc_conf->keepcaps)) { -+ if (!lxc_list_empty(&lxc_conf->caps)) -+ return log_error(-1, "Container requests lxc.cap.drop and lxc.cap.keep: either use lxc.cap.drop or lxc.cap.keep, not both"); -+ -+ if (dropcaps_except(&lxc_conf->keepcaps)) -+ return log_error(-1, "Failed to keep capabilities"); -+ } else if (setup_caps(&lxc_conf->caps)) { -+ return log_error(-1, "Failed to drop capabilities"); -+ } -+ -+ NOTICE("The container \"%s\" is set up", name); -+ -+ return 0; -+} -+ -+#ifdef HAVE_ISULAD -+/* isulad drop caps for container*/ -+int lxc_drop_caps(struct lxc_conf *conf) -+{ -+#define __DEF_CAP_TO_MASK(x) (1U << ((x) & 31)) -+#if HAVE_LIBCAP -+ int ret = 0; -+ struct lxc_list *iterator = NULL; -+ char *keep_entry = NULL; -+ size_t i = 0; -+ int capid; -+ size_t numcaps = (size_t)lxc_caps_last_cap() + 1; -+ struct lxc_list *caps = NULL; -+ int *caplist = NULL; -+ -+ if (lxc_list_empty(&conf->keepcaps)) -+ return 0; -+ -+ caps = &conf->keepcaps; -+ -+ if (numcaps <= 0 || numcaps > 200) -+ return -1; -+ -+ // caplist[i] is 1 if we keep capability i -+ caplist = malloc(numcaps * sizeof(int)); -+ if (caplist == NULL) { -+ ERROR("Out of memory"); -+ return -1; -+ } -+ (void)memset(caplist, 0, numcaps * sizeof(int)); -+ -+ lxc_list_for_each(iterator, caps) { -+ -+ keep_entry = iterator->elem; -+ /* isulad: Do not keep any cap*/ -+ if (strcmp(keep_entry, "ISULAD_KEEP_NONE") == 0) { -+ DEBUG("Do not keep any capability"); -+ for(i = 0; i < numcaps; i++) { -+ caplist[i] = 0; -+ } -+ break; -+ } -+ -+ capid = parse_cap(keep_entry); -+ -+ if (capid == -2) -+ continue; -+ -+ if (capid < 0) { -+ ERROR("unknown capability %s", keep_entry); -+ ret = -1; -+ goto out; -+ } -+ -+ DEBUG("keep capability '%s' (%d)", keep_entry, capid); -+ -+ caplist[capid] 
= 1; -+ } -+ -+ struct __user_cap_header_struct cap_header_data; -+ struct __user_cap_data_struct cap_data_data[2]; - -- ret = lxc_setup_devpts(lxc_conf); -- if (ret < 0) -- return log_error(-1, "Failed to setup new devpts instance"); -+ cap_user_header_t cap_header = &cap_header_data; -+ cap_user_data_t cap_data = &cap_data_data[0]; - -- ret = lxc_create_ttys(handler); -- if (ret < 0) -- return -1; -+ memset(cap_header, 0,sizeof(struct __user_cap_header_struct)); -+ memset(cap_data, 0, sizeof(struct __user_cap_data_struct) * 2); - -- ret = setup_personality(lxc_conf->personality); -- if (ret < 0) -- return log_error(-1, "Failed to set personality"); -+ cap_header->pid = 0; -+ cap_header->version = _LINUX_CAPABILITY_VERSION_3; - -- /* Set sysctl value to a path under /proc/sys as determined from the -- * key. For e.g. net.ipv4.ip_forward translated to -- * /proc/sys/net/ipv4/ip_forward. -- */ -- if (!lxc_list_empty(&lxc_conf->sysctls)) { -- ret = setup_sysctl_parameters(&lxc_conf->sysctls); -- if (ret < 0) -- return log_error(-1, "Failed to setup sysctl parameters"); -+ for (i = 0; i < numcaps; i++) { -+ if (caplist[i]) { -+ cap_data[CAP_TO_INDEX(i)].effective = cap_data[CAP_TO_INDEX(i)].effective | (i > 31 ? __DEF_CAP_TO_MASK(i % 32) : __DEF_CAP_TO_MASK(i)); -+ cap_data[CAP_TO_INDEX(i)].permitted = cap_data[CAP_TO_INDEX(i)].permitted | (i > 31 ? __DEF_CAP_TO_MASK(i % 32) : __DEF_CAP_TO_MASK(i)); -+ cap_data[CAP_TO_INDEX(i)].inheritable = cap_data[CAP_TO_INDEX(i)].inheritable | (i > 31 ? 
__DEF_CAP_TO_MASK(i % 32) : __DEF_CAP_TO_MASK(i)); -+ } - } - -- if (!lxc_list_empty(&lxc_conf->keepcaps)) { -- if (!lxc_list_empty(&lxc_conf->caps)) -- return log_error(-1, "Container requests lxc.cap.drop and lxc.cap.keep: either use lxc.cap.drop or lxc.cap.keep, not both"); -- -- if (dropcaps_except(&lxc_conf->keepcaps)) -- return log_error(-1, "Failed to keep capabilities"); -- } else if (setup_caps(&lxc_conf->caps)) { -- return log_error(-1, "Failed to drop capabilities"); -+ if (capset(cap_header, cap_data)) { -+ SYSERROR("Failed to set capabilitys"); -+ ret = -1; -+ goto out; - } - -- NOTICE("The container \"%s\" is set up", name); -+#endif - -- return 0; -+out: -+ free(caplist); -+ return ret; - } -+#endif - - int run_lxc_hooks(const char *name, char *hookname, struct lxc_conf *conf, - char *argv[]) -@@ -3406,7 +5096,7 @@ static int lxc_free_idmap(struct lxc_list *id_map) - { - struct lxc_list *it, *next; - -- lxc_list_for_each_safe(it, id_map, next) { -+ lxc_list_for_each_safe (it, id_map, next) { - lxc_list_del(it); - free(it->elem); - free(it); -@@ -3753,6 +5443,23 @@ void lxc_conf_free(struct lxc_conf *conf) - free(conf->cgroup_meta.controllers); - free(conf->shmount.path_host); - free(conf->shmount.path_cont); -+#ifdef HAVE_ISULAD -+ free(conf->container_info_file); -+ if (conf->exit_fd != -1) { -+ close(conf->exit_fd); -+ } -+ free(conf->systemd); -+ lxc_clear_init_args(conf); -+ lxc_clear_init_groups(conf); -+ lxc_clear_populate_devices(conf); -+ lxc_clear_rootfs_masked_paths(conf); -+ lxc_clear_rootfs_ro_paths(conf); -+ free(conf->errmsg); -+ lxc_close_error_pipe(conf->errpipe); -+ if (conf->ocihooks) { -+ free_oci_runtime_spec_hooks(conf->ocihooks); -+ } -+#endif - free(conf); - } - -@@ -3842,19 +5549,18 @@ static struct id_map *mapped_hostid_add(const struct lxc_conf *conf, uid_t id, - - /* Reuse existing mapping. 
*/ - tmp = find_mapped_hostid_entry(conf, id, type); -- if (tmp) { -- memcpy(entry, tmp, sizeof(*entry)); -- } else { -- /* Find new mapping. */ -- hostid_mapped = find_unmapped_nsid(conf, type); -- if (hostid_mapped < 0) -- return log_debug(NULL, "Failed to find free mapping for id %d", id); -+ if (tmp) -+ return memcpy(entry, tmp, sizeof(*entry)); - -- entry->idtype = type; -- entry->nsid = hostid_mapped; -- entry->hostid = (unsigned long)id; -- entry->range = 1; -- } -+ /* Find new mapping. */ -+ hostid_mapped = find_unmapped_nsid(conf, type); -+ if (hostid_mapped < 0) -+ return log_debug(NULL, "Failed to find free mapping for id %d", id); -+ -+ entry->idtype = type; -+ entry->nsid = hostid_mapped; -+ entry->hostid = (unsigned long)id; -+ entry->range = 1; - - return move_ptr(entry); - } -@@ -3878,7 +5584,7 @@ static struct lxc_list *get_minimal_idmap(const struct lxc_conf *conf, - euid = geteuid(); - if (euid >= container_root_uid->hostid && - euid < (container_root_uid->hostid + container_root_uid->range)) -- host_uid_map = move_ptr(container_root_uid); -+ host_uid_map = container_root_uid; - - container_root_gid = mapped_nsid_add(conf, nsgid, ID_TYPE_GID); - if (!container_root_gid) -@@ -3886,7 +5592,7 @@ static struct lxc_list *get_minimal_idmap(const struct lxc_conf *conf, - egid = getegid(); - if (egid >= container_root_gid->hostid && - egid < (container_root_gid->hostid + container_root_gid->range)) -- host_gid_map = move_ptr(container_root_gid); -+ host_gid_map = container_root_gid; - - /* Check whether the {g,u}id of the user has a mapping. */ - if (!host_uid_map) -@@ -3909,35 +5615,45 @@ static struct lxc_list *get_minimal_idmap(const struct lxc_conf *conf, - tmplist = malloc(sizeof(*tmplist)); - if (!tmplist) - return NULL; -- /* idmap will now keep track of that memory. 
*/ -- lxc_list_add_elem(tmplist, move_ptr(host_uid_map)); -+ lxc_list_add_elem(tmplist, container_root_uid); - lxc_list_add_tail(idmap, tmplist); - -- if (container_root_uid) { -+ if (host_uid_map && (host_uid_map != container_root_uid)) { -+ /* idmap will now keep track of that memory. */ -+ move_ptr(container_root_uid); -+ - /* Add container root to the map. */ - tmplist = malloc(sizeof(*tmplist)); - if (!tmplist) - return NULL; -- /* idmap will now keep track of that memory. */ -- lxc_list_add_elem(tmplist, move_ptr(container_root_uid)); -+ lxc_list_add_elem(tmplist, host_uid_map); - lxc_list_add_tail(idmap, tmplist); - } -+ /* idmap will now keep track of that memory. */ -+ move_ptr(container_root_uid); -+ /* idmap will now keep track of that memory. */ -+ move_ptr(host_uid_map); - - tmplist = malloc(sizeof(*tmplist)); - if (!tmplist) - return NULL; -- /* idmap will now keep track of that memory. */ -- lxc_list_add_elem(tmplist, move_ptr(host_gid_map)); -+ lxc_list_add_elem(tmplist, container_root_gid); - lxc_list_add_tail(idmap, tmplist); - -- if (container_root_gid) { -+ if (host_gid_map && (host_gid_map != container_root_gid)) { -+ /* idmap will now keep track of that memory. */ -+ move_ptr(container_root_gid); -+ - tmplist = malloc(sizeof(*tmplist)); - if (!tmplist) - return NULL; -- /* idmap will now keep track of that memory. */ -- lxc_list_add_elem(tmplist, move_ptr(container_root_gid)); -+ lxc_list_add_elem(tmplist, host_gid_map); - lxc_list_add_tail(idmap, tmplist); - } -+ /* idmap will now keep track of that memory. */ -+ move_ptr(container_root_gid); -+ /* idmap will now keep track of that memory. 
*/ -+ move_ptr(host_gid_map); - - TRACE("Allocated minimal idmapping for ns uid %d and ns gid %d", nsuid, nsgid); - -@@ -3966,13 +5682,9 @@ int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data, - call_cleaner(lxc_free_idmap) struct lxc_list *idmap = NULL; - int ret = -1, status = -1; - char c = '1'; -- struct userns_fn_data d = { -- .arg = data, -- .fn = fn, -- .fn_name = fn_name, -- }; - pid_t pid; - int pipe_fds[2]; -+ struct userns_fn_data d; - - if (!conf) - return -EINVAL; -@@ -3985,6 +5697,9 @@ int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data, - if (ret < 0) - return -errno; - -+ d.fn = fn; -+ d.fn_name = fn_name; -+ d.arg = data; - d.p[0] = pipe_fds[0]; - d.p[1] = pipe_fds[1]; - -@@ -4344,226 +6059,6 @@ on_error: - return ret; - } - --static int add_idmap_entry(struct lxc_list *idmap, enum idtype idtype, -- unsigned long nsid, unsigned long hostid, -- unsigned long range) --{ -- __do_free struct id_map *new_idmap = NULL; -- __do_free struct lxc_list *new_list = NULL; -- -- new_idmap = zalloc(sizeof(*new_idmap)); -- if (!new_idmap) -- return ret_errno(ENOMEM); -- -- new_idmap->idtype = idtype; -- new_idmap->hostid = hostid; -- new_idmap->nsid = nsid; -- new_idmap->range = range; -- -- new_list = zalloc(sizeof(*new_list)); -- if (!new_list) -- return ret_errno(ENOMEM); -- -- new_list->elem = move_ptr(new_idmap); -- lxc_list_add_tail(idmap, move_ptr(new_list)); -- -- INFO("Adding id map: type %c nsid %lu hostid %lu range %lu", -- idtype == ID_TYPE_UID ? 
'u' : 'g', nsid, hostid, range); -- return 0; --} -- --int userns_exec_mapped_root(const char *path, int path_fd, -- const struct lxc_conf *conf) --{ -- call_cleaner(lxc_free_idmap) struct lxc_list *idmap = NULL; -- __do_close int fd = -EBADF; -- int target_fd = -EBADF; -- char c = '1'; -- ssize_t ret; -- pid_t pid; -- int sock_fds[2]; -- uid_t container_host_uid, hostuid; -- gid_t container_host_gid, hostgid; -- struct stat st; -- -- if (!conf || (!path && path_fd < 0)) -- return ret_errno(EINVAL); -- -- if (!path) -- path = "(null)"; -- -- container_host_uid = get_mapped_rootid(conf, ID_TYPE_UID); -- if (!uid_valid(container_host_uid)) -- return log_error(-1, "No uid mapping for container root"); -- -- container_host_gid = get_mapped_rootid(conf, ID_TYPE_GID); -- if (!gid_valid(container_host_gid)) -- return log_error(-1, "No gid mapping for container root"); -- -- if (path_fd < 0) { -- fd = open(path, O_CLOEXEC | O_NOCTTY); -- if (fd < 0) -- return log_error_errno(-errno, errno, "Failed to open \"%s\"", path); -- target_fd = fd; -- } else { -- target_fd = path_fd; -- } -- -- hostuid = geteuid(); -- /* We are root so chown directly. */ -- if (hostuid == 0) { -- ret = fchown(target_fd, container_host_uid, container_host_gid); -- if (ret) -- return log_error_errno(-errno, errno, -- "Failed to fchown(%d(%s), %d, %d)", -- target_fd, path, container_host_uid, -- container_host_gid); -- return log_trace(0, "Chowned %d(%s) to uid %d and %d", target_fd, path, -- container_host_uid, container_host_gid); -- } -- -- /* The container's root host id matches */ -- if (container_host_uid == hostuid) -- return log_info(0, "Container root id is mapped to our uid"); -- -- /* Get the current ids of our target. 
*/ -- ret = fstat(target_fd, &st); -- if (ret) -- return log_error_errno(-errno, errno, "Failed to stat \"%s\"", path); -- -- hostgid = getegid(); -- if (st.st_uid == hostuid && mapped_hostid(st.st_gid, conf, ID_TYPE_GID) < 0) { -- ret = fchown(target_fd, -1, hostgid); -- if (ret) -- return log_error_errno(-errno, errno, -- "Failed to fchown(%d(%s), -1, %d)", -- target_fd, path, hostgid); -- TRACE("Chowned %d(%s) to -1:%d", target_fd, path, hostgid); -- } -- -- idmap = malloc(sizeof(*idmap)); -- if (!idmap) -- return -ENOMEM; -- lxc_list_init(idmap); -- -- /* "u:0:rootuid:1" */ -- ret = add_idmap_entry(idmap, ID_TYPE_UID, 0, container_host_uid, 1); -- if (ret < 0) -- return log_error_errno(ret, -ret, "Failed to add idmap entry"); -- -- /* "u:hostuid:hostuid:1" */ -- ret = add_idmap_entry(idmap, ID_TYPE_UID, hostuid, hostuid, 1); -- if (ret < 0) -- return log_error_errno(ret, -ret, "Failed to add idmap entry"); -- -- /* "g:0:rootgid:1" */ -- ret = add_idmap_entry(idmap, ID_TYPE_GID, 0, container_host_gid, 1); -- if (ret < 0) -- return log_error_errno(ret, -ret, "Failed to add idmap entry"); -- -- /* "g:hostgid:hostgid:1" */ -- ret = add_idmap_entry(idmap, ID_TYPE_GID, hostgid, hostgid, 1); -- if (ret < 0) -- return log_error_errno(ret, -ret, "Failed to add idmap entry"); -- -- if (hostgid != st.st_gid) { -- /* "g:pathgid:rootgid+pathgid:1" */ -- ret = add_idmap_entry(idmap, ID_TYPE_GID, st.st_gid, -- container_host_gid + (gid_t)st.st_gid, 1); -- if (ret < 0) -- return log_error_errno(ret, -ret, "Failed to add idmap entry"); -- } -- -- ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, sock_fds); -- if (ret < 0) -- return -errno; -- -- pid = fork(); -- if (pid < 0) { -- SYSERROR("Failed to create new process"); -- goto on_error; -- } -- -- if (pid == 0) { -- close_prot_errno_disarm(sock_fds[1]); -- -- ret = unshare(CLONE_NEWUSER); -- if (ret < 0) { -- SYSERROR("Failed to unshare new user namespace"); -- _exit(EXIT_FAILURE); -- } -- -- ret = 
lxc_write_nointr(sock_fds[0], &c, 1); -- if (ret != 1) -- _exit(EXIT_FAILURE); -- -- ret = lxc_read_nointr(sock_fds[0], &c, 1); -- if (ret != 1) -- _exit(EXIT_FAILURE); -- -- close_prot_errno_disarm(sock_fds[0]); -- -- if (!lxc_switch_uid_gid(0, 0)) -- _exit(EXIT_FAILURE); -- -- if (!lxc_setgroups(0, NULL)) -- _exit(EXIT_FAILURE); -- -- ret = fchown(target_fd, 0, st.st_gid); -- if (ret) { -- SYSERROR("Failed to chown %d(%s) to -1:%d", target_fd, path, st.st_gid); -- _exit(EXIT_FAILURE); -- } -- -- TRACE("Chowned %d(%s) to 0:%d", target_fd, path, st.st_gid); -- _exit(EXIT_SUCCESS); -- } -- -- close_prot_errno_disarm(sock_fds[0]); -- -- if (lxc_log_get_level() == LXC_LOG_LEVEL_TRACE || -- conf->loglevel == LXC_LOG_LEVEL_TRACE) { -- struct id_map *map; -- struct lxc_list *it; -- -- lxc_list_for_each(it, idmap) { -- map = it->elem; -- TRACE("Establishing %cid mapping for \"%d\" in new user namespace: nsuid %lu - hostid %lu - range %lu", -- (map->idtype == ID_TYPE_UID) ? 'u' : 'g', pid, map->nsid, map->hostid, map->range); -- } -- } -- -- ret = lxc_read_nointr(sock_fds[1], &c, 1); -- if (ret != 1) { -- SYSERROR("Failed waiting for child process %d\" to tell us to proceed", pid); -- goto on_error; -- } -- -- /* Set up {g,u}id mapping for user namespace of child process. */ -- ret = lxc_map_ids(idmap, pid); -- if (ret < 0) { -- ERROR("Error setting up {g,u}id mappings for child process \"%d\"", pid); -- goto on_error; -- } -- -- /* Tell child to proceed. */ -- ret = lxc_write_nointr(sock_fds[1], &c, 1); -- if (ret != 1) { -- SYSERROR("Failed telling child process \"%d\" to proceed", pid); -- goto on_error; -- } -- --on_error: -- close_prot_errno_disarm(sock_fds[0]); -- close_prot_errno_disarm(sock_fds[1]); -- -- /* Wait for child to finish. 
*/ -- if (pid < 0) -- return -1; -- -- return wait_for_pid(pid); --} -- - /* not thread-safe, do not use from api without first forking */ - static char *getuname(void) - { -@@ -4775,3 +6270,89 @@ struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings) - - return result; - } -+ -+#ifdef HAVE_ISULAD -+/*isulad clear init args*/ -+int lxc_clear_init_args(struct lxc_conf *lxc_conf) -+{ -+ int i; -+ -+ for (i = 0; i < lxc_conf->init_argc; i++) { -+ free(lxc_conf->init_argv[i]); -+ lxc_conf->init_argv[i] = NULL; -+ } -+ free(lxc_conf->init_argv); -+ lxc_conf->init_argv = NULL; -+ lxc_conf->init_argc = 0; -+ -+ return 0; -+} -+ -+/*isulad clear init groups*/ -+int lxc_clear_init_groups(struct lxc_conf *lxc_conf) -+{ -+ free(lxc_conf->init_groups); -+ lxc_conf->init_groups = NULL; -+ lxc_conf->init_groups_len = 0; -+ -+ return 0; -+} -+ -+/*isulad: clear populate devices*/ -+int lxc_clear_populate_devices(struct lxc_conf *c) -+{ -+ struct lxc_list *it = NULL; -+ struct lxc_list *next = NULL; -+ -+ lxc_list_for_each_safe(it, &c->populate_devs, next) { -+ struct lxc_populate_devs *dev_elem = it->elem; -+ lxc_list_del(it); -+ free(dev_elem->name); -+ free(dev_elem->type); -+ free(dev_elem); -+ free(it); -+ } -+ return 0; -+} -+ -+/*isulad: clear rootfs masked paths*/ -+int lxc_clear_rootfs_masked_paths(struct lxc_conf *c) -+{ -+ struct lxc_list *it = NULL; -+ struct lxc_list *next = NULL; -+ -+ lxc_list_for_each_safe(it, &c->rootfs.maskedpaths, next) { -+ lxc_list_del(it); -+ free(it->elem); -+ free(it); -+ } -+ return 0; -+} -+ -+/*isulad: clear rootfs ro paths*/ -+int lxc_clear_rootfs_ro_paths(struct lxc_conf *c) -+{ -+ struct lxc_list *it = NULL; -+ struct lxc_list *next = NULL; -+ -+ lxc_list_for_each_safe(it, &c->rootfs.ropaths, next) { -+ lxc_list_del(it); -+ free(it->elem); -+ free(it); -+ } -+ return 0; -+} -+ -+/*isulad: close error pipe */ -+void lxc_close_error_pipe(int *errpipe) -+{ -+ if (errpipe[0] >= 0) { -+ close(errpipe[0]); -+ errpipe[0] = 
-1; -+ } -+ if (errpipe[1] >= 0) { -+ close(errpipe[1]); -+ errpipe[1] = -1; -+ } -+} -+#endif -diff --git a/src/lxc/conf.h b/src/lxc/conf.h -index b72afbaa5..4b6409e3e 100644 ---- a/src/lxc/conf.h -+++ b/src/lxc/conf.h -@@ -23,6 +23,10 @@ - #include "start.h" - #include "terminal.h" - -+#ifdef HAVE_ISULAD -+#include "oci_runtime_hooks.h" -+#endif -+ - #if HAVE_SYS_RESOURCE_H - #include - #endif -@@ -60,9 +64,6 @@ struct lxc_cgroup { - struct /* meta */ { - char *controllers; - char *dir; -- char *monitor_dir; -- char *container_dir; -- char *namespace_dir; - bool relative; - }; - }; -@@ -146,6 +147,8 @@ struct lxc_tty_info { - * @mountflags : the portion of @options that are flags - * @data : the portion of @options that are not flags - * @managed : whether it is managed by LXC -+ * @maskedpaths: A list of paths to be msked over inside the container -+ * @ropaths : A list of paths to be remounted with readonly inside the container - */ - struct lxc_rootfs { - char *path; -@@ -155,6 +158,16 @@ struct lxc_rootfs { - unsigned long mountflags; - char *data; - bool managed; -+ -+#ifdef HAVE_ISULAD -+ /* isulad: maskedpaths */ -+ struct lxc_list maskedpaths; -+ /* isulad: ropaths */ -+ struct lxc_list ropaths; -+ /* isulad: errfd */ -+ int errfd; -+#endif -+ - }; - - /* -@@ -203,6 +216,11 @@ enum lxchooks { - LXCHOOK_CLONE, - LXCHOOK_DESTROY, - LXCHOOK_START_HOST, -+#ifdef HAVE_ISULAD -+ OCI_HOOK_PRESTART, -+ OCI_HOOK_POSTSTART, -+ OCI_HOOK_POSTSTOP, -+#endif - NUM_LXC_HOOKS - }; - -@@ -233,6 +251,27 @@ struct device_item { - int global_rule; - }; - -+#ifdef HAVE_ISULAD -+/* -+ * iSulad: Defines a structure to store the devices which will -+ * be attached in container -+ * @name : the target device name in container -+ * @type : the type of target device "c" or "b" -+ * @mode : file mode for the device -+ * @maj : major number for the device -+ * @min : minor number for the device -+ */ -+struct lxc_populate_devs { -+ char *name; -+ char *type; -+ mode_t file_mode; -+ 
int maj; -+ int min; -+ uid_t uid; -+ gid_t gid; -+}; -+#endif -+ - struct lxc_conf { - /* Pointer to the name of the container. Do not free! */ - const char *name; -@@ -401,11 +440,39 @@ struct lxc_conf { - /* Absolute path (in the container) to the shared mount point */ - char *path_cont; - } shmount; -+ -+#ifdef HAVE_ISULAD -+ /* -+ * isulad: support oci hook -+ * */ -+ oci_runtime_spec_hooks *ocihooks; -+ -+ /* isulad add: init args used to repalce init_cmd*/ -+ char **init_argv; -+ size_t init_argc; -+ -+ gid_t *init_groups; -+ size_t init_groups_len; -+ -+ /* populate devices*/ -+ struct lxc_list populate_devs; -+ mode_t umask; //umask value -+ -+ char *container_info_file; -+ -+ int exit_fd; /* exit fifo fd*/ -+ -+ char *errmsg; /* record error messages */ -+ -+ int errpipe[2];//pipdfd for get error message of child or grandchild process. -+ -+ char *systemd; //systemd value -+#endif -+ - }; - - extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, -- size_t buf_size) --__access_r(3, 4); -+ size_t buf_size); - - #ifdef HAVE_TLS - extern thread_local struct lxc_conf *current_config; -@@ -439,19 +506,22 @@ extern int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, - const char *name, const char *lxcpath); - extern int lxc_setup(struct lxc_handler *handler); - extern int lxc_setup_parent(struct lxc_handler *handler); -+#ifdef HAVE_ISULAD -+extern int setup_resource_limits(struct lxc_list *limits, pid_t pid, int errfd); -+#else - extern int setup_resource_limits(struct lxc_list *limits, pid_t pid); -+#endif - extern int find_unmapped_nsid(const struct lxc_conf *conf, enum idtype idtype); - extern int mapped_hostid(unsigned id, const struct lxc_conf *conf, - enum idtype idtype); -+extern int chown_mapped_root(const char *path, const struct lxc_conf *conf); - extern int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), - void *data, const char *fn_name); - extern int userns_exec_full(struct lxc_conf *conf, int (*fn)(void *), 
- void *data, const char *fn_name); --extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, -- char **mntdata); - extern int parse_propagationopts(const char *mntopts, unsigned long *pflags); - extern void tmp_proc_unmount(struct lxc_conf *lxc_conf); --extern void turn_into_dependent_mounts(void); -+extern void remount_all_slave(void); - extern void suggest_default_idmap(void); - extern FILE *make_anonymous_mount_file(struct lxc_list *mount, - bool include_nesting_helpers); -@@ -473,11 +543,24 @@ extern int lxc_clear_namespace(struct lxc_conf *c); - extern int userns_exec_minimal(const struct lxc_conf *conf, - int (*fn_parent)(void *), void *fn_parent_data, - int (*fn_child)(void *), void *fn_child_data); --extern int userns_exec_mapped_root(const char *path, int path_fd, -- const struct lxc_conf *conf); --static inline int chown_mapped_root(const char *path, const struct lxc_conf *conf) --{ -- return userns_exec_mapped_root(path, -EBADF, conf); --} -+#ifdef HAVE_ISULAD -+// isulad modify -+extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, -+ unsigned long *pflags, char **mntdata); -+#else -+extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, -+ char **mntdata); -+#endif - -+#ifdef HAVE_ISULAD -+// isulad add -+int lxc_clear_init_args(struct lxc_conf *lxc_conf); -+int lxc_clear_init_groups(struct lxc_conf *lxc_conf); -+int lxc_clear_populate_devices(struct lxc_conf *c); -+int lxc_clear_rootfs_masked_paths(struct lxc_conf *c); -+int lxc_clear_rootfs_ro_paths(struct lxc_conf *c); -+int lxc_drop_caps(struct lxc_conf *conf); -+int run_oci_hooks(const char *name, const char *hookname, struct lxc_conf *conf, const char *lxcpath); -+void lxc_close_error_pipe(int *errpipe); -+#endif - #endif /* __LXC_CONF_H */ -diff --git a/src/lxc/confile.c b/src/lxc/confile.c -index 4c27e7d4b..b1d101a9d 100644 ---- a/src/lxc/confile.c -+++ b/src/lxc/confile.c -@@ -147,6 +147,18 @@ lxc_config_define(tty_dir); - 
lxc_config_define(uts_name); - lxc_config_define(sysctl); - lxc_config_define(proc); -+#ifdef HAVE_ISULAD -+lxc_config_define(init_args); -+lxc_config_define(init_groups); -+lxc_config_define(populate_device); -+lxc_config_define(umask); -+lxc_config_define(rootfs_masked_paths); -+lxc_config_define(rootfs_ro_paths); -+lxc_config_define(systemd); -+lxc_config_define(console_log_driver); -+lxc_config_define(console_syslog_tag); -+lxc_config_define(console_syslog_facility); -+#endif - - /* - * Important Note: -@@ -259,6 +271,18 @@ static struct lxc_config_t config_jump_table[] = { - { "lxc.uts.name", set_config_uts_name, get_config_uts_name, clr_config_uts_name, }, - { "lxc.sysctl", set_config_sysctl, get_config_sysctl, clr_config_sysctl, }, - { "lxc.proc", set_config_proc, get_config_proc, clr_config_proc, }, -+#ifdef HAVE_ISULAD -+ { "lxc.isulad.init.args", set_config_init_args, get_config_init_args, clr_config_init_args, }, -+ { "lxc.isulad.init.groups", set_config_init_groups, get_config_init_groups, clr_config_init_groups, }, -+ { "lxc.isulad.populate.device", set_config_populate_device, get_config_populate_device, clr_config_populate_device, }, -+ { "lxc.isulad.umask", set_config_umask, get_config_umask, clr_config_umask, }, -+ { "lxc.isulad.rootfs.maskedpaths", set_config_rootfs_masked_paths, get_config_rootfs_masked_paths, clr_config_rootfs_masked_paths, }, -+ { "lxc.isulad.rootfs.ropaths", set_config_rootfs_ro_paths, get_config_rootfs_ro_paths, clr_config_rootfs_ro_paths, }, -+ { "lxc.isulad.systemd", set_config_systemd, get_config_systemd, clr_config_systemd, }, -+ { "lxc.console.logdriver", set_config_console_log_driver, get_config_console_log_driver, clr_config_console_log_driver, }, -+ { "lxc.console.syslog_tag", set_config_console_syslog_tag, get_config_console_syslog_tag, clr_config_console_syslog_tag, }, -+ { "lxc.console.syslog_facility", set_config_console_syslog_facility, get_config_console_syslog_facility, clr_config_console_syslog_facility, }, 
-+#endif - }; - - static const size_t config_jump_table_size = sizeof(config_jump_table) / sizeof(struct lxc_config_t); -@@ -300,18 +324,14 @@ static int set_config_net_type(const char *key, const char *value, - netdev->type = LXC_NET_VETH; - lxc_list_init(&netdev->priv.veth_attr.ipv4_routes); - lxc_list_init(&netdev->priv.veth_attr.ipv6_routes); -- if (!lxc_veth_flag_to_mode(netdev->priv.veth_attr.mode)) -- lxc_veth_mode_to_flag(&netdev->priv.veth_attr.mode, "bridge"); -+ lxc_veth_mode_to_flag(&netdev->priv.veth_attr.mode, "bridge"); - } else if (strcmp(value, "macvlan") == 0) { - netdev->type = LXC_NET_MACVLAN; -- if (!lxc_macvlan_flag_to_mode(netdev->priv.veth_attr.mode)) -- lxc_macvlan_mode_to_flag(&netdev->priv.macvlan_attr.mode, "private"); -+ lxc_macvlan_mode_to_flag(&netdev->priv.macvlan_attr.mode, "private"); - } else if (strcmp(value, "ipvlan") == 0) { - netdev->type = LXC_NET_IPVLAN; -- if (!lxc_ipvlan_flag_to_mode(netdev->priv.ipvlan_attr.mode)) -- lxc_ipvlan_mode_to_flag(&netdev->priv.ipvlan_attr.mode, "l3"); -- if (!lxc_ipvlan_flag_to_isolation(netdev->priv.ipvlan_attr.isolation)) -- lxc_ipvlan_isolation_to_flag(&netdev->priv.ipvlan_attr.isolation, "bridge"); -+ lxc_ipvlan_mode_to_flag(&netdev->priv.ipvlan_attr.mode, "l3"); -+ lxc_ipvlan_isolation_to_flag(&netdev->priv.ipvlan_attr.isolation, "bridge"); - } else if (strcmp(value, "vlan") == 0) { - netdev->type = LXC_NET_VLAN; - } else if (strcmp(value, "phys") == 0) { -@@ -1348,6 +1368,10 @@ static int set_config_environment(const char *key, const char *value, - { - struct lxc_list *list_item = NULL; - -+#ifdef HAVE_ISULAD -+ char *replaced = NULL; -+#endif -+ - if (lxc_config_value_empty(value)) - return lxc_clear_environment(lxc_conf); - -@@ -1368,7 +1392,16 @@ static int set_config_environment(const char *key, const char *value, - env_var[1] = env_val; - list_item->elem = lxc_string_join("=", env_var, false); - } else { -+#ifdef HAVE_ISULAD -+ /* isulad: recover space replaced by SPACE_MAGIC_STR */ 
-+ replaced = lxc_string_replace(SPACE_MAGIC_STR, " ", value); -+ if(!replaced) -+ goto on_error; -+ -+ list_item->elem = replaced; -+#else - list_item->elem = strdup(value); -+#endif - } - - if (!list_item->elem) -@@ -2291,11 +2324,14 @@ static int set_config_console_rotate(const char *key, const char *value, - if (lxc_safe_uint(value, &lxc_conf->console.log_rotate) < 0) - return -1; - -+#ifndef HAVE_ISULAD -+ /* isulad: support rotate muti-files */ - if (lxc_conf->console.log_rotate > 1) { - ERROR("The \"lxc.console.rotate\" config key can only be set " - "to 0 or 1"); - return -1; - } -+#endif - - return 0; - } -@@ -2581,6 +2617,11 @@ static int set_config_rootfs_options(const char *key, const char *value, - int ret; - struct lxc_rootfs *rootfs = &lxc_conf->rootfs; - -+#ifdef HAVE_ISULAD -+ ret = parse_mntopts(value, &mflags, &pflags, &mdata); -+ if (ret < 0) -+ return -EINVAL; -+#else - ret = parse_mntopts(value, &mflags, &mdata); - if (ret < 0) - return -EINVAL; -@@ -2590,6 +2631,7 @@ static int set_config_rootfs_options(const char *key, const char *value, - free(mdata); - return -EINVAL; - } -+#endif - - ret = set_config_string_item(&opts, value); - if (ret < 0) { -@@ -2722,6 +2764,54 @@ struct parse_line_conf { - bool from_include; - }; - -+#ifdef HAVE_ISULAD -+// escape_string_decode compress some escape characters -+static char *escape_string_decode(const char *src) -+{ -+ size_t src_end = 0; -+ size_t dst_end = 0; -+ size_t len = 0; -+ char *dst = NULL; -+ -+ if (src == NULL) { -+ return NULL; -+ } -+ -+ len = strlen(src); -+ if (len == 0) { -+ return NULL; -+ } -+ -+ dst = calloc(1, len + 1); -+ if (dst == NULL) { -+ ERROR("Out of memory"); -+ return NULL; -+ } -+ -+ while(src_end < len) { -+ if (src[src_end] == '\\') { -+ switch (src[++src_end]) -+ { -+ case 'r': dst[dst_end] = '\r'; break; -+ case 'n': dst[dst_end] = '\n'; break; -+ case 'f': dst[dst_end] = '\f'; break; -+ case 'b': dst[dst_end] = '\b'; break; -+ case 't': dst[dst_end] = '\t'; break; 
-+ case '\\': dst[dst_end] = '\\'; break; -+ // default do not decode -+ default: dst[dst_end++] = '\\'; dst[dst_end] = src[src_end]; break; -+ } -+ } else { -+ dst[dst_end] = src[src_end]; -+ } -+ dst_end++; -+ src_end++; -+ } -+ -+ return dst; -+} -+#endif -+ - static int parse_line(char *buffer, void *data) - { - char *dot, *key, *line, *linep, *value; -@@ -2730,6 +2820,9 @@ static int parse_line(char *buffer, void *data) - int ret = 0; - char *dup = buffer; - struct parse_line_conf *plc = data; -+#ifdef HAVE_ISULAD -+ char *value_decode = NULL; -+#endif - - /* If there are newlines in the config file we should keep them. */ - empty_line = lxc_is_line_empty(dup); -@@ -2796,10 +2889,21 @@ static int parse_line(char *buffer, void *data) - goto on_error; - } - -+#ifdef HAVE_ISULAD -+ value_decode = escape_string_decode(value); -+ if (value_decode == NULL) { -+ ERROR("Value %s decode failed", value); -+ } -+ ret = config->set(key, value_decode ? value_decode: value, plc->conf, NULL); -+#else - ret = config->set(key, value, plc->conf, NULL); -+#endif - - on_error: - free(linep); -+#ifdef HAVE_ISULAD -+ free(value_decode); -+#endif - - return ret; - } -@@ -4192,7 +4296,12 @@ static int get_config_prlimit(const char *key, char *retv, int inlen, - - lxc_list_for_each(it, &c->limits) { - /* 2 colon separated 64 bit integers or the word 'unlimited' */ -+#ifdef HAVE_ISULAD -+#define MAX_LIMIT_BUF_LEN ((INTTYPE_TO_STRLEN(uint64_t) * 2) + 2) -+ char buf[MAX_LIMIT_BUF_LEN] = { 0 }; -+#else - char buf[INTTYPE_TO_STRLEN(uint64_t) * 2 + 2]; -+#endif - int partlen; - struct lxc_limit *lim = it->elem; - -@@ -4200,17 +4309,34 @@ static int get_config_prlimit(const char *key, char *retv, int inlen, - memcpy(buf, "unlimited", STRLITERALLEN("unlimited") + 1); - partlen = STRLITERALLEN("unlimited"); - } else { -+#ifdef HAVE_ISULAD -+ partlen = snprintf(buf, MAX_LIMIT_BUF_LEN, "%" PRIu64, (uint64_t)lim->limit.rlim_cur); -+ if (partlen < 0 || partlen >= MAX_LIMIT_BUF_LEN) { -+ return -1; 
-+ } -+#else - partlen = sprintf(buf, "%" PRIu64, - (uint64_t)lim->limit.rlim_cur); -+#endif - } - - if (lim->limit.rlim_cur != lim->limit.rlim_max) { - if (lim->limit.rlim_max == RLIM_INFINITY) - memcpy(buf + partlen, ":unlimited", - STRLITERALLEN(":unlimited") + 1); -+#ifdef HAVE_ISULAD -+ else { -+ int nret = snprintf(buf + partlen, (MAX_LIMIT_BUF_LEN - partlen), -+ ":%" PRIu64, (uint64_t)lim->limit.rlim_max); -+ if (nret < 0 || nret >= (MAX_LIMIT_BUF_LEN - partlen)) { -+ return -1; -+ } -+ } -+#else - else - sprintf(buf + partlen, ":%" PRIu64, - (uint64_t)lim->limit.rlim_max); -+#endif - } - - if (get_all) { -@@ -6098,3 +6224,506 @@ int lxc_list_net(struct lxc_conf *c, const char *key, char *retv, int inlen) - - return fulllen; - } -+ -+#ifdef HAVE_ISULAD -+/* isulad: set config for init args */ -+static int set_config_init_args(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ int ret = 0; -+ char *tmp = NULL; -+ char *new_value = NULL; -+ -+ ret = set_config_string_item(&new_value, value); -+ if (ret || !new_value) -+ return ret; -+ -+ tmp = realloc(lxc_conf->init_argv, (lxc_conf->init_argc + 1) * sizeof(char *)); -+ if (!tmp) { -+ ERROR("Out of memory"); -+ free(new_value); -+ return -1; -+ } -+ -+ lxc_conf->init_argv = (char **)tmp; -+ -+ lxc_conf->init_argv[lxc_conf->init_argc] = new_value; -+ lxc_conf->init_argc++; -+ -+ return 0; -+} -+ -+/* isulad: get config init args */ -+static int get_config_init_args(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ int i, len, fulllen = 0; -+ -+ if (!retv) -+ inlen = 0; -+ else -+ memset(retv, 0, inlen); -+ -+ for (i = 0; i < c->init_argc; i++) { -+ strprint(retv, inlen, "%s", c->init_argv[i]); -+ } -+ -+ return fulllen; -+} -+ -+/* isulad: clr config init args*/ -+static inline int clr_config_init_args(const char *key, struct lxc_conf *c, -+ void *data) -+{ -+ return lxc_clear_init_args(c); -+} -+ -+/* isulad: set config for init groups */ 
-+static int set_config_init_groups(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ char *groups = NULL; -+ char *token = NULL; -+ int ret = -1; -+ -+ if (lxc_config_value_empty(value)) -+ return lxc_clear_init_groups(lxc_conf); -+ -+ groups = strdup(value); -+ if (!groups) -+ return -1; -+ -+ /* In case several capability keep is specified in a single line -+ * split these caps in a single element for the list. -+ */ -+ lxc_iterate_parts(token, groups, " \t") { -+ gid_t *tmp = NULL; -+ if (lxc_mem_realloc((void **)&tmp, (lxc_conf->init_groups_len + 1) * sizeof(gid_t), lxc_conf->init_groups, -+ (lxc_conf->init_groups_len) * sizeof(gid_t)) != 0) { -+ ERROR("Out of memory"); -+ goto on_error; -+ } -+ lxc_conf->init_groups = tmp; -+ tmp[lxc_conf->init_groups_len] = atoll(token); -+ lxc_conf->init_groups_len++; -+ } -+ -+ ret = 0; -+ -+on_error: -+ free(groups); -+ -+ return ret; -+} -+ -+/* isulad: get config init groups */ -+static int get_config_init_groups(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ int i, len, fulllen = 0; -+ -+ if (!retv) -+ inlen = 0; -+ else -+ memset(retv, 0, inlen); -+ -+ for (i = 0; i < c->init_groups_len; i++) { -+ strprint(retv, inlen, "%u\n", c->init_groups[i]); -+ } -+ -+ return fulllen; -+} -+ -+/* isulad: clr config init args*/ -+static inline int clr_config_init_groups(const char *key, struct lxc_conf *c, -+ void *data) -+{ -+ return lxc_clear_init_groups(c); -+} -+ -+/* isulad: set config for populate device */ -+static int set_config_populate_device(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ int ret = 0, major = 0, minor = 0; -+ uid_t uid = (uid_t)-1; -+ gid_t gid = (gid_t)-1; -+ char name[4096] = {0}; /* MAX dev path name */ -+ char type[3] = {0}; -+ char *replace_value = NULL; -+ mode_t filemode = 0; -+ struct lxc_list *iter = NULL; -+ struct lxc_list *dev_list = NULL; -+ struct lxc_populate_devs *dev_elem = NULL; 
-+ -+ if (lxc_config_value_empty(value)) -+ return lxc_clear_populate_devices(lxc_conf); -+ -+ /* lxc.populate.device = PATH_IN_CONTAINER:DEVICETYPE:MAJOR:MINOR:MODE:UID:GID -+ * For e.g. lxc.populate.device = /dev/sda:b:8:0:0666:0:0 -+ */ -+ ret = sscanf(value, "%4095[^:]:%2[^:]:%i:%i:%i:%u:%u", name, type, &major, &minor, &filemode, &uid, &gid); -+ if (ret != 7) -+ return -1; -+ -+ /* find existing list element */ -+ lxc_list_for_each(iter, &lxc_conf->populate_devs) { -+ dev_elem = iter->elem; -+ -+ if (strcmp(name, dev_elem->name) != 0) -+ continue; -+ -+ replace_value = safe_strdup(type); -+ -+ free(dev_elem->type); -+ dev_elem->type = replace_value; -+ dev_elem->file_mode = filemode; -+ dev_elem->maj = major; -+ dev_elem->min = minor; -+ dev_elem->uid = (uid_t)uid; -+ dev_elem->gid = (gid_t)gid; -+ return 0; -+ } -+ -+ /* allocate list element */ -+ dev_list = malloc(sizeof(*dev_list)); -+ if (dev_list == NULL) -+ goto on_error; -+ -+ lxc_list_init(dev_list); -+ -+ dev_elem = malloc(sizeof(*dev_elem)); -+ if (dev_elem == NULL) -+ goto on_error; -+ memset(dev_elem, 0, sizeof(*dev_elem)); -+ -+ dev_elem->name = safe_strdup(name); -+ -+ dev_elem->type = safe_strdup(type); -+ -+ dev_elem->file_mode = filemode; -+ dev_elem->maj = major; -+ dev_elem->min = minor; -+ dev_elem->uid = (uid_t)uid; -+ dev_elem->gid = (gid_t)gid; -+ -+ lxc_list_add_elem(dev_list, dev_elem); -+ -+ lxc_list_add_tail(&lxc_conf->populate_devs, dev_list); -+ -+ return 0; -+ -+on_error: -+ free(dev_list); -+ if (dev_elem) { -+ free(dev_elem->name); -+ free(dev_elem->type); -+ free(dev_elem); -+ } -+ return -1; -+} -+ -+/* isulad: get config populate device -+ * If you ask for 'lxc.populate.device', then all populate device -+ * entries will be printed, in 'lxc.populate.device = path_in_container:type:major:minor:mode:uid:gid' format. -+ * For e.g. 
lxc.populate.device = /dev/sda:b:8:0:0666:0:0 -+ */ -+static int get_config_populate_device(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ int len; -+ struct lxc_list *it = NULL; -+ int fulllen = 0; -+ -+ if (!retv) -+ inlen = 0; -+ else -+ memset(retv, 0, inlen); -+ -+ lxc_list_for_each(it, &c->populate_devs) { -+ struct lxc_populate_devs *elem = it->elem; -+ strprint(retv, inlen, "lxc.populate.device = %s:%s:%d:%d:%o:%u:%u\n", -+ elem->name, elem->type, elem->maj, -+ elem->min, elem->file_mode, elem->uid, elem->gid); -+ } -+ -+ return fulllen; -+} -+ -+/* isulad: clr config populate devices*/ -+static inline int clr_config_populate_device(const char *key, struct lxc_conf *c, -+ void *data) -+{ -+ return lxc_clear_populate_devices(c); -+} -+ -+/* isulad: set config for umask */ -+static int set_config_umask(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ if (lxc_config_value_empty(value)) { -+ ERROR("Empty umask"); -+ return -1; -+ } -+ -+ if (strcmp(value, "normal") == 0) { -+ lxc_conf->umask = 0022; -+ return 0; -+ } else if (strcmp(value, "secure") == 0) { -+ lxc_conf->umask = 0027; -+ return 0; -+ } else { -+ ERROR("Invalid native umask: %s", value); -+ return -1; -+ } -+} -+ -+/* isulad add: get umask value*/ -+static int get_config_umask(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ return lxc_get_conf_size_t(c, retv, inlen, c->umask); -+} -+ -+/* isulad add: clear umask value */ -+static inline int clr_config_umask(const char *key, struct lxc_conf *c, -+ void *data) -+{ -+ c->umask = 0027; -+ return 0; -+} -+ -+/* isulad: set config for rootfs masked paths */ -+static int set_config_rootfs_masked_paths(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ struct lxc_list *list_item = NULL; -+ -+ if (lxc_config_value_empty(value)) -+ return lxc_clear_rootfs_masked_paths(lxc_conf); -+ -+ list_item = 
malloc(sizeof(*list_item)); -+ if (list_item == NULL) -+ goto on_error; -+ -+ list_item->elem = safe_strdup(value); -+ -+ lxc_list_add_tail(&lxc_conf->rootfs.maskedpaths, list_item); -+ -+ return 0; -+ -+on_error: -+ free(list_item); -+ -+ return -1; -+} -+ -+// isulad: get config rootfs masked paths -+static int get_config_rootfs_masked_paths(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ int len, fulllen = 0; -+ struct lxc_list *it = NULL; -+ -+ if (!retv) -+ inlen = 0; -+ else -+ memset(retv, 0, inlen); -+ -+ lxc_list_for_each(it, &c->rootfs.maskedpaths) { -+ strprint(retv, inlen, "%s\n", (char *)it->elem); -+ } -+ -+ return fulllen; -+} -+ -+/* isulad: set config for rootfs ro paths */ -+static int set_config_rootfs_ro_paths(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ struct lxc_list *list_item = NULL; -+ -+ if (lxc_config_value_empty(value)) -+ return lxc_clear_rootfs_ro_paths(lxc_conf); -+ -+ list_item = malloc(sizeof(*list_item)); -+ if (list_item == NULL) -+ goto on_error; -+ -+ list_item->elem = safe_strdup(value); -+ -+ lxc_list_add_tail(&lxc_conf->rootfs.ropaths, list_item); -+ -+ return 0; -+ -+on_error: -+ free(list_item); -+ -+ return -1; -+} -+ -+// isulad: get config rootfs ro paths -+static int get_config_rootfs_ro_paths(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ int len, fulllen = 0; -+ struct lxc_list *it = NULL; -+ -+ if (!retv) -+ inlen = 0; -+ else -+ memset(retv, 0, inlen); -+ -+ lxc_list_for_each(it, &c->rootfs.ropaths) { -+ strprint(retv, inlen, "%s\n", (char *)it->elem); -+ } -+ -+ return fulllen; -+} -+ -+/* isulad: clr config rootfs masked paths */ -+static inline int clr_config_rootfs_masked_paths(const char *key, struct lxc_conf *c, -+ void *data) -+{ -+ return lxc_clear_rootfs_masked_paths(c); -+} -+ -+/* isulad: clr config rootfs ro paths */ -+static inline int clr_config_rootfs_ro_paths(const char *key, struct lxc_conf 
*c, -+ void *data) -+{ -+ return lxc_clear_rootfs_ro_paths(c); -+} -+ -+/* isulad: set config for systemd */ -+static int set_config_systemd(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ if (lxc_config_value_empty(value)) { -+ ERROR("Empty umask"); -+ return -1; -+ } -+ lxc_conf->systemd = strdup(value); -+ return 0; -+} -+ -+/* isulad add: get systemd value*/ -+static int get_config_systemd(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ return lxc_get_conf_str(retv, inlen, c->systemd); -+} -+ -+/* isulad add: clear systemd value */ -+static inline int clr_config_systemd(const char *key, struct lxc_conf *c, -+ void *data) -+{ -+ free(c->systemd); -+ c->systemd = NULL; -+ return 0; -+} -+ -+static int set_config_console_log_driver(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ return set_config_string_item(&lxc_conf->console.log_driver, value); -+} -+ -+static int set_config_console_syslog_tag(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ char buf[16] = { 0 }; -+ -+ if (value == NULL) { -+ return -1; -+ } -+ (void)strlcpy(buf, value, 16); -+ return set_config_string_item(&lxc_conf->console.log_syslog_tag, buf); -+} -+ -+static int parse_facility(const char *facility) -+{ -+#define FACILITIES_LEN 20 -+ const char *facility_keys[FACILITIES_LEN] = { -+ "kern", "user", "mail", "daemon", "auth", -+ "syslog", "lpr", "news", "uucp", "cron", "authpriv", "ftp", -+ "local0", "local1", "local2", "local3", "local4", "local5", "local6", "local7" -+ }; -+ const int facilities[FACILITIES_LEN] = { -+ LOG_KERN, LOG_USER, LOG_MAIL, LOG_DAEMON, LOG_AUTH, LOG_SYSLOG, -+ LOG_LPR, LOG_NEWS, LOG_UUCP, LOG_CRON, LOG_AUTHPRIV, LOG_FTP, -+ LOG_LOCAL0, LOG_LOCAL1, LOG_LOCAL2, LOG_LOCAL3, LOG_LOCAL4, -+ LOG_LOCAL5, LOG_LOCAL6, LOG_LOCAL7 -+ }; -+ int i = 0; -+ -+ if (facility == NULL) { -+ return -1; -+ } -+ -+ for (; i < FACILITIES_LEN; i++) { 
-+ if (strcmp(facility, facility_keys[i]) == 0) { -+ return facilities[i]; -+ } -+ } -+ -+ return -1; -+} -+ -+static int set_config_console_syslog_facility(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ int facility; -+ -+ facility = parse_facility(value); -+ if (facility < 0) { -+ NOTICE("Invalid facility: %s", value); -+ facility = LOG_DAEMON; -+ } -+ -+ lxc_conf->console.log_syslog_facility = facility; -+ return 0; -+} -+ -+static int get_config_console_log_driver(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ return lxc_get_conf_str(retv, inlen, c->console.log_driver); -+} -+ -+static int get_config_console_syslog_tag(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ return lxc_get_conf_str(retv, inlen, c->console.log_syslog_tag); -+} -+ -+static int get_config_console_syslog_facility(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ return lxc_get_conf_int(c, retv, inlen, c->console.log_syslog_facility); -+} -+ -+static inline int clr_config_console_log_driver(const char *key, -+ struct lxc_conf *c, void *data) -+{ -+ free(c->console.log_driver); -+ c->console.log_driver = NULL; -+ return 0; -+} -+ -+static inline int clr_config_console_syslog_tag(const char *key, -+ struct lxc_conf *c, void *data) -+{ -+ free(c->console.log_syslog_tag); -+ c->console.log_syslog_tag= NULL; -+ return 0; -+} -+ -+static inline int clr_config_console_syslog_facility(const char *key, -+ struct lxc_conf *c, void *data) -+{ -+ c->console.log_syslog_facility = LOG_DAEMON; -+ return 0; -+} -+ -+#endif -diff --git a/src/lxc/confile.h b/src/lxc/confile.h -index a457c9a17..624d9a0c2 100644 ---- a/src/lxc/confile.h -+++ b/src/lxc/confile.h -@@ -9,8 +9,6 @@ - #include - #include - --#include "compiler.h" -- - struct lxc_conf; - struct lxc_list; - -@@ -48,24 +46,21 @@ struct new_config_item { - extern struct lxc_config_t *lxc_get_config(const char *key); 
- - /* List all available config items. */ --extern int lxc_list_config_items(char *retv, int inlen) --__access_rw(1, 2); -+extern int lxc_list_config_items(char *retv, int inlen); - - /* Given a configuration key namespace (e.g. lxc.apparmor) list all associated - * subkeys for that namespace. - * Must be implemented when adding a new configuration key. - */ - extern int lxc_list_subkeys(struct lxc_conf *conf, const char *key, char *retv, -- int inlen) --__access_rw(3, 4); -+ int inlen); - - /* List all configuration items associated with a given network. For example - * pass "lxc.net.[i]" to retrieve all configuration items associated with - * the network associated with index [i]. - */ - extern int lxc_list_net(struct lxc_conf *c, const char *key, char *retv, -- int inlen) --__access_rw(3, 4); -+ int inlen); - - extern int lxc_config_read(const char *file, struct lxc_conf *conf, - bool from_include); -diff --git a/src/lxc/confile_utils.c b/src/lxc/confile_utils.c -index 05dadf9ec..ff4ae7688 100644 ---- a/src/lxc/confile_utils.c -+++ b/src/lxc/confile_utils.c -@@ -506,18 +506,6 @@ int lxc_veth_mode_to_flag(int *mode, const char *value) - return ret_set_errno(-1, EINVAL); - } - --char *lxc_veth_flag_to_mode(int mode) --{ -- for (size_t i = 0; i < sizeof(veth_mode) / sizeof(veth_mode[0]); i++) { -- if (veth_mode[i].mode != mode) -- continue; -- -- return veth_mode[i].name; -- } -- -- return NULL; --} -- - static struct lxc_macvlan_mode { - char *name; - int mode; -diff --git a/src/lxc/confile_utils.h b/src/lxc/confile_utils.h -index 7c59deae5..62990e98c 100644 ---- a/src/lxc/confile_utils.h -+++ b/src/lxc/confile_utils.h -@@ -5,7 +5,6 @@ - - #include - --#include "compiler.h" - #include "conf.h" - #include "confile_utils.h" - -@@ -41,7 +40,6 @@ extern void lxc_log_configured_netdevs(const struct lxc_conf *conf); - extern bool lxc_remove_nic_by_idx(struct lxc_conf *conf, unsigned int idx); - extern void lxc_free_networks(struct lxc_list *networks); - extern int 
lxc_veth_mode_to_flag(int *mode, const char *value); --extern char *lxc_veth_flag_to_mode(int mode); - extern int lxc_macvlan_mode_to_flag(int *mode, const char *value); - extern char *lxc_macvlan_flag_to_mode(int mode); - extern int lxc_ipvlan_mode_to_flag(int *mode, const char *value); -@@ -51,16 +49,12 @@ extern char *lxc_ipvlan_flag_to_isolation(int mode); - - extern int set_config_string_item(char **conf_item, const char *value); - extern int set_config_string_item_max(char **conf_item, const char *value, -- size_t max) --__access_r(2, 3); -- -+ size_t max); - extern int set_config_path_item(char **conf_item, const char *value); - extern int set_config_bool_item(bool *conf_item, const char *value, - bool empty_conf_action); - extern int config_ip_prefix(struct in_addr *addr); --extern int network_ifname(char *valuep, const char *value, size_t size) --__access_r(2, 3); -- -+extern int network_ifname(char *valuep, const char *value, size_t size); - extern void rand_complete_hwaddr(char *hwaddr); - extern bool lxc_config_net_is_hwaddr(const char *line); - extern bool new_hwaddr(char *hwaddr); -diff --git a/src/lxc/criu.c b/src/lxc/criu.c -index 19f2a173f..14a8aae7d 100644 ---- a/src/lxc/criu.c -+++ b/src/lxc/criu.c -@@ -303,7 +303,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, - * the handler the restore task created. 
- */ - if (!strcmp(opts->action, "dump") || !strcmp(opts->action, "pre-dump")) { -- path = lxc_cmd_get_limiting_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]); -+ path = lxc_cmd_get_cgroup_path(opts->c->name, opts->c->config_path, controllers[0]); - if (!path) { - ERROR("failed to get cgroup path for %s", controllers[0]); - goto err; -@@ -311,7 +311,7 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, - } else { - const char *p; - -- p = cgroup_ops->get_limiting_cgroup(cgroup_ops, controllers[0]); -+ p = cgroup_ops->get_cgroup(cgroup_ops, controllers[0]); - if (!p) { - ERROR("failed to get cgroup path for %s", controllers[0]); - goto err; -@@ -371,8 +371,15 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, - char *mntdata = NULL; - char arg[2 * PATH_MAX + 2]; - -+#ifdef HAVE_ISULAD -+ unsigned long pflags; -+ -+ if (parse_mntopts(mntent.mnt_opts, &flags, &pflags, &mntdata) < 0) -+ goto err; -+#else - if (parse_mntopts(mntent.mnt_opts, &flags, &mntdata) < 0) - goto err; -+#endif - - free(mntdata); - -@@ -406,9 +413,9 @@ static void exec_criu(struct cgroup_ops *cgroup_ops, struct lxc_conf *conf, - DECLARE_ARG("-t"); - DECLARE_ARG(pid); - -- freezer_relative = lxc_cmd_get_limiting_cgroup_path(opts->c->name, -- opts->c->config_path, -- "freezer"); -+ freezer_relative = lxc_cmd_get_cgroup_path(opts->c->name, -+ opts->c->config_path, -+ "freezer"); - if (!freezer_relative) { - ERROR("failed getting freezer path"); - goto err; -@@ -942,7 +949,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ - close(fd); - } - -- handler = lxc_init_handler(NULL, c->name, c->lxc_conf, c->config_path, false); -+ handler = lxc_init_handler(c->name, c->lxc_conf, c->config_path, false); - if (!handler) - goto out; - -@@ -1011,7 +1018,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ - } - - if (mount(rootfs->path, rootfs->mount, NULL, MS_BIND, NULL) 
< 0) { -- (void)rmdir(rootfs->mount); -+ rmdir(rootfs->mount); - goto out_fini_handler; - } - } -@@ -1020,7 +1027,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ - os.action = "restore"; - os.user = opts; - os.c = c; -- os.console_fd = c->lxc_conf->console.pts; -+ os.console_fd = c->lxc_conf->console.slave; - os.criu_version = criu_version; - os.handler = handler; - -@@ -1046,7 +1053,7 @@ static void do_restore(struct lxc_container *c, int status_pipe, struct migrate_ - /* exec_criu() returning is an error */ - exec_criu(cgroup_ops, c->lxc_conf, &os); - umount(rootfs->mount); -- (void)rmdir(rootfs->mount); -+ rmdir(rootfs->mount); - goto out_fini_handler; - } else { - char title[2048]; -@@ -1323,7 +1330,7 @@ static bool do_dump(struct lxc_container *c, char *mode, struct migrate_opts *op - fail: - close(criuout[0]); - close(criuout[1]); -- (void)rmdir(opts->directory); -+ rmdir(opts->directory); - free(criu_version); - return false; - } -diff --git a/src/lxc/exec_commands.c b/src/lxc/exec_commands.c -new file mode 100644 -index 000000000..00129cb0e ---- /dev/null -+++ b/src/lxc/exec_commands.c -@@ -0,0 +1,416 @@ -+/****************************************************************************** -+ * Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. -+ * Author: lifeng -+ * Create: 2019-12-08 -+ * Description: provide container definition -+ * lxc: linux Container library -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ ******************************************************************************/ -+ -+#ifndef _GNU_SOURCE -+#define _GNU_SOURCE 1 -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "af_unix.h" -+#include "cgroup.h" -+#include "exec_commands.h" -+#include "commands_utils.h" -+#include "conf.h" -+#include "config.h" -+#include "confile.h" -+#include "log.h" -+#include "lxc.h" -+#include "lxclock.h" -+#include "mainloop.h" -+#include "monitor.h" -+#include "terminal.h" -+#include "utils.h" -+ -+lxc_log_define(commands_exec, lxc); -+ -+static const char *lxc_exec_cmd_str(lxc_exec_cmd_t cmd) -+{ -+ static const char *const cmdname[LXC_EXEC_CMD_MAX] = { -+ [LXC_EXEC_CMD_SET_TERMINAL_WINCH] = "set_exec_terminal_winch", -+ }; -+ -+ if (cmd >= LXC_EXEC_CMD_MAX) -+ return "Invalid request"; -+ -+ return cmdname[cmd]; -+} -+ -+static int lxc_exec_cmd_rsp_recv(int sock, struct lxc_exec_cmd_rr *cmd) -+{ -+ int ret, rspfd; -+ struct lxc_exec_cmd_rsp *rsp = &cmd->rsp; -+ -+ ret = lxc_abstract_unix_recv_fds_timeout(sock, &rspfd, 1, rsp, sizeof(*rsp), 1000 * 1000); -+ if (ret < 0) { -+ SYSERROR("Failed to receive response for command \"%s\"", -+ lxc_exec_cmd_str(cmd->req.cmd)); -+ -+ if (errno == ECONNRESET || errno == EAGAIN || errno == EWOULDBLOCK) { -+ errno = ECONNRESET; /*isulad set errno ECONNRESET when timeout */ -+ return -1; -+ } -+ -+ return -1; -+ } -+ TRACE("Command \"%s\" received response", lxc_exec_cmd_str(cmd->req.cmd)); -+ -+ if (rsp->datalen == 0) { -+ DEBUG("Response data length for command \"%s\" is 0", -+ lxc_exec_cmd_str(cmd->req.cmd)); -+ return ret; -+ } -+ -+ if (rsp->datalen > LXC_CMD_DATA_MAX) { -+ ERROR("Response data for 
command \"%s\" is too long: %d bytes > %d", -+ lxc_exec_cmd_str(cmd->req.cmd), rsp->datalen, LXC_CMD_DATA_MAX); -+ return -1; -+ } -+ -+ rsp->data = malloc(rsp->datalen); -+ if (!rsp->data) { -+ errno = ENOMEM; -+ ERROR("Failed to allocate response buffer for command \"%s\"", -+ lxc_exec_cmd_str(cmd->req.cmd)); -+ return -1; -+ } -+ -+ ret = lxc_recv_nointr(sock, rsp->data, rsp->datalen, 0); -+ if (ret != rsp->datalen) { -+ SYSERROR("Failed to receive response data for command \"%s\"", -+ lxc_exec_cmd_str(cmd->req.cmd)); -+ return -1; -+ } -+ -+ return ret; -+} -+ -+static int lxc_exec_cmd_rsp_send(int fd, struct lxc_exec_cmd_rsp *rsp) -+{ -+ ssize_t ret; -+ -+ errno = EMSGSIZE; -+ ret = lxc_send_nointr(fd, rsp, sizeof(*rsp), MSG_NOSIGNAL); -+ if (ret < 0 || (size_t)ret != sizeof(*rsp)) { -+ SYSERROR("Failed to send command response %zd", ret); -+ return -1; -+ } -+ -+ if (!rsp->data || rsp->datalen <= 0) -+ return 0; -+ -+ errno = EMSGSIZE; -+ ret = lxc_send_nointr(fd, rsp->data, rsp->datalen, MSG_NOSIGNAL); -+ if (ret < 0 || ret != (ssize_t)rsp->datalen) { -+ SYSWARN("Failed to send command response data %zd", ret); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int lxc_exec_cmd_send(const char *name, struct lxc_exec_cmd_rr *cmd, -+ const char *lxcpath, const char *hashed_sock_name, const char *suffix) -+{ -+ int client_fd, saved_errno; -+ ssize_t ret = -1; -+ -+ client_fd = lxc_cmd_connect(name, lxcpath, hashed_sock_name, suffix); -+ if (client_fd < 0) -+ return -1; -+ -+ ret = lxc_abstract_unix_send_credential(client_fd, &cmd->req, -+ sizeof(cmd->req)); -+ if (ret < 0 || (size_t)ret != sizeof(cmd->req)) -+ goto on_error; -+ -+ if (cmd->req.datalen <= 0) -+ return client_fd; -+ -+ errno = EMSGSIZE; -+ ret = lxc_send_nointr(client_fd, (void *)cmd->req.data, -+ cmd->req.datalen, MSG_NOSIGNAL); -+ if (ret < 0 || ret != (ssize_t)cmd->req.datalen) -+ goto on_error; -+ -+ return client_fd; -+ -+on_error: -+ saved_errno = errno; -+ close(client_fd); -+ errno = 
saved_errno; -+ -+ return -1; -+} -+ -+static int lxc_exec_cmd(const char *name, struct lxc_exec_cmd_rr *cmd, const char *lxcpath, const char *hashed_sock_name, const char *suffix) -+{ -+ int client_fd = -1; -+ int saved_errno; -+ int ret = -1; -+ -+ client_fd = lxc_exec_cmd_send(name, cmd, lxcpath, hashed_sock_name, suffix); -+ if (client_fd < 0) { -+ SYSTRACE("Command \"%s\" failed to connect command socket", -+ lxc_exec_cmd_str(cmd->req.cmd)); -+ return -1; -+ } -+ -+ ret = lxc_exec_cmd_rsp_recv(client_fd, cmd); -+ -+ saved_errno = errno; -+ close(client_fd); -+ errno = saved_errno; -+ return ret; -+} -+ -+int lxc_exec_cmd_set_terminal_winch(const char *name, const char *lxcpath, const char *suffix, unsigned int height, unsigned int width) -+{ -+ int ret = 0; -+ struct lxc_exec_cmd_set_terminal_winch_request data = { 0 }; -+ -+ data.height = height; -+ data.width = width; -+ -+ struct lxc_exec_cmd_rr cmd = { -+ .req = { -+ .cmd = LXC_EXEC_CMD_SET_TERMINAL_WINCH, -+ .datalen = sizeof(struct lxc_exec_cmd_set_terminal_winch_request), -+ .data = &data, -+ }, -+ }; -+ -+ ret = lxc_exec_cmd(name, &cmd, lxcpath, NULL, suffix); -+ if (ret < 0) { -+ ERROR("Failed to send command to container"); -+ return -1; -+ } -+ -+ if (cmd.rsp.ret != 0) { -+ ERROR("Command response error:%d", cmd.rsp.ret); -+ return -1; -+ } -+ return 0; -+} -+ -+static int lxc_exec_cmd_set_terminal_winch_callback(int fd, struct lxc_exec_cmd_req *req, -+ struct lxc_exec_command_handler *handler) -+{ -+ struct lxc_exec_cmd_rsp rsp; -+ struct lxc_exec_cmd_set_terminal_winch_request *data = (struct lxc_exec_cmd_set_terminal_winch_request *)(req->data); -+ memset(&rsp, 0, sizeof(rsp)); -+ -+ rsp.ret = lxc_set_terminal_winsz(handler->terminal, data->height, data->width);; -+ -+ return lxc_exec_cmd_rsp_send(fd, &rsp); -+ -+} -+ -+static int lxc_exec_cmd_process(int fd, struct lxc_exec_cmd_req *req, -+ struct lxc_exec_command_handler *handler) -+{ -+ typedef int (*callback)(int, struct lxc_exec_cmd_req *, 
struct lxc_exec_command_handler *); -+ -+ callback cb[LXC_EXEC_CMD_MAX] = { -+ [LXC_EXEC_CMD_SET_TERMINAL_WINCH] = lxc_exec_cmd_set_terminal_winch_callback, -+ }; -+ -+ if (req->cmd >= LXC_EXEC_CMD_MAX) { -+ ERROR("Undefined command id %d", req->cmd); -+ return -1; -+ } -+ return cb[req->cmd](fd, req, handler); -+} -+ -+static void lxc_exec_cmd_fd_cleanup(int fd, struct lxc_epoll_descr *descr) -+{ -+ lxc_mainloop_del_handler(descr, fd); -+ close(fd); -+ return; -+} -+ -+static int lxc_exec_cmd_handler(int fd, uint32_t events, void *data, -+ struct lxc_epoll_descr *descr) -+{ -+ int ret; -+ struct lxc_exec_cmd_req req; -+ void *reqdata = NULL; -+ struct lxc_exec_command_handler *handler = data; -+ -+ ret = lxc_abstract_unix_rcv_credential(fd, &req, sizeof(req)); -+ if (ret < 0) { -+ SYSERROR("Failed to receive data on command socket for command " -+ "\"%s\"", lxc_exec_cmd_str(req.cmd)); -+ -+ if (errno == EACCES) { -+ /* We don't care for the peer, just send and close. */ -+ struct lxc_exec_cmd_rsp rsp = {.ret = ret}; -+ -+ lxc_exec_cmd_rsp_send(fd, &rsp); -+ } -+ -+ goto out_close; -+ } -+ -+ if (ret == 0) -+ goto out_close; -+ -+ if (ret != sizeof(req)) { -+ WARN("Failed to receive full command request. Ignoring request " -+ "for \"%s\"", lxc_exec_cmd_str(req.cmd)); -+ ret = -1; -+ goto out_close; -+ } -+ -+ if (req.datalen > LXC_CMD_DATA_MAX) { -+ ERROR("Received command data length %d is too large for " -+ "command \"%s\"", req.datalen, lxc_exec_cmd_str(req.cmd)); -+ errno = EFBIG; -+ ret = -EFBIG; -+ goto out_close; -+ } -+ -+ if (req.datalen > 0) { -+ reqdata = alloca(req.datalen); -+ if (!reqdata) { -+ ERROR("Failed to allocate memory for \"%s\" command", -+ lxc_exec_cmd_str(req.cmd)); -+ errno = ENOMEM; -+ ret = -ENOMEM; -+ goto out_close; -+ } -+ -+ ret = lxc_recv_nointr(fd, reqdata, req.datalen, 0); -+ if (ret != req.datalen) { -+ WARN("Failed to receive full command request. 
Ignoring " -+ "request for \"%s\"", lxc_exec_cmd_str(req.cmd)); -+ ret = LXC_MAINLOOP_ERROR; -+ goto out_close; -+ } -+ -+ req.data = reqdata; -+ } -+ -+ ret = lxc_exec_cmd_process(fd, &req, handler); -+ if (ret) { -+ /* This is not an error, but only a request to close fd. */ -+ ret = LXC_MAINLOOP_CONTINUE; -+ goto out_close; -+ } -+ -+out: -+ return ret; -+ -+out_close: -+ lxc_exec_cmd_fd_cleanup(fd, descr); -+ goto out; -+} -+ -+static int lxc_exec_cmd_accept(int fd, uint32_t events, void *data, -+ struct lxc_epoll_descr *descr) -+{ -+ int connection = -1; -+ int opt = 1, ret = -1; -+ -+ connection = accept(fd, NULL, 0); -+ if (connection < 0) { -+ SYSERROR("Failed to accept connection to run command"); -+ return LXC_MAINLOOP_ERROR; -+ } -+ -+ ret = fcntl(connection, F_SETFD, FD_CLOEXEC); -+ if (ret < 0) { -+ SYSERROR("Failed to set close-on-exec on incoming command connection"); -+ goto out_close; -+ } -+ -+ ret = setsockopt(connection, SOL_SOCKET, SO_PASSCRED, &opt, sizeof(opt)); -+ if (ret < 0) { -+ SYSERROR("Failed to enable necessary credentials on command socket"); -+ goto out_close; -+ } -+ -+ ret = lxc_mainloop_add_handler(descr, connection, lxc_exec_cmd_handler, data); -+ if (ret) { -+ ERROR("Failed to add command handler"); -+ goto out_close; -+ } -+ -+out: -+ return ret; -+ -+out_close: -+ close(connection); -+ goto out; -+} -+ -+int lxc_exec_cmd_init(const char *name, const char *lxcpath, const char *suffix) -+{ -+ int fd, ret; -+ char path[LXC_AUDS_ADDR_LEN] = {0}; -+ -+ ret = lxc_make_abstract_socket_name(path, sizeof(path), name, lxcpath, NULL, suffix); -+ if (ret < 0) -+ return -1; -+ TRACE("Creating abstract unix socket \"%s\"", &path[1]); -+ -+ fd = lxc_abstract_unix_open(path, SOCK_STREAM, 0); -+ if (fd < 0) { -+ SYSERROR("Failed to create command socket %s", &path[1]); -+ if (errno == EADDRINUSE) -+ ERROR("Container \"%s\" appears to be already running", name); -+ -+ return -1; -+ } -+ -+ ret = fcntl(fd, F_SETFD, FD_CLOEXEC); -+ if (ret < 0) 
{ -+ SYSERROR("Failed to set FD_CLOEXEC on command socket file descriptor"); -+ close(fd); -+ return -1; -+ } -+ -+ return fd; -+} -+ -+int lxc_exec_cmd_mainloop_add(struct lxc_epoll_descr *descr, struct lxc_exec_command_handler *handler) -+{ -+ int ret; -+ int fd = handler->maincmd_fd; -+ -+ ret = lxc_mainloop_add_handler(descr, fd, lxc_exec_cmd_accept, handler); -+ if (ret < 0) { -+ ERROR("Failed to add handler for command socket"); -+ close(fd); -+ } -+ -+ return ret; -+} -diff --git a/src/lxc/exec_commands.h b/src/lxc/exec_commands.h -new file mode 100644 -index 000000000..2581ee903 ---- /dev/null -+++ b/src/lxc/exec_commands.h -@@ -0,0 +1,73 @@ -+/****************************************************************************** -+ * Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. -+ * Author: lifeng -+ * Create: 2019-12-08 -+ * Description: provide container definition -+ * lxc: linux Container library -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ ******************************************************************************/ -+ -+#ifndef __LXC_EXEC_COMMANDS_H -+#define __LXC_EXEC_COMMANDS_H -+ -+#include -+#include -+#include -+ -+#include "lxccontainer.h" -+#include "macro.h" -+#include "state.h" -+#include "terminal.h" -+ -+struct lxc_exec_command_handler { -+ int maincmd_fd; -+ struct lxc_terminal *terminal; -+}; -+ -+typedef enum { -+ LXC_EXEC_CMD_SET_TERMINAL_WINCH, -+ LXC_EXEC_CMD_MAX, -+} lxc_exec_cmd_t; -+ -+struct lxc_exec_cmd_req { -+ lxc_exec_cmd_t cmd; -+ int datalen; -+ const void *data; -+}; -+ -+struct lxc_exec_cmd_rsp { -+ int ret; /* 0 on success, -errno on failure */ -+ int datalen; -+ void *data; -+}; -+ -+struct lxc_exec_cmd_rr { -+ struct lxc_exec_cmd_req req; -+ struct lxc_exec_cmd_rsp rsp; -+}; -+ -+struct lxc_exec_cmd_set_terminal_winch_request { -+ unsigned int height; -+ unsigned int width; -+}; -+ -+struct lxc_epoll_descr; -+struct lxc_handler; -+ -+extern int lxc_exec_cmd_init(const char *name, const char *lxcpath, const char *suffix); -+extern int lxc_exec_cmd_mainloop_add(struct lxc_epoll_descr *descr, struct lxc_exec_command_handler *handler); -+extern int lxc_exec_cmd_set_terminal_winch(const char *name, const char *lxcpath, const char *suffix, unsigned int height, unsigned int width); -+ -+#endif /* __exec_commands_h */ -diff --git a/src/lxc/execute.c b/src/lxc/execute.c -index 7175ef2cf..16c0fed05 100644 ---- a/src/lxc/execute.c -+++ b/src/lxc/execute.c -@@ -14,12 +14,16 @@ - #include "config.h" - #include "log.h" - #include "start.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "utils.h" - - lxc_log_define(execute, start); - -+#ifdef HAVE_ISULAD -+static int execute_start(struct lxc_handler *handler, void* data, int fd) 
-+#else - static int execute_start(struct lxc_handler *handler, void* data) -+#endif - { - int argc_add, j; - char **argv; -@@ -66,11 +70,14 @@ static int execute_start(struct lxc_handler *handler, void* data) - NOTICE("Exec'ing \"%s\"", my_args->argv[0]); - - if (my_args->init_fd >= 0) -- execveat(my_args->init_fd, "", argv, environ, AT_EMPTY_PATH); -+ lxc_raw_execveat(my_args->init_fd, "", argv, environ, AT_EMPTY_PATH); - else - execvp(argv[0], argv); - SYSERROR("Failed to exec %s", argv[0]); - -+#ifdef HAVE_ISULAD -+ lxc_write_error_message(fd, "Failed to exec: \"%s\": %s.", argv[0], strerror(errno)); -+#endif - free(argv); - out1: - return 1; -@@ -88,14 +95,26 @@ static struct lxc_operations execute_start_ops = { - .post_start = execute_post_start - }; - -+#ifdef HAVE_ISULAD -+int lxc_execute(const char *name, char *const argv[], int quiet, -+ struct lxc_handler *handler, const char *lxcpath, -+ bool daemonize, int *error_num, unsigned int start_timeout) -+#else - int lxc_execute(const char *name, char *const argv[], int quiet, - struct lxc_handler *handler, const char *lxcpath, - bool daemonize, int *error_num) -+#endif - { -+ - struct execute_args args = {.argv = argv, .quiet = quiet}; - - TRACE("Doing lxc_execute"); - handler->conf->is_execute = true; -+#ifdef HAVE_ISULAD -+ return __lxc_start(handler, &execute_start_ops, &args, lxcpath, -+ daemonize, error_num, start_timeout); -+#else - return __lxc_start(handler, &execute_start_ops, &args, lxcpath, - daemonize, error_num); -+#endif - } -diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h -index f9c8abe03..6d5dbf68d 100644 ---- a/src/lxc/file_utils.h -+++ b/src/lxc/file_utils.h -@@ -12,52 +12,27 @@ - #include - #include - --#include "compiler.h" -- - /* read and write whole files */ - extern int lxc_write_to_file(const char *filename, const void *buf, -- size_t count, bool add_newline, mode_t mode) --__access_r(2, 3); -- --extern int lxc_readat(int dirfd, const char *filename, void *buf, size_t count) 
--__access_w(3, 4); -- -+ size_t count, bool add_newline, mode_t mode); -+extern int lxc_readat(int dirfd, const char *filename, void *buf, size_t count); - extern int lxc_writeat(int dirfd, const char *filename, const void *buf, -- size_t count) --__access_r(3, 4); -- -+ size_t count); - extern int lxc_write_openat(const char *dir, const char *filename, -- const void *buf, size_t count) --__access_r(3, 4); -- --extern int lxc_read_from_file(const char *filename, void *buf, size_t count) --__access_w(2, 3); -+ const void *buf, size_t count); -+extern int lxc_read_from_file(const char *filename, void *buf, size_t count); - - /* send and receive buffers completely */ --extern ssize_t lxc_write_nointr(int fd, const void *buf, size_t count) --__access_r(2, 3); -- -+extern ssize_t lxc_write_nointr(int fd, const void *buf, size_t count); - extern ssize_t lxc_pwrite_nointr(int fd, const void *buf, size_t count, -- off_t offset) --__access_r(2, 3); -- --extern ssize_t lxc_send_nointr(int sockfd, void *buf, size_t len, int flags) --__access_r(2, 3); -- --extern ssize_t lxc_read_nointr(int fd, void *buf, size_t count) --__access_w(2, 3); -- -+ off_t offset); -+extern ssize_t lxc_send_nointr(int sockfd, void *buf, size_t len, int flags); -+extern ssize_t lxc_read_nointr(int fd, void *buf, size_t count); - extern ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count, -- const void *expected_buf) --__access_w(2, 3); -- -+ const void *expected_buf); - extern ssize_t lxc_read_file_expect(const char *path, void *buf, size_t count, -- const void *expected_buf) --__access_w(2, 3); -- --extern ssize_t lxc_recv_nointr(int sockfd, void *buf, size_t len, int flags) --__access_w(2, 3); -- -+ const void *expected_buf); -+extern ssize_t lxc_recv_nointr(int sockfd, void *buf, size_t len, int flags); - ssize_t lxc_recvmsg_nointr_iov(int sockfd, struct iovec *iov, size_t iovlen, - int flags); - -diff --git a/src/lxc/initutils.c b/src/lxc/initutils.c -index 5549c2e8f..76f00488a 100644 
---- a/src/lxc/initutils.c -+++ b/src/lxc/initutils.c -@@ -54,11 +54,15 @@ const char *lxc_global_config_value(const char *option_name) - { NULL, NULL }, - }; - -+#ifdef HAVE_ISULAD -+ static const char *values[sizeof(options) / sizeof(options[0])] = {0}; -+#else - /* placed in the thread local storage pool for non-bionic targets */ - #ifdef HAVE_TLS - static thread_local const char *values[sizeof(options) / sizeof(options[0])] = {0}; - #else - static const char *values[sizeof(options) / sizeof(options[0])] = {0}; -+#endif - #endif - - /* user_config_path is freed as soon as it is used */ -diff --git a/src/lxc/isulad_utils.c b/src/lxc/isulad_utils.c -new file mode 100644 -index 000000000..b2824045c ---- /dev/null -+++ b/src/lxc/isulad_utils.c -@@ -0,0 +1,99 @@ -+/* SPDX-License-Identifier: LGPL-2.1+ */ -+/****************************************************************************** -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. Allrights reserved -+ * Description: isulad utils -+ * Author: lifeng -+ * Create: 2020-04-11 -+******************************************************************************/ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "isulad_utils.h" -+#include "log.h" -+#include "path.h" -+#include "file_utils.h" -+ -+lxc_log_define(isulad_utils, lxc); -+ -+void *lxc_common_calloc_s(size_t size) -+{ -+ if (size == 0 || size > SIZE_MAX) { -+ return NULL; -+ } -+ -+ return calloc((size_t)1, size); -+} -+ -+int lxc_mem_realloc(void **newptr, size_t newsize, void *oldptr, size_t oldsize) -+{ -+ void *tmp = NULL; -+ -+ if (newsize == 0) { -+ goto err_out; -+ } -+ -+ tmp = lxc_common_calloc_s(newsize); -+ if (tmp == NULL) { -+ ERROR("Failed to malloc memory"); -+ goto err_out; -+ } -+ -+ if (oldptr != NULL) { -+ memcpy(tmp, oldptr, (newsize < oldsize) ? 
newsize : oldsize); -+ -+ memset(oldptr, 0, oldsize); -+ -+ free(oldptr); -+ } -+ -+ *newptr = tmp; -+ return 0; -+ -+err_out: -+ return -1; -+} -+ -+char *safe_strdup(const char *src) -+{ -+ char *dst = NULL; -+ -+ if (src == NULL) { -+ return NULL; -+ } -+ -+ dst = strdup(src); -+ if (dst == NULL) { -+ abort(); -+ } -+ -+ return dst; -+} -+ -+int lxc_open(const char *filename, int flags, mode_t mode) -+{ -+ char rpath[PATH_MAX] = {0x00}; -+ -+ if (cleanpath(filename, rpath, sizeof(rpath)) == NULL) { -+ return -1; -+ } -+ if (mode) { -+ return open(rpath, (int)((unsigned int)flags | O_CLOEXEC), mode); -+ } else { -+ return open(rpath, (int)((unsigned int)flags | O_CLOEXEC)); -+ } -+} -+ -+FILE *lxc_fopen(const char *filename, const char *mode) -+{ -+ char rpath[PATH_MAX] = {0x00}; -+ -+ if (cleanpath(filename, rpath, sizeof(rpath)) == NULL) { -+ return NULL; -+ } -+ -+ return fopen_cloexec(rpath, mode); -+} -diff --git a/src/lxc/isulad_utils.h b/src/lxc/isulad_utils.h -new file mode 100644 -index 000000000..7a6ab00e2 ---- /dev/null -+++ b/src/lxc/isulad_utils.h -@@ -0,0 +1,20 @@ -+/* SPDX-License-Identifier: LGPL-2.1+ */ -+/****************************************************************************** -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. 
Allrights reserved -+ * Description: isulad utils -+ * Author: lifeng -+ * Create: 2020-04-11 -+******************************************************************************/ -+#ifndef __iSULAD_UTILS_H -+#define __iSULAD_UTILS_H -+ -+#include -+ -+extern int lxc_mem_realloc(void **newptr, size_t newsize, void *oldptr, size_t oldsize); -+extern void *lxc_common_calloc_s(size_t size); -+extern char *safe_strdup(const char *src); -+ -+extern int lxc_open(const char *filename, int flags, mode_t mode); -+extern FILE *lxc_fopen(const char *filename, const char *mode); -+ -+#endif -diff --git a/src/lxc/json/defs.c b/src/lxc/json/defs.c -new file mode 100644 -index 000000000..4bf569a4e ---- /dev/null -+++ b/src/lxc/json/defs.c -@@ -0,0 +1,205 @@ -+// Generated from defs.json. Do not edit! -+#ifndef _GNU_SOURCE -+#define _GNU_SOURCE -+#endif -+#include -+#include -+#include "defs.h" -+ -+defs_hook *make_defs_hook(yajl_val tree, struct parser_context *ctx, parser_error *err) { -+ defs_hook *ret = NULL; -+ *err = 0; -+ if (tree == NULL) -+ return ret; -+ ret = safe_malloc(sizeof(*ret)); -+ { -+ yajl_val val = get_val(tree, "path", yajl_t_string); -+ if (val != NULL) { -+ char *str = YAJL_GET_STRING(val); -+ ret->path = safe_strdup(str ? str : ""); -+ } -+ } -+ { -+ yajl_val tmp = get_val(tree, "args", yajl_t_array); -+ if (tmp != NULL && YAJL_GET_ARRAY(tmp) != NULL && YAJL_GET_ARRAY(tmp)->len > 0) { -+ size_t i; -+ ret->args_len = YAJL_GET_ARRAY(tmp)->len; -+ if (YAJL_GET_ARRAY(tmp)->len > SIZE_MAX / sizeof(*ret->args) - 1) { -+ free_defs_hook(ret); -+ return NULL; -+ } -+ ret->args = safe_malloc((YAJL_GET_ARRAY(tmp)->len + 1) * sizeof(*ret->args)); -+ for (i = 0; i < YAJL_GET_ARRAY(tmp)->len; i++) { -+ yajl_val val = YAJL_GET_ARRAY(tmp)->values[i]; -+ if (val != NULL) { -+ char *str = YAJL_GET_STRING(val); -+ ret->args[i] = safe_strdup(str ? 
str : ""); -+ } -+ } -+ } -+ } -+ { -+ yajl_val tmp = get_val(tree, "env", yajl_t_array); -+ if (tmp != NULL && YAJL_GET_ARRAY(tmp) != NULL && YAJL_GET_ARRAY(tmp)->len > 0) { -+ size_t i; -+ ret->env_len = YAJL_GET_ARRAY(tmp)->len; -+ if (YAJL_GET_ARRAY(tmp)->len > SIZE_MAX / sizeof(*ret->env) - 1) { -+ free_defs_hook(ret); -+ return NULL; -+ } -+ ret->env = safe_malloc((YAJL_GET_ARRAY(tmp)->len + 1) * sizeof(*ret->env)); -+ for (i = 0; i < YAJL_GET_ARRAY(tmp)->len; i++) { -+ yajl_val val = YAJL_GET_ARRAY(tmp)->values[i]; -+ if (val != NULL) { -+ char *str = YAJL_GET_STRING(val); -+ ret->env[i] = safe_strdup(str ? str : ""); -+ } -+ } -+ } -+ } -+ { -+ yajl_val val = get_val(tree, "timeout", yajl_t_number); -+ if (val != NULL) { -+ int invalid = common_safe_int(YAJL_GET_NUMBER(val), (int *)&ret->timeout); -+ if (invalid) { -+ if (asprintf(err, "Invalid value '%s' with type 'integer' for key 'timeout': %s", YAJL_GET_NUMBER(val), strerror(-invalid)) < 0) -+ *err = safe_strdup("error allocating memory"); -+ free_defs_hook(ret); -+ return NULL; -+ } -+ } -+ } -+ if (ret->path == NULL) { -+ if (asprintf(err, "Required field '%s' not present", "path") < 0) -+ *err = safe_strdup("error allocating memory"); -+ free_defs_hook(ret); -+ return NULL; -+ } -+ -+ if (tree->type == yajl_t_object && (ctx->options & PARSE_OPTIONS_STRICT)) { -+ int i; -+ for (i = 0; i < tree->u.object.len; i++) -+ if (strcmp(tree->u.object.keys[i], "path") && -+ strcmp(tree->u.object.keys[i], "args") && -+ strcmp(tree->u.object.keys[i], "env") && -+ strcmp(tree->u.object.keys[i], "timeout")) { -+ if (ctx->stderr > 0) -+ fprintf(ctx->stderr, "WARNING: unknown key found: %s\n", tree->u.object.keys[i]); -+ } -+ } -+ return ret; -+} -+ -+void free_defs_hook(defs_hook *ptr) { -+ if (ptr == NULL) -+ return; -+ free(ptr->path); -+ ptr->path = NULL; -+ if (ptr->args != NULL) { -+ size_t i; -+ for (i = 0; i < ptr->args_len; i++) { -+ if (ptr->args[i] != NULL) { -+ free(ptr->args[i]); -+ ptr->args[i] = NULL; 
-+ } -+ } -+ free(ptr->args); -+ ptr->args = NULL; -+ } -+ if (ptr->env != NULL) { -+ size_t i; -+ for (i = 0; i < ptr->env_len; i++) { -+ if (ptr->env[i] != NULL) { -+ free(ptr->env[i]); -+ ptr->env[i] = NULL; -+ } -+ } -+ free(ptr->env); -+ ptr->env = NULL; -+ } -+ free(ptr); -+} -+ -+yajl_gen_status gen_defs_hook(yajl_gen g, defs_hook *ptr, struct parser_context *ctx, parser_error *err) { -+ yajl_gen_status stat = yajl_gen_status_ok; -+ *err = 0; -+ stat = reformat_start_map(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->path != NULL)) { -+ char *str = ""; -+ stat = reformat_map_key(g, "path", strlen("path")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->path != NULL) { -+ str = ptr->path; -+ } -+ stat = reformat_string(g, str, strlen(str)); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) || (ptr != NULL && ptr->args != NULL)) { -+ size_t len = 0, i; -+ stat = reformat_map_key(g, "args", strlen("args")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->args != NULL) { -+ len = ptr->args_len; -+ } -+ if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ stat = reformat_start_array(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ for (i = 0; i < len; i++) { -+ stat = reformat_string(g, ptr->args[i], strlen(ptr->args[i])); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_end_array(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ } -+ if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) || (ptr != NULL && ptr->env != NULL)) { -+ 
size_t len = 0, i; -+ stat = reformat_map_key(g, "env", strlen("env")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->env != NULL) { -+ len = ptr->env_len; -+ } -+ if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ stat = reformat_start_array(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ for (i = 0; i < len; i++) { -+ stat = reformat_string(g, ptr->env[i], strlen(ptr->env[i])); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_end_array(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ } -+ if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->timeout)) { -+ long long int num = 0; -+ stat = reformat_map_key(g, "timeout", strlen("timeout")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->timeout) { -+ num = (long long int)ptr->timeout; -+ } -+ stat = reformat_int(g, num); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_end_map(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ return yajl_gen_status_ok; -+} -diff --git a/src/lxc/json/defs.h b/src/lxc/json/defs.h -new file mode 100644 -index 000000000..0bbd8ac89 ---- /dev/null -+++ b/src/lxc/json/defs.h -@@ -0,0 +1,37 @@ -+// Generated from defs.json. Do not edit! 
-+#ifndef DEFS_SCHEMA_H -+#define DEFS_SCHEMA_H -+ -+#include -+#include -+#include "json_common.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+typedef struct { -+ char *path; -+ -+ char **args; -+ size_t args_len; -+ -+ char **env; -+ size_t env_len; -+ -+ int timeout; -+ -+} -+defs_hook; -+ -+void free_defs_hook(defs_hook *ptr); -+ -+defs_hook *make_defs_hook(yajl_val tree, struct parser_context *ctx, parser_error *err); -+ -+yajl_gen_status gen_defs_hook(yajl_gen g, defs_hook *ptr, struct parser_context *ctx, parser_error *err); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif -diff --git a/src/lxc/json/json_common.c b/src/lxc/json/json_common.c -new file mode 100755 -index 000000000..ec20c5982 ---- /dev/null -+++ b/src/lxc/json/json_common.c -@@ -0,0 +1,1153 @@ -+// Auto generated file. Do not edit! -+#define _GNU_SOURCE -+#include -+#include -+#include -+#include "json_common.h" -+ -+#define MAX_NUM_STR_LEN 21 -+ -+yajl_gen_status reformat_number(void *ctx, const char *str, size_t len) { -+ yajl_gen g = (yajl_gen) ctx; -+ return yajl_gen_number(g, str, len); -+} -+ -+yajl_gen_status reformat_uint(void *ctx, long long unsigned int num) { -+ char numstr[MAX_NUM_STR_LEN]; -+ int ret; -+ -+ ret = snprintf(numstr, MAX_NUM_STR_LEN, "%llu", num); -+ if (ret < 0 || ret >= MAX_NUM_STR_LEN) { -+ return yajl_gen_in_error_state; -+ } -+ return reformat_number(ctx, (const char *)numstr, strlen(numstr)); -+} -+ -+yajl_gen_status reformat_int(void *ctx, long long int num) { -+ char numstr[MAX_NUM_STR_LEN]; -+ int ret; -+ -+ ret = snprintf(numstr, MAX_NUM_STR_LEN, "%lld", num); -+ if (ret < 0 || ret >= MAX_NUM_STR_LEN) { -+ return yajl_gen_in_error_state; -+ } -+ return reformat_number(ctx, (const char *)numstr, strlen(numstr)); -+} -+ -+yajl_gen_status reformat_double(void *ctx, double num) { -+ yajl_gen g = (yajl_gen) ctx; -+ return yajl_gen_double(g, num); -+} -+ -+yajl_gen_status reformat_string(void *ctx, const char *str, size_t len) { -+ yajl_gen g = (yajl_gen) 
ctx; -+ return yajl_gen_string(g, (const unsigned char *)str, len); -+} -+ -+yajl_gen_status reformat_null(void *ctx) { -+ yajl_gen g = (yajl_gen) ctx; -+ return yajl_gen_null(g); -+} -+ -+yajl_gen_status reformat_bool(void *ctx, int boolean) { -+ yajl_gen g = (yajl_gen) ctx; -+ return yajl_gen_bool(g, boolean); -+} -+ -+yajl_gen_status reformat_map_key(void *ctx, const char *str, size_t len) { -+ yajl_gen g = (yajl_gen) ctx; -+ return yajl_gen_string(g, (const unsigned char *)str, len); -+} -+ -+yajl_gen_status reformat_start_map(void *ctx) { -+ yajl_gen g = (yajl_gen) ctx; -+ return yajl_gen_map_open(g); -+} -+ -+yajl_gen_status reformat_end_map(void *ctx) { -+ yajl_gen g = (yajl_gen) ctx; -+ return yajl_gen_map_close(g); -+} -+ -+yajl_gen_status reformat_start_array(void *ctx) { -+ yajl_gen g = (yajl_gen) ctx; -+ return yajl_gen_array_open(g); -+} -+ -+yajl_gen_status reformat_end_array(void *ctx) { -+ yajl_gen g = (yajl_gen) ctx; -+ return yajl_gen_array_close(g); -+} -+ -+bool json_gen_init(yajl_gen *g, struct parser_context *ctx) { -+ *g = yajl_gen_alloc(NULL); -+ if (NULL == *g) { -+ return false; -+ -+ } -+ yajl_gen_config(*g, yajl_gen_beautify, !(ctx->options & GEN_OPTIONS_SIMPLIFY)); -+ yajl_gen_config(*g, yajl_gen_validate_utf8, !(ctx->options & GEN_OPTIONS_NOT_VALIDATE_UTF8)); -+ return true; -+} -+ -+yajl_val get_val(yajl_val tree, const char *name, yajl_type type) { -+ const char *path[] = { name, NULL }; -+ return yajl_tree_get(tree, path, type); -+} -+ -+void *safe_malloc(size_t size) { -+ void *ret = NULL; -+ if (size == 0) { -+ abort(); -+ } -+ ret = calloc(1, size); -+ if (ret == NULL) { -+ abort(); -+ } -+ return ret; -+} -+ -+int common_safe_double(const char *numstr, double *converted) { -+ char *err_str = NULL; -+ double d; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } -+ -+ errno = 0; -+ d = strtod(numstr, &err_str); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err_str == NULL || err_str == numstr || *err_str != '\0') { -+ 
return -EINVAL; -+ } -+ -+ *converted = d; -+ return 0; -+} -+ -+int common_safe_uint8(const char *numstr, uint8_t *converted) { -+ char *err = NULL; -+ unsigned long int uli; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } -+ -+ errno = 0; -+ uli = strtoul(numstr, &err, 0); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err == NULL || err == numstr || *err != '\0') { -+ return -EINVAL; -+ } -+ -+ if (uli > UINT8_MAX) { -+ return -ERANGE; -+ } -+ -+ *converted = (uint8_t)uli; -+ return 0; -+} -+ -+int common_safe_uint16(const char *numstr, uint16_t *converted) { -+ char *err = NULL; -+ unsigned long int uli; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } -+ -+ errno = 0; -+ uli = strtoul(numstr, &err, 0); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err == NULL || err == numstr || *err != '\0') { -+ return -EINVAL; -+ } -+ -+ if (uli > UINT16_MAX) { -+ return -ERANGE; -+ } -+ -+ *converted = (uint16_t)uli; -+ return 0; -+} -+ -+int common_safe_uint32(const char *numstr, uint32_t *converted) { -+ char *err = NULL; -+ unsigned long long int ull; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } -+ -+ errno = 0; -+ ull = strtoull(numstr, &err, 0); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err == NULL || err == numstr || *err != '\0') { -+ return -EINVAL; -+ } -+ -+ if (ull > UINT32_MAX) { -+ return -ERANGE; -+ } -+ -+ *converted = (uint32_t)ull; -+ return 0; -+} -+ -+int common_safe_uint64(const char *numstr, uint64_t *converted) { -+ char *err = NULL; -+ unsigned long long int ull; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } -+ -+ errno = 0; -+ ull = strtoull(numstr, &err, 0); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err == NULL || err == numstr || *err != '\0') { -+ return -EINVAL; -+ } -+ -+ *converted = (uint64_t)ull; -+ return 0; -+} -+ -+int common_safe_uint(const char *numstr, unsigned int *converted) { -+ char *err = NULL; -+ unsigned long long int ull; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } 
-+ -+ errno = 0; -+ ull = strtoull(numstr, &err, 0); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err == NULL || err == numstr || *err != '\0') { -+ return -EINVAL; -+ } -+ -+ if (ull > UINT_MAX) { -+ return -ERANGE; -+ } -+ -+ *converted = (unsigned int)ull; -+ return 0; -+} -+ -+int common_safe_int8(const char *numstr, int8_t *converted) { -+ char *err = NULL; -+ long int li; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } -+ -+ errno = 0; -+ li = strtol(numstr, &err, 0); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err == NULL || err == numstr || *err != '\0') { -+ return -EINVAL; -+ } -+ -+ if (li > INT8_MAX || li < INT8_MIN) { -+ return -ERANGE; -+ } -+ -+ *converted = (int8_t)li; -+ return 0; -+} -+ -+int common_safe_int16(const char *numstr, int16_t *converted) { -+ char *err = NULL; -+ long int li; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } -+ -+ errno = 0; -+ li = strtol(numstr, &err, 0); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err == NULL || err == numstr || *err != '\0') { -+ return -EINVAL; -+ } -+ -+ if (li > INT16_MAX || li < INT16_MIN) { -+ return -ERANGE; -+ } -+ -+ *converted = (int16_t)li; -+ return 0; -+} -+ -+int common_safe_int32(const char *numstr, int32_t *converted) { -+ char *err = NULL; -+ long long int lli; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } -+ -+ errno = 0; -+ lli = strtol(numstr, &err, 0); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err == NULL || err == numstr || *err != '\0') { -+ return -EINVAL; -+ } -+ -+ if (lli > INT32_MAX || lli < INT32_MIN) { -+ return -ERANGE; -+ } -+ -+ *converted = (int32_t)lli; -+ return 0; -+} -+ -+int common_safe_int64(const char *numstr, int64_t *converted) { -+ char *err = NULL; -+ long long int lli; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } -+ -+ errno = 0; -+ lli = strtoll(numstr, &err, 0); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err == NULL || err == numstr || *err != '\0') { -+ return -EINVAL; -+ } -+ -+ 
*converted = (int64_t)lli; -+ return 0; -+} -+ -+int common_safe_int(const char *numstr, int *converted) { -+ char *err = NULL; -+ long long int lli; -+ -+ if (numstr == NULL) { -+ return -EINVAL; -+ } -+ -+ errno = 0; -+ lli = strtol(numstr, &err, 0); -+ if (errno > 0) { -+ return -errno; -+ } -+ -+ if (err == NULL || err == numstr || *err != '\0') { -+ return -EINVAL; -+ } -+ -+ if (lli > INT_MAX || lli < INT_MIN) { -+ return -ERANGE; -+ } -+ -+ *converted = (int)lli; -+ return 0; -+} -+ -+yajl_gen_status gen_json_map_int_int(void *ctx, json_map_int_int *map, struct parser_context *ptx, parser_error *err) { -+ yajl_gen_status stat = yajl_gen_status_ok; -+ yajl_gen g = (yajl_gen) ctx; -+ size_t len = 0, i = 0; -+ if (map != NULL) { -+ len = map->len; -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ } -+ stat = reformat_start_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ -+ } -+ for (i = 0; i < len; i++) { -+ char numstr[MAX_NUM_STR_LEN]; -+ int nret; -+ nret = snprintf(numstr, MAX_NUM_STR_LEN, "%lld", (long long int)map->keys[i]); -+ if (nret < 0 || nret >= MAX_NUM_STR_LEN) { -+ if (!*err && asprintf(err, "Error to print string") < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ return yajl_gen_in_error_state; -+ } -+ stat = reformat_string(g, numstr, strlen(numstr)); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_int(g, map->values[i]); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ } -+ -+ stat = reformat_end_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ } -+ return yajl_gen_status_ok; -+} -+ -+void free_json_map_int_int(json_map_int_int *map) { -+ if (map != NULL) { -+ size_t i; -+ for (i = 0; i < map->len; 
i++) { -+ // No need to free key for type int -+ // No need to free value for type int -+ } -+ free(map->keys); -+ map->keys = NULL; -+ free(map->values); -+ map->values = NULL; -+ free(map); -+ } -+} -+json_map_int_int *make_json_map_int_int(yajl_val src, struct parser_context *ctx, parser_error *err) { -+ json_map_int_int *ret = NULL; -+ if (src != NULL && YAJL_GET_OBJECT(src) != NULL) { -+ size_t i; -+ size_t len = YAJL_GET_OBJECT(src)->len; -+ if (len > SIZE_MAX / sizeof(int) - 1) { -+ return NULL; -+ } -+ ret = safe_malloc(sizeof(*ret)); -+ ret->len = len; -+ ret->keys = safe_malloc((len + 1) * sizeof(int)); -+ ret->values = safe_malloc((len + 1) * sizeof(int)); -+ for (i = 0; i < len; i++) { -+ const char *srckey = YAJL_GET_OBJECT(src)->keys[i]; -+ yajl_val srcval = YAJL_GET_OBJECT(src)->values[i]; -+ -+ if (srckey != NULL) { -+ int invalid; -+ invalid = common_safe_int(srckey, &(ret->keys[i])); -+ if (invalid) { -+ if (*err == NULL && asprintf(err, "Invalid key '%s' with type 'int': %s", srckey, strerror(-invalid)) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_int_int(ret); -+ return NULL; -+ } -+ } -+ -+ if (srcval != NULL) { -+ int invalid; -+ if (!YAJL_IS_NUMBER(srcval)) { -+ if (*err == NULL && asprintf(err, "Invalid value with type 'int' for key '%s'", srckey) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_int_int(ret); -+ return NULL; -+ } -+ invalid = common_safe_int(YAJL_GET_NUMBER(srcval), &(ret->values[i])); -+ if (invalid) { -+ if (*err == NULL && asprintf(err, "Invalid value with type 'int' for key '%s': %s", srckey, strerror(-invalid)) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_int_int(ret); -+ return NULL; -+ } -+ } -+ } -+ } -+ return ret; -+} -+int append_json_map_int_int(json_map_int_int *map, int key, int val) { -+ size_t len; -+ int *keys = NULL; -+ int *vals = NULL; -+ -+ if (map == NULL) { -+ return -1; -+ } -+ -+ if ((SIZE_MAX / 
sizeof(int) - 1) < map->len) { -+ return -1; -+ } -+ -+ len = map->len + 1; -+ keys = safe_malloc(len * sizeof(int)); -+ vals = safe_malloc(len * sizeof(int)); -+ -+ if (map->len) { -+ (void)memcpy(keys, map->keys, map->len * sizeof(int)); -+ (void)memcpy(vals, map->values, map->len * sizeof(int)); -+ } -+ free(map->keys); -+ map->keys = keys; -+ free(map->values); -+ map->values = vals; -+ map->keys[map->len] = key; -+ map->values[map->len] = val; -+ -+ map->len++; -+ return 0; -+} -+ -+yajl_gen_status gen_json_map_int_bool(void *ctx, json_map_int_bool *map, struct parser_context *ptx, parser_error *err) { -+ yajl_gen_status stat = yajl_gen_status_ok; -+ yajl_gen g = (yajl_gen) ctx; -+ size_t len = 0, i = 0; -+ if (map != NULL) { -+ len = map->len; -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ } -+ stat = reformat_start_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ -+ } -+ for (i = 0; i < len; i++) { -+ char numstr[MAX_NUM_STR_LEN]; -+ int nret; -+ nret = snprintf(numstr, MAX_NUM_STR_LEN, "%lld", (long long int)map->keys[i]); -+ if (nret < 0 || nret >= MAX_NUM_STR_LEN) { -+ if (!*err && asprintf(err, "Error to print string") < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ return yajl_gen_in_error_state; -+ } -+ stat = reformat_string(g, numstr, strlen(numstr)); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_bool(g, map->values[i]); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ } -+ -+ stat = reformat_end_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ } -+ return yajl_gen_status_ok; -+} -+ -+void free_json_map_int_bool(json_map_int_bool *map) { -+ if (map != NULL) { -+ free(map->keys); -+ map->keys = NULL; 
-+ free(map->values); -+ map->values = NULL; -+ free(map); -+ } -+} -+json_map_int_bool *make_json_map_int_bool(yajl_val src, struct parser_context *ctx, parser_error *err) { -+ json_map_int_bool *ret = NULL; -+ if (src != NULL && YAJL_GET_OBJECT(src) != NULL) { -+ size_t i; -+ size_t len = YAJL_GET_OBJECT(src)->len; -+ if (len > SIZE_MAX / sizeof(int) - 1) { -+ return NULL; -+ } -+ ret = safe_malloc(sizeof(*ret)); -+ ret->len = len; -+ ret->keys = safe_malloc((len + 1) * sizeof(int)); -+ ret->values = safe_malloc((len + 1) * sizeof(bool)); -+ for (i = 0; i < len; i++) { -+ const char *srckey = YAJL_GET_OBJECT(src)->keys[i]; -+ yajl_val srcval = YAJL_GET_OBJECT(src)->values[i]; -+ -+ if (srckey != NULL) { -+ int invalid; -+ invalid = common_safe_int(srckey, &(ret->keys[i])); -+ if (invalid) { -+ if (*err == NULL && asprintf(err, "Invalid key '%s' with type 'int': %s", srckey, strerror(-invalid)) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_int_bool(ret); -+ return NULL; -+ } -+ } -+ -+ if (srcval != NULL) { -+ if (YAJL_IS_TRUE(srcval)) { -+ ret->values[i] = true; -+ } else if (YAJL_IS_FALSE(srcval)) { -+ ret->values[i] = false; -+ } else { -+ if (*err == NULL && asprintf(err, "Invalid value with type 'bool' for key '%s'", srckey) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_int_bool(ret); -+ return NULL; -+ } -+ } -+ } -+ } -+ return ret; -+} -+int append_json_map_int_bool(json_map_int_bool *map, int key, bool val) { -+ size_t len; -+ int *keys = NULL; -+ bool *vals = NULL; -+ -+ if (map == NULL) { -+ return -1; -+ } -+ -+ if ((SIZE_MAX / sizeof(int) - 1) < map->len || (SIZE_MAX / sizeof(bool) - 1) < map->len) { -+ return -1; -+ } -+ -+ len = map->len + 1; -+ keys = safe_malloc(len * sizeof(int)); -+ vals = safe_malloc(len * sizeof(bool)); -+ -+ if (map->len) { -+ (void)memcpy(keys, map->keys, map->len * sizeof(int)); -+ (void)memcpy(vals, map->values, map->len * sizeof(bool)); -+ } -+ 
free(map->keys); -+ map->keys = keys; -+ free(map->values); -+ map->values = vals; -+ map->keys[map->len] = key; -+ map->values[map->len] = val; -+ -+ map->len++; -+ return 0; -+} -+ -+yajl_gen_status gen_json_map_int_string(void *ctx, json_map_int_string *map, struct parser_context *ptx, parser_error *err) { -+ yajl_gen_status stat = yajl_gen_status_ok; -+ yajl_gen g = (yajl_gen) ctx; -+ size_t len = 0, i = 0; -+ if (map != NULL) { -+ len = map->len; -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ } -+ stat = reformat_start_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ -+ } -+ for (i = 0; i < len; i++) { -+ char numstr[MAX_NUM_STR_LEN]; -+ int nret; -+ nret = snprintf(numstr, MAX_NUM_STR_LEN, "%lld", (long long int)map->keys[i]); -+ if (nret < 0 || nret >= MAX_NUM_STR_LEN) { -+ if (!*err && asprintf(err, "Error to print string") < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ return yajl_gen_in_error_state; -+ } -+ stat = reformat_string(g, numstr, strlen(numstr)); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_string(g, map->values[i], strlen(map->values[i]));; -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ } -+ -+ stat = reformat_end_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ } -+ return yajl_gen_status_ok; -+} -+ -+void free_json_map_int_string(json_map_int_string *map) { -+ if (map != NULL) { -+ size_t i; -+ for (i = 0; i < map->len; i++) { -+ // No need to free key for type int -+ free(map->values[i]); -+ map->values[i] = NULL; -+ } -+ free(map->keys); -+ map->keys = NULL; -+ free(map->values); -+ map->values = NULL; -+ free(map); -+ } -+} -+json_map_int_string *make_json_map_int_string(yajl_val 
src, struct parser_context *ctx, parser_error *err) { -+ json_map_int_string *ret = NULL; -+ if (src != NULL && YAJL_GET_OBJECT(src) != NULL) { -+ size_t i; -+ size_t len = YAJL_GET_OBJECT(src)->len; -+ if (len > SIZE_MAX / sizeof(char *) - 1) { -+ return NULL; -+ } -+ ret = safe_malloc(sizeof(*ret)); -+ ret->len = len; -+ ret->keys = safe_malloc((len + 1) * sizeof(int)); -+ ret->values = safe_malloc((len + 1) * sizeof(char *)); -+ for (i = 0; i < len; i++) { -+ const char *srckey = YAJL_GET_OBJECT(src)->keys[i]; -+ yajl_val srcval = YAJL_GET_OBJECT(src)->values[i]; -+ -+ if (srckey != NULL) { -+ int invalid; -+ invalid = common_safe_int(srckey, &(ret->keys[i])); -+ if (invalid) { -+ if (*err == NULL && asprintf(err, "Invalid key '%s' with type 'int': %s", srckey, strerror(-invalid)) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_int_string(ret); -+ return NULL; -+ } -+ } -+ -+ if (srcval != NULL) { -+ if (!YAJL_IS_STRING(srcval)) { -+ if (*err == NULL && asprintf(err, "Invalid value with type 'string' for key '%s'", srckey) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_int_string(ret); -+ return NULL; -+ } -+ char *str = YAJL_GET_STRING(srcval); -+ ret->values[i] = safe_strdup(str ? 
str : ""); -+ } -+ } -+ } -+ return ret; -+} -+int append_json_map_int_string(json_map_int_string *map, int key, const char *val) { -+ size_t len; -+ int *keys = NULL; -+ char **vals = NULL; -+ -+ if (map == NULL) { -+ return -1; -+ } -+ -+ if ((SIZE_MAX / sizeof(int) - 1) < map->len || (SIZE_MAX / sizeof(char *) - 1) < map->len) { -+ return -1; -+ } -+ -+ len = map->len + 1; -+ keys = safe_malloc(len * sizeof(int)); -+ vals = safe_malloc(len * sizeof(char *)); -+ -+ if (map->len) { -+ (void)memcpy(keys, map->keys, map->len * sizeof(int)); -+ (void)memcpy(vals, map->values, map->len * sizeof(char *)); -+ } -+ free(map->keys); -+ map->keys = keys; -+ free(map->values); -+ map->values = vals; -+ map->keys[map->len] = key; -+ map->values[map->len] = safe_strdup(val ? val : ""); -+ -+ map->len++; -+ return 0; -+} -+ -+yajl_gen_status gen_json_map_string_int(void *ctx, json_map_string_int *map, struct parser_context *ptx, parser_error *err) { -+ yajl_gen_status stat = yajl_gen_status_ok; -+ yajl_gen g = (yajl_gen) ctx; -+ size_t len = 0, i = 0; -+ if (map != NULL) { -+ len = map->len; -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ } -+ stat = reformat_start_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ -+ } -+ for (i = 0; i < len; i++) { -+ stat = reformat_string(g, map->keys[i], strlen(map->keys[i])); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_int(g, map->values[i]); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ } -+ -+ stat = reformat_end_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ } -+ return yajl_gen_status_ok; -+} -+ -+void free_json_map_string_int(json_map_string_int *map) { -+ if (map != NULL) { -+ size_t i; -+ for 
(i = 0; i < map->len; i++) { -+ free(map->keys[i]); -+ map->keys[i] = NULL; -+ // No need to free value for type int -+ } -+ free(map->keys); -+ map->keys = NULL; -+ free(map->values); -+ map->values = NULL; -+ free(map); -+ } -+} -+json_map_string_int *make_json_map_string_int(yajl_val src, struct parser_context *ctx, parser_error *err) { -+ json_map_string_int *ret = NULL; -+ if (src != NULL && YAJL_GET_OBJECT(src) != NULL) { -+ size_t i; -+ size_t len = YAJL_GET_OBJECT(src)->len; -+ if (len > SIZE_MAX / sizeof(char *) - 1) { -+ return NULL; -+ } -+ ret = safe_malloc(sizeof(*ret)); -+ ret->len = len; -+ ret->keys = safe_malloc((len + 1) * sizeof(char *)); -+ ret->values = safe_malloc((len + 1) * sizeof(int)); -+ for (i = 0; i < len; i++) { -+ const char *srckey = YAJL_GET_OBJECT(src)->keys[i]; -+ yajl_val srcval = YAJL_GET_OBJECT(src)->values[i]; -+ ret->keys[i] = safe_strdup(srckey ? srckey : ""); -+ -+ if (srcval != NULL) { -+ int invalid; -+ if (!YAJL_IS_NUMBER(srcval)) { -+ if (*err == NULL && asprintf(err, "Invalid value with type 'int' for key '%s'", srckey) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_string_int(ret); -+ return NULL; -+ } -+ invalid = common_safe_int(YAJL_GET_NUMBER(srcval), &(ret->values[i])); -+ if (invalid) { -+ if (*err == NULL && asprintf(err, "Invalid value with type 'int' for key '%s': %s", srckey, strerror(-invalid)) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_string_int(ret); -+ return NULL; -+ } -+ } -+ } -+ } -+ return ret; -+} -+int append_json_map_string_int(json_map_string_int *map, const char *key, int val) { -+ size_t len; -+ char **keys = NULL; -+ int *vals = NULL; -+ -+ if (map == NULL) { -+ return -1; -+ } -+ -+ if ((SIZE_MAX / sizeof(char *) - 1) < map->len || (SIZE_MAX / sizeof(int) - 1) < map->len) { -+ return -1; -+ } -+ -+ len = map->len + 1; -+ keys = safe_malloc(len * sizeof(char *)); -+ vals = safe_malloc(len * sizeof(int)); -+ -+ if 
(map->len) { -+ (void)memcpy(keys, map->keys, map->len * sizeof(char *)); -+ (void)memcpy(vals, map->values, map->len * sizeof(int)); -+ } -+ free(map->keys); -+ map->keys = keys; -+ free(map->values); -+ map->values = vals; -+ map->keys[map->len] = safe_strdup(key ? key : ""); -+ map->values[map->len] = val; -+ -+ map->len++; -+ return 0; -+} -+ -+yajl_gen_status gen_json_map_string_bool(void *ctx, json_map_string_bool *map, struct parser_context *ptx, parser_error *err) { -+ yajl_gen_status stat = yajl_gen_status_ok; -+ yajl_gen g = (yajl_gen) ctx; -+ size_t len = 0, i = 0; -+ if (map != NULL) { -+ len = map->len; -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ } -+ stat = reformat_start_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ -+ } -+ for (i = 0; i < len; i++) { -+ stat = reformat_string(g, map->keys[i], strlen(map->keys[i])); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_bool(g, map->values[i]); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ } -+ -+ stat = reformat_end_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ } -+ return yajl_gen_status_ok; -+} -+ -+void free_json_map_string_bool(json_map_string_bool *map) { -+ if (map != NULL) { -+ size_t i; -+ for (i = 0; i < map->len; i++) { -+ free(map->keys[i]); -+ map->keys[i] = NULL; -+ // No need to free value for type bool -+ } -+ free(map->keys); -+ map->keys = NULL; -+ free(map->values); -+ map->values = NULL; -+ free(map); -+ } -+} -+json_map_string_bool *make_json_map_string_bool(yajl_val src, struct parser_context *ctx, parser_error *err) { -+ json_map_string_bool *ret = NULL; -+ if (src != NULL && YAJL_GET_OBJECT(src) != NULL) { -+ size_t i; -+ size_t len = 
YAJL_GET_OBJECT(src)->len; -+ if (len > SIZE_MAX / sizeof(char *) - 1) { -+ return NULL; -+ } -+ ret = safe_malloc(sizeof(*ret)); -+ ret->len = len; -+ ret->keys = safe_malloc((len + 1) * sizeof(char *)); -+ ret->values = safe_malloc((len + 1) * sizeof(bool)); -+ for (i = 0; i < len; i++) { -+ const char *srckey = YAJL_GET_OBJECT(src)->keys[i]; -+ yajl_val srcval = YAJL_GET_OBJECT(src)->values[i]; -+ ret->keys[i] = safe_strdup(srckey ? srckey : ""); -+ -+ if (srcval != NULL) { -+ if (YAJL_IS_TRUE(srcval)) { -+ ret->values[i] = true; -+ } else if (YAJL_IS_FALSE(srcval)) { -+ ret->values[i] = false; -+ } else { -+ if (*err == NULL && asprintf(err, "Invalid value with type 'bool' for key '%s'", srckey) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_string_bool(ret); -+ return NULL; -+ } -+ } -+ } -+ } -+ return ret; -+} -+ -+int append_json_map_string_bool(json_map_string_bool *map, const char *key, bool val) { -+ size_t len; -+ char **keys = NULL; -+ bool *vals = NULL; -+ -+ if (map == NULL) { -+ return -1; -+ } -+ -+ if ((SIZE_MAX / sizeof(char *) - 1) < map->len || (SIZE_MAX / sizeof(bool) - 1) < map->len) { -+ return -1; -+ } -+ -+ len = map->len + 1; -+ keys = safe_malloc(len * sizeof(char *)); -+ vals = safe_malloc(len * sizeof(bool)); -+ -+ if (map->len) { -+ (void)memcpy(keys, map->keys, map->len * sizeof(char *)); -+ (void)memcpy(vals, map->values, map->len * sizeof(bool)); -+ } -+ free(map->keys); -+ map->keys = keys; -+ free(map->values); -+ map->values = vals; -+ map->keys[map->len] = safe_strdup(key ? 
key : ""); -+ map->values[map->len] = val; -+ -+ map->len++; -+ return 0; -+} -+ -+yajl_gen_status gen_json_map_string_string(void *ctx, json_map_string_string *map, struct parser_context *ptx, parser_error *err) { -+ yajl_gen_status stat = yajl_gen_status_ok; -+ yajl_gen g = (yajl_gen) ctx; -+ size_t len = 0, i = 0; -+ if (map != NULL) { -+ len = map->len; -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ } -+ stat = reformat_start_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ -+ } -+ for (i = 0; i < len; i++) { -+ stat = reformat_string(g, map->keys[i], strlen(map->keys[i])); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_string(g, map->values[i], strlen(map->values[i]));; -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ } -+ -+ stat = reformat_end_map(g); -+ if (yajl_gen_status_ok != stat) { -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if (!len && !(ptx->options & GEN_OPTIONS_SIMPLIFY)) { -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ } -+ return yajl_gen_status_ok; -+} -+ -+void free_json_map_string_string(json_map_string_string *map) { -+ if (map != NULL) { -+ size_t i; -+ for (i = 0; i < map->len; i++) { -+ free(map->keys[i]); -+ map->keys[i] = NULL; -+ free(map->values[i]); -+ map->values[i] = NULL; -+ } -+ free(map->keys); -+ map->keys = NULL; -+ free(map->values); -+ map->values = NULL; -+ free(map); -+ } -+} -+json_map_string_string *make_json_map_string_string(yajl_val src, struct parser_context *ctx, parser_error *err) { -+ json_map_string_string *ret = NULL; -+ if (src != NULL && YAJL_GET_OBJECT(src) != NULL) { -+ size_t i; -+ size_t len = YAJL_GET_OBJECT(src)->len; -+ if (len > SIZE_MAX / sizeof(char *) - 1) { -+ return NULL; -+ } -+ ret = safe_malloc(sizeof(*ret)); -+ ret->len = len; -+ ret->keys = safe_malloc((len + 1) * sizeof(char *)); -+ ret->values = 
safe_malloc((len + 1) * sizeof(char *)); -+ for (i = 0; i < len; i++) { -+ const char *srckey = YAJL_GET_OBJECT(src)->keys[i]; -+ yajl_val srcval = YAJL_GET_OBJECT(src)->values[i]; -+ ret->keys[i] = safe_strdup(srckey ? srckey : ""); -+ -+ if (srcval != NULL) { -+ if (!YAJL_IS_STRING(srcval)) { -+ if (*err == NULL && asprintf(err, "Invalid value with type 'string' for key '%s'", srckey) < 0) { -+ *(err) = safe_strdup("error allocating memory"); -+ } -+ free_json_map_string_string(ret); -+ return NULL; -+ } -+ char *str = YAJL_GET_STRING(srcval); -+ ret->values[i] = safe_strdup(str ? str : ""); -+ } -+ } -+ } -+ return ret; -+} -+int append_json_map_string_string(json_map_string_string *map, const char *key, const char *val) { -+ size_t len, i; -+ char **keys = NULL; -+ char **vals = NULL; -+ -+ if (map == NULL) { -+ return -1; -+ } -+ -+ for (i = 0; i < map->len; i++) { -+ if (strcmp(map->keys[i], key) == 0) { -+ free(map->values[i]); -+ map->values[i] = safe_strdup(val ? val : ""); -+ return 0; -+ } -+ } -+ -+ if ((SIZE_MAX / sizeof(char *) - 1) < map->len) { -+ return -1; -+ } -+ -+ len = map->len + 1; -+ keys = safe_malloc(len * sizeof(char *)); -+ vals = safe_malloc(len * sizeof(char *)); -+ -+ if (map->len) { -+ (void)memcpy(keys, map->keys, map->len * sizeof(char *)); -+ (void)memcpy(vals, map->values, map->len * sizeof(char *)); -+ } -+ free(map->keys); -+ map->keys = keys; -+ free(map->values); -+ map->values = vals; -+ map->keys[map->len] = safe_strdup(key ? key : ""); -+ map->values[map->len] = safe_strdup(val ? val : ""); -+ -+ map->len++; -+ return 0; -+} -diff --git a/src/lxc/json/json_common.h b/src/lxc/json/json_common.h -new file mode 100755 -index 000000000..60aa5fd93 ---- /dev/null -+++ b/src/lxc/json/json_common.h -@@ -0,0 +1,185 @@ -+// Auto generated file. Do not edit! 
-+#ifndef _JSON_COMMON_H -+#define _JSON_COMMON_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "utils.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+# undef linux -+ -+//options to report error if there is unknown key found in json -+# define PARSE_OPTIONS_STRICT 0x01 -+//options to generate all key and value -+# define GEN_OPTIONS_ALLKEYVALUE 0x02 -+//options to generate simplify(no indent) json string -+# define GEN_OPTIONS_SIMPLIFY 0x04 -+//options not to validate utf8 data -+# define GEN_OPTIONS_NOT_VALIDATE_UTF8 0x08 -+ -+#define GEN_SET_ERROR_AND_RETURN(stat, err) { \ -+ if (*(err) == NULL) {\ -+ if (asprintf(err, "%s: %s: %d: error generating json, errcode: %d", __FILE__, __func__, __LINE__, stat) < 0) { \ -+ *(err) = safe_strdup("error allocating memory"); \ -+ } \ -+ }\ -+ return stat; \ -+ } -+ -+typedef char *parser_error; -+ -+struct parser_context { -+ unsigned int options; -+ FILE *stderr; -+}; -+ -+yajl_gen_status reformat_number(void *ctx, const char *str, size_t len); -+ -+yajl_gen_status reformat_uint(void *ctx, long long unsigned int num); -+ -+yajl_gen_status reformat_int(void *ctx, long long int num); -+ -+yajl_gen_status reformat_double(void *ctx, double num); -+ -+yajl_gen_status reformat_string(void *ctx, const char *str, size_t len); -+ -+yajl_gen_status reformat_null(void *ctx); -+ -+yajl_gen_status reformat_bool(void *ctx, int boolean); -+ -+yajl_gen_status reformat_map_key(void *ctx, const char *str, size_t len); -+ -+yajl_gen_status reformat_start_map(void *ctx); -+ -+yajl_gen_status reformat_end_map(void *ctx); -+ -+yajl_gen_status reformat_start_array(void *ctx); -+ -+yajl_gen_status reformat_end_array(void *ctx); -+ -+bool json_gen_init(yajl_gen *g, struct parser_context *ctx); -+ -+yajl_val get_val(yajl_val tree, const char *name, yajl_type type); -+ -+void *safe_malloc(size_t size); -+ -+int common_safe_double(const char *numstr, double *converted); -+ -+int 
common_safe_uint8(const char *numstr, uint8_t *converted); -+ -+int common_safe_uint16(const char *numstr, uint16_t *converted); -+ -+int common_safe_uint32(const char *numstr, uint32_t *converted); -+ -+int common_safe_uint64(const char *numstr, uint64_t *converted); -+ -+int common_safe_uint(const char *numstr, unsigned int *converted); -+ -+int common_safe_int8(const char *numstr, int8_t *converted); -+ -+int common_safe_int16(const char *numstr, int16_t *converted); -+ -+int common_safe_int32(const char *numstr, int32_t *converted); -+ -+int common_safe_int64(const char *numstr, int64_t *converted); -+ -+int common_safe_int(const char *numstr, int *converted); -+ -+typedef struct { -+ int *keys; -+ int *values; -+ size_t len; -+} json_map_int_int; -+ -+void free_json_map_int_int(json_map_int_int *map); -+ -+json_map_int_int *make_json_map_int_int(yajl_val src, struct parser_context *ctx, parser_error *err); -+ -+yajl_gen_status gen_json_map_int_int(void *ctx, json_map_int_int *map, struct parser_context *ptx, parser_error *err); -+ -+int append_json_map_int_int(json_map_int_int *map, int key, int val); -+ -+typedef struct { -+ int *keys; -+ bool *values; -+ size_t len; -+} json_map_int_bool; -+ -+void free_json_map_int_bool(json_map_int_bool *map); -+ -+json_map_int_bool *make_json_map_int_bool(yajl_val src, struct parser_context *ctx, parser_error *err); -+ -+yajl_gen_status gen_json_map_int_bool(void *ctx, json_map_int_bool *map, struct parser_context *ptx, parser_error *err); -+ -+int append_json_map_int_bool(json_map_int_bool *map, int key, bool val); -+ -+typedef struct { -+ int *keys; -+ char **values; -+ size_t len; -+} json_map_int_string; -+ -+void free_json_map_int_string(json_map_int_string *map); -+ -+json_map_int_string *make_json_map_int_string(yajl_val src, struct parser_context *ctx, parser_error *err); -+ -+yajl_gen_status gen_json_map_int_string(void *ctx, json_map_int_string *map, struct parser_context *ptx, parser_error *err); -+ -+int 
append_json_map_int_string(json_map_int_string *map, int key, const char *val); -+ -+typedef struct { -+ char **keys; -+ int *values; -+ size_t len; -+} json_map_string_int; -+ -+void free_json_map_string_int(json_map_string_int *map); -+ -+json_map_string_int *make_json_map_string_int(yajl_val src, struct parser_context *ctx, parser_error *err); -+ -+yajl_gen_status gen_json_map_string_int(void *ctx, json_map_string_int *map, struct parser_context *ptx, parser_error *err); -+ -+int append_json_map_string_int(json_map_string_int *map, const char *key, int val); -+ -+typedef struct { -+ char **keys; -+ bool *values; -+ size_t len; -+} json_map_string_bool; -+ -+void free_json_map_string_bool(json_map_string_bool *map); -+ -+json_map_string_bool *make_json_map_string_bool(yajl_val src, struct parser_context *ctx, parser_error *err); -+ -+yajl_gen_status gen_json_map_string_bool(void *ctx, json_map_string_bool *map, struct parser_context *ptx, parser_error *err); -+ -+int append_json_map_string_bool(json_map_string_bool *map, const char *key, bool val); -+ -+typedef struct { -+ char **keys; -+ char **values; -+ size_t len; -+} json_map_string_string; -+ -+void free_json_map_string_string(json_map_string_string *map); -+ -+json_map_string_string *make_json_map_string_string(yajl_val src, struct parser_context *ctx, parser_error *err); -+ -+yajl_gen_status gen_json_map_string_string(void *ctx, json_map_string_string *map, struct parser_context *ptx, parser_error *err); -+ -+int append_json_map_string_string(json_map_string_string *map, const char *key, const char *val); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif -\ No newline at end of file -diff --git a/src/lxc/json/logger_json_file.c b/src/lxc/json/logger_json_file.c -new file mode 100644 -index 000000000..6abeef458 ---- /dev/null -+++ b/src/lxc/json/logger_json_file.c -@@ -0,0 +1,246 @@ -+// Generated from json-file.json. Do not edit! 
-+#ifndef _GNU_SOURCE -+#define _GNU_SOURCE -+#endif -+#include -+#include -+#include "logger_json_file.h" -+ -+logger_json_file *make_logger_json_file(yajl_val tree, struct parser_context *ctx, parser_error *err) { -+ logger_json_file *ret = NULL; -+ *err = 0; -+ if (tree == NULL) -+ return ret; -+ ret = safe_malloc(sizeof(*ret)); -+ { -+ yajl_val tmp = get_val(tree, "log", yajl_t_string); -+ if (tmp != NULL) { -+ char *str = YAJL_GET_STRING(tmp); -+ ret->log = (uint8_t *)safe_strdup(str ? str : ""); -+ ret->log_len = str != NULL ? strlen(str) : 0; -+ } -+ } -+ { -+ yajl_val val = get_val(tree, "stream", yajl_t_string); -+ if (val != NULL) { -+ char *str = YAJL_GET_STRING(val); -+ ret->stream = safe_strdup(str ? str : ""); -+ } -+ } -+ { -+ yajl_val val = get_val(tree, "time", yajl_t_string); -+ if (val != NULL) { -+ char *str = YAJL_GET_STRING(val); -+ ret->time = safe_strdup(str ? str : ""); -+ } -+ } -+ { -+ yajl_val tmp = get_val(tree, "attrs", yajl_t_string); -+ if (tmp != NULL) { -+ char *str = YAJL_GET_STRING(tmp); -+ ret->attrs = (uint8_t *)safe_strdup(str ? str : ""); -+ ret->attrs_len = str != NULL ? 
strlen(str) : 0; -+ } -+ } -+ -+ if (tree->type == yajl_t_object && (ctx->options & PARSE_OPTIONS_STRICT)) { -+ int i; -+ for (i = 0; i < tree->u.object.len; i++) -+ if (strcmp(tree->u.object.keys[i], "log") && -+ strcmp(tree->u.object.keys[i], "stream") && -+ strcmp(tree->u.object.keys[i], "time") && -+ strcmp(tree->u.object.keys[i], "attrs")) { -+ if (ctx->stderr > 0) -+ fprintf(ctx->stderr, "WARNING: unknown key found: %s\n", tree->u.object.keys[i]); -+ } -+ } -+ return ret; -+} -+ -+void free_logger_json_file(logger_json_file *ptr) { -+ if (ptr == NULL) -+ return; -+ free(ptr->log); -+ ptr->log = NULL; -+ free(ptr->stream); -+ ptr->stream = NULL; -+ free(ptr->time); -+ ptr->time = NULL; -+ free(ptr->attrs); -+ ptr->attrs = NULL; -+ free(ptr); -+} -+ -+yajl_gen_status gen_logger_json_file(yajl_gen g, logger_json_file *ptr, struct parser_context *ctx, parser_error *err) { -+ yajl_gen_status stat = yajl_gen_status_ok; -+ *err = 0; -+ stat = reformat_start_map(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) || (ptr != NULL && ptr->log != NULL && ptr->log_len)) { -+ const char *str = ""; -+ size_t len = 0; -+ stat = reformat_map_key(g, "log", strlen("log")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->log != NULL) { -+ str = (const char *)ptr->log; -+ len = ptr->log_len; -+ } -+ stat = reformat_string(g, str, len); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->stream != NULL)) { -+ char *str = ""; -+ stat = reformat_map_key(g, "stream", strlen("stream")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->stream != NULL) { -+ str = ptr->stream; -+ } -+ stat = reformat_string(g, str, strlen(str)); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ 
if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->time != NULL)) { -+ char *str = ""; -+ stat = reformat_map_key(g, "time", strlen("time")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->time != NULL) { -+ str = ptr->time; -+ } -+ stat = reformat_string(g, str, strlen(str)); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) || (ptr != NULL && ptr->attrs != NULL && ptr->attrs_len)) { -+ const char *str = ""; -+ size_t len = 0; -+ stat = reformat_map_key(g, "attrs", strlen("attrs")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->attrs != NULL) { -+ str = (const char *)ptr->attrs; -+ len = ptr->attrs_len; -+ } -+ stat = reformat_string(g, str, len); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_end_map(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ return yajl_gen_status_ok; -+} -+ -+ -+logger_json_file *logger_json_file_parse_file(const char *filename, struct parser_context *ctx, parser_error *err) { -+ logger_json_file *ptr = NULL; -+ size_t filesize; -+ char *content = NULL; -+ -+ if (filename == NULL || err == NULL) -+ return NULL; -+ -+ *err = NULL; -+ content = read_file(filename, &filesize); -+ if (content == NULL) { -+ if (asprintf(err, "cannot read the file: %s", filename) < 0) -+ *err = safe_strdup("error allocating memory"); -+ return NULL; -+ } -+ ptr = logger_json_file_parse_data(content, ctx, err); -+ free(content); -+ return ptr; -+} -+ -+logger_json_file *logger_json_file_parse_file_stream(FILE *stream, struct parser_context *ctx, parser_error *err) { -+ logger_json_file *ptr = NULL; -+ size_t filesize; -+ char *content = NULL ; -+ -+ if (stream == NULL || err == NULL) -+ return NULL; -+ -+ *err = NULL; -+ content = fread_file(stream, &filesize); -+ if (content 
== NULL) { -+ *err = safe_strdup("cannot read the file"); -+ return NULL; -+ } -+ ptr = logger_json_file_parse_data(content, ctx, err); -+ free(content); -+ return ptr; -+} -+ -+logger_json_file *logger_json_file_parse_data(const char *jsondata, struct parser_context *ctx, parser_error *err) { -+ logger_json_file *ptr = NULL; -+ yajl_val tree; -+ char errbuf[1024]; -+ struct parser_context tmp_ctx; -+ -+ if (jsondata == NULL || err == NULL) -+ return NULL; -+ -+ *err = NULL; -+ if (ctx == NULL) { -+ ctx = &tmp_ctx; -+ memset(&tmp_ctx, 0, sizeof(tmp_ctx)); -+ } -+ tree = yajl_tree_parse(jsondata, errbuf, sizeof(errbuf)); -+ if (tree == NULL) { -+ if (asprintf(err, "cannot parse the data: %s", errbuf) < 0) -+ *err = safe_strdup("error allocating memory"); -+ return NULL; -+ } -+ ptr = make_logger_json_file(tree, ctx, err); -+ yajl_tree_free(tree); -+ return ptr; -+} -+char *logger_json_file_generate_json(logger_json_file *ptr, struct parser_context *ctx, parser_error *err) { -+ yajl_gen g = NULL; -+ struct parser_context tmp_ctx; -+ const unsigned char *gen_buf = NULL; -+ char *json_buf = NULL; -+ size_t gen_len = 0; -+ -+ if (ptr == NULL || err == NULL) -+ return NULL; -+ -+ *err = NULL; -+ if (ctx == NULL) { -+ ctx = &tmp_ctx; -+ memset(&tmp_ctx, 0, sizeof(tmp_ctx)); -+ } -+ -+ if (!json_gen_init(&g, ctx)) { -+ *err = safe_strdup("Json_gen init failed"); -+ goto out; -+ } -+ if (yajl_gen_status_ok != gen_logger_json_file(g, ptr, ctx, err)) { -+ if (*err == NULL) -+ *err = safe_strdup("Failed to generate json"); -+ goto free_out; -+ } -+ yajl_gen_get_buf(g, &gen_buf, &gen_len); -+ if (gen_buf == NULL) { -+ *err = safe_strdup("Error to get generated json"); -+ goto free_out; -+ } -+ -+ if (gen_len == SIZE_MAX) { -+ *err = safe_strdup("Invalid buffer length"); -+ goto free_out; -+ } -+ json_buf = safe_malloc(gen_len + 1); -+ (void)memcpy(json_buf, gen_buf, gen_len); -+ json_buf[gen_len] = '\0'; -+ -+free_out: -+ yajl_gen_clear(g); -+ yajl_gen_free(g); -+out: -+ return 
json_buf; -+} -diff --git a/src/lxc/json/logger_json_file.h b/src/lxc/json/logger_json_file.h -new file mode 100644 -index 000000000..ad5af7b49 ---- /dev/null -+++ b/src/lxc/json/logger_json_file.h -@@ -0,0 +1,45 @@ -+// Generated from json-file.json. Do not edit! -+#ifndef LOGGER_JSON_FILE_SCHEMA_H -+#define LOGGER_JSON_FILE_SCHEMA_H -+ -+#include -+#include -+#include "json_common.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+typedef struct { -+ uint8_t *log; -+ size_t log_len; -+ -+ char *stream; -+ -+ char *time; -+ -+ uint8_t *attrs; -+ size_t attrs_len; -+ -+} -+logger_json_file; -+ -+void free_logger_json_file(logger_json_file *ptr); -+ -+logger_json_file *make_logger_json_file(yajl_val tree, struct parser_context *ctx, parser_error *err); -+ -+yajl_gen_status gen_logger_json_file(yajl_gen g, logger_json_file *ptr, struct parser_context *ctx, parser_error *err); -+ -+logger_json_file *logger_json_file_parse_file(const char *filename, struct parser_context *ctx, parser_error *err); -+ -+logger_json_file *logger_json_file_parse_file_stream(FILE *stream, struct parser_context *ctx, parser_error *err); -+ -+logger_json_file *logger_json_file_parse_data(const char *jsondata, struct parser_context *ctx, parser_error *err); -+ -+char *logger_json_file_generate_json(logger_json_file *ptr, struct parser_context *ctx, parser_error *err); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif -diff --git a/src/lxc/json/oci_runtime_hooks.c b/src/lxc/json/oci_runtime_hooks.c -new file mode 100644 -index 000000000..41ddb672d ---- /dev/null -+++ b/src/lxc/json/oci_runtime_hooks.c -@@ -0,0 +1,52 @@ -+/****************************************************************************** -+ * Copyright (C), 1988-1999, Huawei Tech. Co., Ltd. 
-+ * FileName: oci_runtime_hooks.c -+ * Author: maoweiyong Version: 0.1 Date: 2018-11-07 -+ * Explanation: provide oci runtime hooks functions -+ ******************************************************************************/ -+#ifndef _GNU_SOURCE -+#define _GNU_SOURCE -+#endif -+#include -+#include "oci_runtime_hooks.h" -+ -+#include "log.h" -+#include "utils.h" -+ -+#define PARSE_ERR_BUFFER_SIZE 1024 -+ -+oci_runtime_spec_hooks *oci_runtime_spec_hooks_parse_file(const char *filename, -+ struct parser_context *ctx, parser_error *err) -+{ -+ yajl_val tree; -+ size_t filesize; -+ -+ if (!filename || !err) { -+ return NULL; -+ } -+ *err = NULL; -+ struct parser_context tmp_ctx; -+ if (!ctx) { -+ ctx = &tmp_ctx; -+ memset(&tmp_ctx, 0, sizeof(tmp_ctx)); -+ } -+ char *content = read_file(filename, &filesize); -+ char errbuf[PARSE_ERR_BUFFER_SIZE]; -+ if (content == NULL) { -+ if (asprintf(err, "cannot read the file: %s", filename) < 0) { -+ *err = safe_strdup("error allocating memory"); -+ } -+ return NULL; -+ } -+ tree = yajl_tree_parse(content, errbuf, sizeof(errbuf)); -+ free(content); -+ if (tree == NULL) { -+ if (asprintf(err, "cannot parse the file: %s", errbuf) < 0) { -+ *err = safe_strdup("error allocating memory"); -+ } -+ return NULL; -+ } -+ oci_runtime_spec_hooks *ptr = make_oci_runtime_spec_hooks(tree, ctx, err); -+ yajl_tree_free(tree); -+ return ptr; -+} -diff --git a/src/lxc/json/oci_runtime_hooks.h b/src/lxc/json/oci_runtime_hooks.h -new file mode 100644 -index 000000000..bf570c9e0 ---- /dev/null -+++ b/src/lxc/json/oci_runtime_hooks.h -@@ -0,0 +1,15 @@ -+/****************************************************************************** -+ * Copyright (C), 1988-1999, Huawei Tech. Co., Ltd. 
-+ * FileName: oci_runtime_hooks.h -+ * Author: tanyifeng Version: 0.1 Date: 2018-11-08 -+ * Explanation: provide container oci runtime hooks function definition -+ ******************************************************************************/ -+#ifndef _CONTAINER_HOOKS_H -+# define _CONTAINER_HOOKS_H -+ -+# include "oci_runtime_spec.h" -+ -+oci_runtime_spec_hooks *oci_runtime_spec_hooks_parse_file(const char *filename, -+ struct parser_context *ctx, parser_error *err); -+ -+#endif -diff --git a/src/lxc/json/oci_runtime_spec.c b/src/lxc/json/oci_runtime_spec.c -new file mode 100644 -index 000000000..fd342deb9 ---- /dev/null -+++ b/src/lxc/json/oci_runtime_spec.c -@@ -0,0 +1,195 @@ -+// Generated from spec.json. Do not edit! -+#ifndef _GNU_SOURCE -+#define _GNU_SOURCE -+#endif -+#include -+#include -+#include "oci_runtime_spec.h" -+ -+oci_runtime_spec_hooks *make_oci_runtime_spec_hooks(yajl_val tree, struct parser_context *ctx, parser_error *err) { -+ oci_runtime_spec_hooks *ret = NULL; -+ *err = 0; -+ if (tree == NULL) -+ return ret; -+ ret = safe_malloc(sizeof(*ret)); -+ { -+ yajl_val tmp = get_val(tree, "prestart", yajl_t_array); -+ if (tmp != NULL && YAJL_GET_ARRAY(tmp) != NULL && YAJL_GET_ARRAY(tmp)->len > 0) { -+ size_t i; -+ ret->prestart_len = YAJL_GET_ARRAY(tmp)->len; -+ ret->prestart = safe_malloc((YAJL_GET_ARRAY(tmp)->len + 1) * sizeof(*ret->prestart)); -+ for (i = 0; i < YAJL_GET_ARRAY(tmp)->len; i++) { -+ yajl_val val = YAJL_GET_ARRAY(tmp)->values[i]; -+ ret->prestart[i] = make_defs_hook(val, ctx, err); -+ if (ret->prestart[i] == NULL) { -+ free_oci_runtime_spec_hooks(ret); -+ return NULL; -+ } -+ } -+ } -+ } -+ { -+ yajl_val tmp = get_val(tree, "poststart", yajl_t_array); -+ if (tmp != NULL && YAJL_GET_ARRAY(tmp) != NULL && YAJL_GET_ARRAY(tmp)->len > 0) { -+ size_t i; -+ ret->poststart_len = YAJL_GET_ARRAY(tmp)->len; -+ ret->poststart = safe_malloc((YAJL_GET_ARRAY(tmp)->len + 1) * sizeof(*ret->poststart)); -+ for (i = 0; i < YAJL_GET_ARRAY(tmp)->len; 
i++) { -+ yajl_val val = YAJL_GET_ARRAY(tmp)->values[i]; -+ ret->poststart[i] = make_defs_hook(val, ctx, err); -+ if (ret->poststart[i] == NULL) { -+ free_oci_runtime_spec_hooks(ret); -+ return NULL; -+ } -+ } -+ } -+ } -+ { -+ yajl_val tmp = get_val(tree, "poststop", yajl_t_array); -+ if (tmp != NULL && YAJL_GET_ARRAY(tmp) != NULL && YAJL_GET_ARRAY(tmp)->len > 0) { -+ size_t i; -+ ret->poststop_len = YAJL_GET_ARRAY(tmp)->len; -+ ret->poststop = safe_malloc((YAJL_GET_ARRAY(tmp)->len + 1) * sizeof(*ret->poststop)); -+ for (i = 0; i < YAJL_GET_ARRAY(tmp)->len; i++) { -+ yajl_val val = YAJL_GET_ARRAY(tmp)->values[i]; -+ ret->poststop[i] = make_defs_hook(val, ctx, err); -+ if (ret->poststop[i] == NULL) { -+ free_oci_runtime_spec_hooks(ret); -+ return NULL; -+ } -+ } -+ } -+ } -+ -+ if (tree->type == yajl_t_object && (ctx->options & PARSE_OPTIONS_STRICT)) { -+ int i; -+ for (i = 0; i < tree->u.object.len; i++) -+ if (strcmp(tree->u.object.keys[i], "prestart") && -+ strcmp(tree->u.object.keys[i], "poststart") && -+ strcmp(tree->u.object.keys[i], "poststop")) { -+ if (ctx->stderr > 0) -+ fprintf(ctx->stderr, "WARNING: unknown key found: %s\n", tree->u.object.keys[i]); -+ } -+ } -+ return ret; -+} -+ -+void free_oci_runtime_spec_hooks(oci_runtime_spec_hooks *ptr) { -+ if (ptr == NULL) -+ return; -+ if (ptr->prestart != NULL) { -+ size_t i; -+ for (i = 0; i < ptr->prestart_len; i++) -+ if (ptr->prestart[i] != NULL) { -+ free_defs_hook(ptr->prestart[i]); -+ ptr->prestart[i] = NULL; -+ } -+ free(ptr->prestart); -+ ptr->prestart = NULL; -+ } -+ if (ptr->poststart != NULL) { -+ size_t i; -+ for (i = 0; i < ptr->poststart_len; i++) -+ if (ptr->poststart[i] != NULL) { -+ free_defs_hook(ptr->poststart[i]); -+ ptr->poststart[i] = NULL; -+ } -+ free(ptr->poststart); -+ ptr->poststart = NULL; -+ } -+ if (ptr->poststop != NULL) { -+ size_t i; -+ for (i = 0; i < ptr->poststop_len; i++) -+ if (ptr->poststop[i] != NULL) { -+ free_defs_hook(ptr->poststop[i]); -+ ptr->poststop[i] = NULL; 
-+ } -+ free(ptr->poststop); -+ ptr->poststop = NULL; -+ } -+ free(ptr); -+} -+ -+yajl_gen_status gen_oci_runtime_spec_hooks(yajl_gen g, oci_runtime_spec_hooks *ptr, struct parser_context *ctx, parser_error *err) { -+ yajl_gen_status stat = yajl_gen_status_ok; -+ *err = 0; -+ stat = reformat_start_map(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->prestart != NULL)) { -+ size_t len = 0, i; -+ stat = reformat_map_key(g, "prestart", strlen("prestart")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->prestart != NULL) { -+ len = ptr->prestart_len; -+ } -+ if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ stat = reformat_start_array(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ for (i = 0; i < len; i++) { -+ stat = gen_defs_hook(g, ptr->prestart[i], ctx, err); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_end_array(g); -+ if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->poststart != NULL)) { -+ size_t len = 0, i; -+ stat = reformat_map_key(g, "poststart", strlen("poststart")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->poststart != NULL) { -+ len = ptr->poststart_len; -+ } -+ if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ stat = reformat_start_array(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ for (i = 0; i < len; i++) { -+ stat = gen_defs_hook(g, ptr->poststart[i], ctx, err); -+ if (yajl_gen_status_ok != stat) -+ 
GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_end_array(g); -+ if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ if ((ctx->options & GEN_OPTIONS_ALLKEYVALUE) ||(ptr != NULL && ptr->poststop != NULL)) { -+ size_t len = 0, i; -+ stat = reformat_map_key(g, "poststop", strlen("poststop")); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ if (ptr != NULL && ptr->poststop != NULL) { -+ len = ptr->poststop_len; -+ } -+ if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) -+ yajl_gen_config(g, yajl_gen_beautify, 0); -+ stat = reformat_start_array(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ for (i = 0; i < len; i++) { -+ stat = gen_defs_hook(g, ptr->poststop[i], ctx, err); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_end_array(g); -+ if (!len && !(ctx->options & GEN_OPTIONS_SIMPLIFY)) -+ yajl_gen_config(g, yajl_gen_beautify, 1); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ } -+ stat = reformat_end_map(g); -+ if (yajl_gen_status_ok != stat) -+ GEN_SET_ERROR_AND_RETURN(stat, err); -+ return yajl_gen_status_ok; -+} -diff --git a/src/lxc/json/oci_runtime_spec.h b/src/lxc/json/oci_runtime_spec.h -new file mode 100644 -index 000000000..ef3f1619a ---- /dev/null -+++ b/src/lxc/json/oci_runtime_spec.h -@@ -0,0 +1,37 @@ -+// Generated from spec.json. Do not edit! 
-+#ifndef OCI_RUNTIME_SPEC_SCHEMA_H -+#define OCI_RUNTIME_SPEC_SCHEMA_H -+ -+#include -+#include -+#include "json_common.h" -+#include "defs.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+typedef struct { -+ defs_hook **prestart; -+ size_t prestart_len; -+ -+ defs_hook **poststart; -+ size_t poststart_len; -+ -+ defs_hook **poststop; -+ size_t poststop_len; -+ -+} -+oci_runtime_spec_hooks; -+ -+void free_oci_runtime_spec_hooks(oci_runtime_spec_hooks *ptr); -+ -+oci_runtime_spec_hooks *make_oci_runtime_spec_hooks(yajl_val tree, struct parser_context *ctx, parser_error *err); -+ -+yajl_gen_status gen_oci_runtime_spec_hooks(yajl_gen g, oci_runtime_spec_hooks *ptr, struct parser_context *ctx, parser_error *err); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif -diff --git a/src/lxc/json/read-file.c b/src/lxc/json/read-file.c -new file mode 100644 -index 000000000..70e73e51a ---- /dev/null -+++ b/src/lxc/json/read-file.c -@@ -0,0 +1,95 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include "read-file.h" -+ -+#ifndef O_CLOEXEC -+#define O_CLOEXEC 02000000 -+#endif -+ -+char *fread_file(FILE *stream, size_t *length) -+{ -+ char *buf = NULL, *tmpbuf = NULL; -+ size_t off = 0; -+ -+ while (1) { -+ size_t ret, newsize; -+ -+ newsize = off + BUFSIZ + 1; -+ tmpbuf = (char *)calloc(1, newsize); -+ if (tmpbuf == NULL) { -+ goto out; -+ } -+ -+ if (buf) { -+ memcpy(tmpbuf, buf, off); -+ -+ memset(buf, 0, off); -+ -+ free(buf); -+ } -+ -+ buf = tmpbuf; -+ ret = fread(buf + off, 1, BUFSIZ, stream); -+ if (!ret && ferror(stream)) { -+ tmpbuf = NULL; -+ goto out; -+ } -+ if (ret < BUFSIZ || feof(stream)) { -+ *length = off + ret + 1; -+ buf[*length - 1] = '\0'; -+ return buf; -+ } -+ off += BUFSIZ; -+ } -+out: -+ if (buf) { -+ free(buf); -+ } -+ if (tmpbuf) { -+ free(tmpbuf); -+ } -+ return NULL; -+ -+} -+ -+char *read_file(const char *path, size_t *length) -+{ -+ char *buf = NULL; -+ char rpath[PATH_MAX + 1] = 
{0}; -+ int fd = -1; -+ int tmperrno; -+ FILE *fp = NULL; -+ -+ if (!path || !length) { -+ return NULL; -+ } -+ -+ if (strlen(path) > PATH_MAX || NULL == realpath(path, rpath)) { -+ return NULL; -+ } -+ -+ fd = open(rpath, O_RDONLY | O_CLOEXEC, 0640); -+ if (fd < 0) { -+ return NULL; -+ } -+ -+ fp = fdopen(fd, "r"); -+ tmperrno = errno; -+ if (!fp) { -+ close(fd); -+ errno = tmperrno; -+ return NULL; -+ } -+ -+ buf = fread_file(fp, length); -+ fclose(fp); -+ return buf; -+} -diff --git a/src/lxc/json/read-file.h b/src/lxc/json/read-file.h -new file mode 100644 -index 000000000..5d6e0eb62 ---- /dev/null -+++ b/src/lxc/json/read-file.h -@@ -0,0 +1,11 @@ -+#ifndef READ_FILE_H -+#define READ_FILE_H -+ -+#include -+#include -+ -+extern char *fread_file(FILE *stream, size_t *length); -+ -+extern char *read_file(const char *path, size_t *length); -+ -+#endif -diff --git a/src/lxc/log.c b/src/lxc/log.c -index 59644aa7a..79caa2cce 100644 ---- a/src/lxc/log.c -+++ b/src/lxc/log.c -@@ -44,7 +44,7 @@ - #define LXC_LOG_TIME_SIZE ((INTTYPE_TO_STRLEN(uint64_t)) * 2) - - int lxc_log_fd = -EBADF; --static bool wants_syslog = false; -+static int syslog_enable = 0; - int lxc_quiet_specified; - int lxc_log_use_global_fd; - static int lxc_loglevel_specified; -@@ -55,6 +55,38 @@ static char *log_vmname = NULL; - - lxc_log_define(log, lxc); - -+#ifdef HAVE_ISULAD -+static inline const char *isulad_get_fifo_path(const char *file) -+{ -+#define ISULAD_FIFO_PREFIX "fifo:" -+ -+ if (strncmp(file, ISULAD_FIFO_PREFIX, strlen(ISULAD_FIFO_PREFIX)) == 0) { -+ return (file + strlen(ISULAD_FIFO_PREFIX)); -+ } -+ return NULL; -+} -+ -+static int isulad_open_fifo(const char *file_path) -+{ -+#define LOG_FIFO_SIZE (1024 * 1024) -+ int fd; -+ -+ fd = lxc_unpriv(open(file_path, O_RDWR | O_NONBLOCK | O_CLOEXEC, 0640)); -+ if (fd == -1) { -+ fprintf(stderr, "Open fifo %s failed: %s\n", file_path, strerror(errno)); -+ return -1; -+ } -+ -+ if (fcntl(fd, F_SETPIPE_SZ, LOG_FIFO_SIZE) == -1) { -+ printf("Set 
fifo buffer size failed: %s", strerror(errno)); -+ close(fd); -+ return -1; -+ } -+ -+ return fd; -+} -+#endif -+ - static int lxc_log_priority_to_syslog(int priority) - { - switch (priority) { -@@ -128,7 +160,7 @@ static int log_append_syslog(const struct lxc_log_appender *appender, - __do_free char *msg = NULL; - const char *log_container_name; - -- if (!wants_syslog) -+ if (!syslog_enable) - return 0; - - log_container_name = lxc_log_get_container_name(); -@@ -321,6 +353,12 @@ static int log_append_logfile(const struct lxc_log_appender *appender, - #endif - - log_container_name = lxc_log_get_container_name(); -+#ifdef HAVE_ISULAD -+ /* use isulad log format */ -+ if (log_container_name != NULL && strlen(log_container_name) > 15) { -+ log_container_name = log_container_name + (strlen(log_container_name) - 15); -+ } -+#endif - - if (fd_to_use < 0) - fd_to_use = lxc_log_fd; -@@ -333,9 +371,13 @@ static int log_append_logfile(const struct lxc_log_appender *appender, - return ret; - - n = snprintf(buffer, sizeof(buffer), -+#if HAVE_ISULAD -+ "%15s %s %-8s %s - %s:%s:%d -", -+#else - "%s%s%s %s %-8s %s - %s:%s:%d - ", - log_prefix, - log_container_name ? " " : "", -+#endif - log_container_name ? log_container_name : "", - date_time, - lxc_log_priority_to_string(event->priority), -@@ -485,9 +527,10 @@ static int build_dir(const char *name) - *p = '\0'; - - ret = lxc_unpriv(mkdir(n, 0755)); -- *p = '/'; - if (ret && errno != EEXIST) - return log_error_errno(-errno, errno, "Failed to create directory \"%s\"", n); -+ -+ *p = '/'; - } - - return 0; -@@ -589,6 +632,13 @@ static int __lxc_log_set_file(const char *fname, int create_dirs) - return ret_errno(EINVAL); - } - -+#ifdef HAVE_ISULAD -+ fname = isulad_get_fifo_path(fname); -+ if (fname == NULL) { -+ return ret_errno(EINVAL); -+ } -+#endif -+ - #if USE_CONFIGPATH_LOGS - /* We don't build_dir for the default if the default is i.e. - * /var/lib/lxc/$container/$container.log. 
-@@ -598,7 +648,11 @@ static int __lxc_log_set_file(const char *fname, int create_dirs) - if (build_dir(fname)) - return log_error_errno(-errno, errno, "Failed to create dir for log file \"%s\"", fname); - -+#if HAVE_ISULAD -+ lxc_log_fd = isulad_open_fifo(fname); -+#else - lxc_log_fd = log_open(fname); -+#endif - if (lxc_log_fd < 0) - return lxc_log_fd; - -@@ -694,6 +748,9 @@ int lxc_log_init(struct lxc_log *log) - - if (lxc_log_fd >= 0) { - lxc_log_category_lxc.appender = &log_appender_logfile; -+#ifdef HAVE_ISULAD -+ if (!lxc_quiet_specified && !log->quiet) -+#endif - lxc_log_category_lxc.appender->next = &log_appender_stderr; - } - -@@ -738,14 +795,9 @@ int lxc_log_syslog(int facility) - return 0; - } - --void lxc_log_syslog_enable(void) --{ -- wants_syslog = true; --} -- --void lxc_log_syslog_disable(void) -+inline void lxc_log_enable_syslog(void) - { -- wants_syslog = false; -+ syslog_enable = 1; - } - - /* -diff --git a/src/lxc/log.h b/src/lxc/log.h -index 3f91d9bc5..d28065624 100644 ---- a/src/lxc/log.h -+++ b/src/lxc/log.h -@@ -3,9 +3,6 @@ - #ifndef __LXC_LOG_H - #define __LXC_LOG_H - --#ifndef _GNU_SOURCE --#define _GNU_SOURCE 1 --#endif - #include - #include - #include -@@ -17,7 +14,6 @@ - #include - - #include "conf.h" --#include "config.h" - - #ifndef O_CLOEXEC - #define O_CLOEXEC 02000000 -@@ -392,7 +388,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ - LXC_FATAL(&locinfo, format, ##__VA_ARGS__); \ - } while (0) - --#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD -+#if HAVE_M_FORMAT - #define SYSTRACE(format, ...) \ - TRACE("%m - " format, ##__VA_ARGS__) - #else -@@ -403,7 +399,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ - } while (0) - #endif - --#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD -+#if HAVE_M_FORMAT - #define SYSDEBUG(format, ...) 
\ - DEBUG("%m - " format, ##__VA_ARGS__) - #else -@@ -415,7 +411,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ - #endif - - --#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD -+#if HAVE_M_FORMAT - #define SYSINFO(format, ...) \ - INFO("%m - " format, ##__VA_ARGS__) - #else -@@ -426,7 +422,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ - } while (0) - #endif - --#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD -+#if HAVE_M_FORMAT - #define SYSNOTICE(format, ...) \ - NOTICE("%m - " format, ##__VA_ARGS__) - #else -@@ -437,7 +433,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ - } while (0) - #endif - --#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD -+#if HAVE_M_FORMAT - #define SYSWARN(format, ...) \ - WARN("%m - " format, ##__VA_ARGS__) - #else -@@ -448,7 +444,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ - } while (0) - #endif - --#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD -+#if HAVE_M_FORMAT - #define SYSERROR(format, ...) \ - ERROR("%m - " format, ##__VA_ARGS__) - #else -@@ -459,7 +455,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ - } while (0) - #endif - --#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD -+#if HAVE_M_FORMAT - #define CMD_SYSERROR(format, ...) \ - fprintf(stderr, "%s: %d: %s - %m - " format "\n", __FILE__, __LINE__, \ - __func__, ##__VA_ARGS__); -@@ -472,7 +468,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ - } while (0) - #endif - --#if HAVE_M_FORMAT && !ENABLE_COVERITY_BUILD -+#if HAVE_M_FORMAT - #define CMD_SYSINFO(format, ...) 
\ - printf("%s: %d: %s - %m - " format "\n", __FILE__, __LINE__, __func__, \ - ##__VA_ARGS__); -@@ -563,8 +559,7 @@ __lxc_unused static inline void LXC_##LEVEL(struct lxc_log_locinfo* locinfo, \ - extern int lxc_log_fd; - - extern int lxc_log_syslog(int facility); --extern void lxc_log_syslog_enable(void); --extern void lxc_log_syslog_disable(void); -+extern void lxc_log_enable_syslog(void); - extern int lxc_log_set_level(int *dest, int level); - extern int lxc_log_get_level(void); - extern bool lxc_log_has_valid_level(void); -diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c -index 02f824f97..f251e5e7e 100644 ---- a/src/lxc/lsm/apparmor.c -+++ b/src/lxc/lsm/apparmor.c -@@ -19,7 +19,7 @@ - #include "log.h" - #include "lsm.h" - #include "parse.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "utils.h" - - lxc_log_define(apparmor, lsm); -@@ -121,8 +121,8 @@ static const char AA_PROFILE_BASE[] = - " # deny reads from debugfs\n" - " deny /sys/kernel/debug/{,**} rwklx,\n" - "\n" --" # allow paths to be made dependent, shared, private or unbindable\n" --" # TODO: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n" -+" # allow paths to be made slave, shared, private or unbindable\n" -+" # FIXME: This currently doesn't work due to the apparmor parser treating those as allowing all mounts.\n" - "# mount options=(rw,make-slave) -> **,\n" - "# mount options=(rw,make-rslave) -> **,\n" - "# mount options=(rw,make-shared) -> **,\n" -@@ -343,7 +343,7 @@ static const char AA_PROFILE_NESTING_BASE[] = - " mount /var/lib/lxd/shmounts/ -> /var/lib/lxd/shmounts/,\n" - " mount options=bind /var/lib/lxd/shmounts/** -> /var/lib/lxd/**,\n" - "\n" --" # TODO: There doesn't seem to be a way to ask for:\n" -+" # FIXME: There doesn't seem to be a way to ask for:\n" - " # mount options=(ro,nosuid,nodev,noexec,remount,bind),\n" - " # as we always get mount to $cdir/proc/sys with those flags denied\n" - " # So allow all 
mounts until that is straightened out:\n" -@@ -538,7 +538,7 @@ static inline char *apparmor_namespace(const char *ctname, const char *lxcpath) - return full; - } - --/* TODO: This is currently run only in the context of a constructor (via the -+/* FIXME: This is currently run only in the context of a constructor (via the - * initial lsm_init() called due to its __attribute__((constructor)), so we - * do not have ERROR/... macros available, so there are some fprintf(stderr)s - * in there. -@@ -560,7 +560,7 @@ static bool check_apparmor_parser_version() - lxc_pclose(parserpipe); - /* We stay silent for now as this most likely means the shell - * lxc_popen executed failed to find the apparmor_parser binary. -- * See the TODO comment above for details. -+ * See the FIXME comment above for details. - */ - return false; - } -diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h -index 630eff0b4..ec2feaa5b 100644 ---- a/src/lxc/lxc.h -+++ b/src/lxc/lxc.h -@@ -32,9 +32,14 @@ struct lxc_handler; - * @daemonize : whether or not the container is daemonized - * Returns 0 on success, < 0 otherwise - */ -+#ifdef HAVE_ISULAD -+extern int lxc_start(char *const argv[], struct lxc_handler *handler, -+ const char *lxcpath, bool daemonize, int *error_num, -+ unsigned int start_timeout); -+#else - extern int lxc_start(char *const argv[], struct lxc_handler *handler, - const char *lxcpath, bool daemonize, int *error_num); -- -+#endif - /* - * Start the specified command inside an application container - * @name : the name of the container -@@ -44,9 +49,15 @@ extern int lxc_start(char *const argv[], struct lxc_handler *handler, - * @daemonize : whether or not the container is daemonized - * Returns 0 on success, < 0 otherwise - */ -+#ifdef HAVE_ISULAD -+extern int lxc_execute(const char *name, char *const argv[], int quiet, -+ struct lxc_handler *handler, const char *lxcpath, -+ bool daemonize, int *error_num, unsigned int start_timeout); -+#else - extern int lxc_execute(const char *name, char 
*const argv[], int quiet, - struct lxc_handler *handler, const char *lxcpath, - bool daemonize, int *error_num); -+#endif - - /* - * Close the fd associated with the monitoring -@@ -83,6 +94,13 @@ extern lxc_state_t lxc_state(const char *name, const char *lxcpath); - */ - extern struct lxc_container *lxc_container_new(const char *name, const char *configpath); - -+#ifdef HAVE_ISULAD -+/* -+ * Create a new container without loading config. -+ */ -+extern struct lxc_container *lxc_container_without_config_new(const char *name, const char *configpath); -+#endif -+ - /* - * Returns 1 on success, 0 on failure. - */ -diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c -index aac621482..eef98df67 100644 ---- a/src/lxc/lxccontainer.c -+++ b/src/lxc/lxccontainer.c -@@ -49,7 +49,7 @@ - #include "namespace.h" - #include "network.h" - #include "parse.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "start.h" - #include "state.h" - #include "storage.h" -@@ -62,6 +62,10 @@ - #include "utils.h" - #include "version.h" - -+#ifdef HAVE_ISULAD -+#include "exec_commands.h" -+#endif -+ - #if HAVE_OPENSSL - #include - #endif -@@ -281,6 +285,13 @@ static void lxc_container_free(struct lxc_container *c) - free(c->config_path); - c->config_path = NULL; - -+#ifdef HAVE_ISULAD -+ free(c->exit_fifo); -+ c->exit_fifo = NULL; -+ free(c->ocihookfile); -+ c->ocihookfile = NULL; -+#endif -+ - free(c); - } - -@@ -505,6 +516,20 @@ static bool do_lxcapi_is_running(struct lxc_container *c) - - WRAP_API(bool, lxcapi_is_running) - -+#ifdef HAVE_ISULAD -+static bool do_lxcapi_freeze(struct lxc_container *c) -+{ -+ if (!c || !c->lxc_conf) { -+ return false; -+ } -+ -+ if (lxc_freeze(c->lxc_conf, c->name, c->config_path) < 0) { -+ return false; -+ } -+ -+ return true; -+} -+#else - static bool do_lxcapi_freeze(struct lxc_container *c) - { - lxc_state_t s; -@@ -518,9 +543,25 @@ static bool do_lxcapi_freeze(struct lxc_container *c) - - return true; - } -+#endif -+ - - 
WRAP_API(bool, lxcapi_freeze) - -+#ifdef HAVE_ISULAD -+static bool do_lxcapi_unfreeze(struct lxc_container *c) -+{ -+ if (!c || !c->lxc_conf) { -+ return false; -+ } -+ -+ if (lxc_unfreeze(c->lxc_conf, c->name, c->config_path) < 0) { -+ return false; -+ } -+ -+ return true; -+} -+#else - static bool do_lxcapi_unfreeze(struct lxc_container *c) - { - lxc_state_t s; -@@ -534,15 +575,16 @@ static bool do_lxcapi_unfreeze(struct lxc_container *c) - - return true; - } -+#endif - - WRAP_API(bool, lxcapi_unfreeze) - --static int do_lxcapi_console_getfd(struct lxc_container *c, int *ttynum, int *ptmxfd) -+static int do_lxcapi_console_getfd(struct lxc_container *c, int *ttynum, int *masterfd) - { - if (!c) - return -1; - -- return lxc_terminal_getfd(c, ttynum, ptmxfd); -+ return lxc_terminal_getfd(c, ttynum, masterfd); - } - - WRAP_API_2(int, lxcapi_console_getfd, int *, int *) -@@ -623,6 +665,66 @@ static bool load_config_locked(struct lxc_container *c, const char *fname) - return true; - } - -+#ifdef HAVE_ISULAD -+static bool load_ocihooks_locked(struct lxc_container *c) -+{ -+ parser_error err = NULL; -+ oci_runtime_spec_hooks *hooks = NULL; -+ -+ if (!c->lxc_conf) -+ c->lxc_conf = lxc_conf_init(); -+ -+ if (!c->lxc_conf) -+ return false; -+ -+ hooks = oci_runtime_spec_hooks_parse_file(c->ocihookfile, NULL, &err); -+ if (!hooks) { -+ fprintf(stderr, "parse oci hooks config failed: %s\n", err); -+ free(err); -+ return true; -+ } -+ c->lxc_conf->ocihooks = hooks; -+ -+ if (err) -+ free(err); -+ return true; -+} -+ -+/* -+ * isulad: set oci hook file path -+ * */ -+static bool set_oci_hook_config_filename(struct lxc_container *c) -+{ -+#define OCI_HOOK_JSON_FILE_NAME "ocihooks.json" -+ char *newpath = NULL; -+ int len, ret; -+ -+ if (!c->config_path) -+ return false; -+ -+ /* $lxc_path + "/" + c->name + "/" + "config" + '\0' */ -+ if (strlen(c->config_path) + strlen(c->name) > SIZE_MAX - strlen(OCI_HOOK_JSON_FILE_NAME) - 3) -+ return false; -+ len = strlen(c->config_path) + 
strlen(c->name) + strlen(OCI_HOOK_JSON_FILE_NAME) + 3; -+ -+ newpath = malloc(len); -+ if (newpath == NULL) -+ return false; -+ -+ ret = snprintf(newpath, len, "%s/%s/%s", c->config_path, c->name, OCI_HOOK_JSON_FILE_NAME); -+ if (ret < 0 || ret >= len) { -+ fprintf(stderr, "Error printing out config file name\n"); -+ free(newpath); -+ return false; -+ } -+ -+ free(c->ocihookfile); -+ c->ocihookfile = newpath; -+ -+ return true; -+} -+#endif -+ - static bool do_lxcapi_load_config(struct lxc_container *c, const char *alt_file) - { - int lret; -@@ -656,6 +758,11 @@ static bool do_lxcapi_load_config(struct lxc_container *c, const char *alt_file) - - ret = load_config_locked(c, fname); - -+#ifdef HAVE_ISULAD -+ if (ret && file_exists(c->ocihookfile)) -+ ret = load_ocihooks_locked(c); -+#endif -+ - if (need_disklock) - container_disk_unlock(c); - else -@@ -830,12 +937,14 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid) - DEBUG("First child %d exited", pid); - - /* Close write end of the socket pair. */ -- close_prot_errno_disarm(handler->state_socket_pair[1]); -+ close(handler->state_socket_pair[1]); -+ handler->state_socket_pair[1] = -1; - - state = lxc_rcv_status(handler->state_socket_pair[0]); - - /* Close read end of the socket pair. 
*/ -- close_prot_errno_disarm(handler->state_socket_pair[0]); -+ close(handler->state_socket_pair[0]); -+ handler->state_socket_pair[0] = -1; - - if (state < 0) { - SYSERROR("Failed to receive the container state"); -@@ -855,6 +964,33 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid) - return true; - } - -+#ifdef HAVE_ISULAD -+/* isulad: use init argv as init cmd */ -+static char **use_init_args(char **init_argv, size_t init_args) -+{ -+ size_t i; -+ int nargs = 0; -+ char **argv; -+ -+ if (!init_argv) -+ return NULL; -+ -+ do { -+ argv = malloc(sizeof(char *)); -+ } while (!argv); -+ -+ argv[0] = NULL; -+ for (i = 0; i < init_args; i++) -+ push_arg(&argv, init_argv[i], &nargs); -+ -+ if (nargs == 0) { -+ free(argv); -+ return NULL; -+ } -+ return argv; -+} -+#endif -+ - static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const argv[]) - { - int ret; -@@ -865,6 +1001,13 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - NULL, - }; - char **init_cmd = NULL; -+#ifdef HAVE_ISULAD -+ int keepfds[] = {-1, -1, -1, -1, -1}; -+ ssize_t size_read; -+ char errbuf[BUFSIZ + 1] = {0}; -+#else -+ int keepfds[3] = {-1, -1, -1}; -+#endif - - /* container does exist */ - if (!c) -@@ -898,7 +1041,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - conf = c->lxc_conf; - - /* initialize handler */ -- handler = lxc_init_handler(NULL, c->name, conf, c->config_path, c->daemonize); -+ handler = lxc_init_handler(c->name, conf, c->config_path, c->daemonize); - - container_mem_unlock(c); - if (!handler) -@@ -911,11 +1054,17 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - argv = init_cmd = split_init_cmd(conf->init_cmd); - } - -+#ifdef HAVE_ISULAD -+ if (!argv) { -+ argv = init_cmd = use_init_args(conf->init_argv, conf->init_argc); -+ } -+#endif -+ - /* ... otherwise use default_args. 
*/ - if (!argv) { - if (useinit) { - ERROR("No valid init detected"); -- lxc_put_handler(handler); -+ lxc_free_handler(handler); - return false; - } - argv = default_args; -@@ -930,10 +1079,23 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - char title[2048]; - pid_t pid_first, pid_second; - -+#ifdef HAVE_ISULAD -+ //isulad: pipdfd for get error message of child or grandchild process. -+ if (pipe2(conf->errpipe, O_CLOEXEC) != 0) { -+ SYSERROR("Failed to init errpipe"); -+ free_init_cmd(init_cmd); -+ lxc_free_handler(handler); -+ return false; -+ } -+#endif -+ - pid_first = fork(); - if (pid_first < 0) { - free_init_cmd(init_cmd); -- lxc_put_handler(handler); -+ lxc_free_handler(handler); -+#ifdef HAVE_ISULAD -+ lxc_close_error_pipe(conf->errpipe); -+#endif - return false; - } - -@@ -943,14 +1105,28 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - * the PID file, child will do the free and unlink. - */ - c->pidfile = NULL; -+#ifdef HAVE_ISULAD -+ close(conf->errpipe[1]); -+ conf->errpipe[1] = -1; -+#endif - - /* Wait for container to tell us whether it started - * successfully. 
- */ - started = wait_on_daemonized_start(handler, pid_first); -+#ifdef HAVE_ISULAD -+ if (!started) { -+ size_read = read(conf->errpipe[0], errbuf, BUFSIZ); -+ if (size_read > 0) { -+ conf->errmsg = safe_strdup(errbuf); -+ } -+ } -+ close(conf->errpipe[0]); -+ conf->errpipe[0] = -1; -+#endif - - free_init_cmd(init_cmd); -- lxc_put_handler(handler); -+ lxc_free_handler(handler); - return started; - } - -@@ -982,7 +1158,10 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - /* second parent */ - if (pid_second != 0) { - free_init_cmd(init_cmd); -- lxc_put_handler(handler); -+ lxc_free_handler(handler); -+#ifdef HAVE_ISULAD -+ lxc_close_error_pipe(conf->errpipe); -+#endif - _exit(EXIT_SUCCESS); - } - -@@ -995,7 +1174,16 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - _exit(EXIT_FAILURE); - } - -- ret = inherit_fds(handler, true); -+ keepfds[0] = handler->conf->maincmd_fd; -+ keepfds[1] = handler->state_socket_pair[0]; -+ keepfds[2] = handler->state_socket_pair[1]; -+#ifdef HAVE_ISULAD -+ keepfds[4] = conf->errpipe[1]; -+ close(conf->errpipe[0]); -+ conf->errpipe[0] = -1; -+#endif -+ ret = lxc_check_inherited(conf, true, keepfds, -+ sizeof(keepfds) / sizeof(keepfds[0])); - if (ret < 0) - _exit(EXIT_FAILURE); - -@@ -1013,7 +1201,7 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - } else if (!am_single_threaded()) { - ERROR("Cannot start non-daemonized container when threaded"); - free_init_cmd(init_cmd); -- lxc_put_handler(handler); -+ lxc_free_handler(handler); - return false; - } - -@@ -1027,7 +1215,10 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - w = snprintf(pidstr, sizeof(pidstr), "%d", lxc_raw_getpid()); - if (w < 0 || (size_t)w >= sizeof(pidstr)) { - free_init_cmd(init_cmd); -- lxc_put_handler(handler); -+ lxc_free_handler(handler); -+#ifdef HAVE_ISULAD -+ lxc_close_error_pipe(conf->errpipe); -+#endif - - 
SYSERROR("Failed to write monitor pid to \"%s\"", c->pidfile); - -@@ -1040,7 +1231,10 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - ret = lxc_write_to_file(c->pidfile, pidstr, w, false, 0600); - if (ret < 0) { - free_init_cmd(init_cmd); -- lxc_put_handler(handler); -+ lxc_free_handler(handler); -+#ifdef HAVE_ISULAD -+ lxc_close_error_pipe(conf->errpipe); -+#endif - - SYSERROR("Failed to write monitor pid to \"%s\"", c->pidfile); - -@@ -1051,6 +1245,19 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - } - } - -+#ifdef HAVE_ISULAD -+ /* isulad: open exit fifo */ -+ if (c->exit_fifo) { -+ conf->exit_fd = lxc_open(c->exit_fifo, O_WRONLY | O_NONBLOCK | O_CLOEXEC, 0); -+ if (conf->exit_fd < 0) { -+ ERROR("Failed to open exit fifo %s: %s.", c->exit_fifo, strerror(errno)); -+ lxc_free_handler(handler); -+ ret = 1; -+ goto on_error; -+ } -+ } -+#endif -+ - conf->reboot = REBOOT_NONE; - - /* Unshare the mount namespace if requested */ -@@ -1058,15 +1265,15 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - ret = unshare(CLONE_NEWNS); - if (ret < 0) { - SYSERROR("Failed to unshare mount namespace"); -- lxc_put_handler(handler); -+ lxc_free_handler(handler); - ret = 1; - goto on_error; - } - - ret = mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL); - if (ret < 0) { -- SYSERROR("Failed to recursively turn root mount tree into dependent mount. 
Continuing..."); -- lxc_put_handler(handler); -+ SYSERROR("Failed to make / rslave at startup"); -+ lxc_free_handler(handler); - ret = 1; - goto on_error; - } -@@ -1075,26 +1282,55 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - reboot: - if (conf->reboot == REBOOT_INIT) { - /* initialize handler */ -- handler = lxc_init_handler(handler, c->name, conf, c->config_path, c->daemonize); -+ handler = lxc_init_handler(c->name, conf, c->config_path, c->daemonize); - if (!handler) { - ret = 1; - goto on_error; - } - } - -- ret = inherit_fds(handler, c->daemonize); -+ keepfds[0] = handler->conf->maincmd_fd; -+ keepfds[1] = handler->state_socket_pair[0]; -+ keepfds[2] = handler->state_socket_pair[1]; -+ -+#ifdef HAVE_ISULAD -+ /* keep exit fifo fd */ -+ if (conf->exit_fd >= 0) { -+ keepfds[3] = conf->exit_fd; -+ } -+ /* isulad: keep errpipe fd */ -+ if (c->daemonize) -+ keepfds[4] = conf->errpipe[1]; -+#endif -+ -+ ret = lxc_check_inherited(conf, c->daemonize, keepfds, -+ sizeof(keepfds) / sizeof(keepfds[0])); - if (ret < 0) { -- lxc_put_handler(handler); -+ lxc_free_handler(handler); - ret = 1; - goto on_error; - } - -- if (useinit) -+#ifdef HAVE_ISULAD -+ if (useinit) { -+ ret = lxc_execute(c->name, argv, 1, handler, c->config_path, -+ c->daemonize, &c->error_num, c->start_timeout); -+ } -+ else { -+ handler->disable_pty = c->disable_pty; -+ handler->open_stdin = c->open_stdin; -+ ret = lxc_start(argv, handler, c->config_path, c->daemonize, -+ &c->error_num, c->start_timeout); -+#else -+ if (useinit) { - ret = lxc_execute(c->name, argv, 1, handler, c->config_path, - c->daemonize, &c->error_num); -- else -+ } -+ else { - ret = lxc_start(argv, handler, c->config_path, c->daemonize, - &c->error_num); -+#endif -+ } - - if (conf->reboot == REBOOT_REQ) { - INFO("Container requested reboot"); -@@ -1185,6 +1421,7 @@ WRAP_API(bool, lxcapi_stop) - - static int do_create_container_dir(const char *path, struct lxc_conf *conf) - { -+ __do_free char 
*p = NULL; - int lasterr; - int ret = -1; - -@@ -1200,8 +1437,10 @@ static int do_create_container_dir(const char *path, struct lxc_conf *conf) - ret = 0; - } - -+ p = must_copy_string(path); -+ - if (!lxc_list_empty(&conf->id_map)) { -- ret = chown_mapped_root(path, conf); -+ ret = chown_mapped_root(p, conf); - if (ret < 0) - ret = -1; - } -@@ -1345,8 +1584,14 @@ static bool create_run_template(struct lxc_container *c, char *tpath, - _exit(EXIT_FAILURE); - } - -- if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) -- SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing..."); -+ ret = detect_shared_rootfs(); -+ if (ret == 1) { -+ ret = mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL); -+ if (ret < 0) { -+ SYSERROR("Failed to make \"/\" rslave"); -+ ERROR("Continuing..."); -+ } -+ } - } - - if (strcmp(bdev->type, "dir") != 0 && strcmp(bdev->type, "btrfs") != 0) { -@@ -2048,7 +2293,12 @@ WRAP_API_1(bool, lxcapi_reboot2, int) - static bool do_lxcapi_shutdown(struct lxc_container *c, int timeout) - { - __do_close int pidfd = -EBADF, state_client_fd = -EBADF; -+#ifdef HAVE_ISULAD -+ // isulad: keep default signal the same as docker -+ int haltsignal = SIGTERM; -+#else - int haltsignal = SIGPWR; -+#endif - pid_t pid = -1; - lxc_state_t states[MAX_STATE] = {0}; - int killret, ret; -@@ -2064,12 +2314,13 @@ static bool do_lxcapi_shutdown(struct lxc_container *c, int timeout) - if (pid <= 0) - return true; - -- /* Detect whether we should send SIGRTMIN + 3 (e.g. systemd). */ - if (c->lxc_conf && c->lxc_conf->haltsignal) - haltsignal = c->lxc_conf->haltsignal; -+#ifndef HAVE_ISULAD -+ /* Detect whether we should send SIGRTMIN + 3 (e.g. systemd). 
*/ - else if (task_blocks_signal(pid, (SIGRTMIN + 3))) - haltsignal = (SIGRTMIN + 3); -- -+#endif - - /* - * Add a new state client before sending the shutdown signal so -@@ -2090,41 +2341,41 @@ static bool do_lxcapi_shutdown(struct lxc_container *c, int timeout) - - if (ret < MAX_STATE) - return false; -- } - -- if (pidfd >= 0) { -- struct pollfd pidfd_poll = { -- .events = POLLIN, -- .fd = pidfd, -- }; -+ if (pidfd >= 0) { -+ struct pollfd pidfd_poll = { -+ .events = POLLIN, -+ .fd = pidfd, -+ }; - -- killret = lxc_raw_pidfd_send_signal(pidfd, haltsignal, -- NULL, 0); -- if (killret < 0) -- return log_warn(false, "Failed to send signal %d to pidfd %d", -- haltsignal, pidfd); -+ killret = lxc_raw_pidfd_send_signal(pidfd, haltsignal, -+ NULL, 0); -+ if (killret < 0) -+ return log_warn(false, "Failed to send signal %d to pidfd %d", -+ haltsignal, pidfd); - -- TRACE("Sent signal %d to pidfd %d", haltsignal, pidfd); -+ TRACE("Sent signal %d to pidfd %d", haltsignal, pidfd); - -- /* -- * No need for going through all of the state server -- * complications anymore. We can just poll on pidfds. :) -- */ -+ /* -+ * No need for going through all of the state server -+ * complications anymore. We can just poll on pidfds. 
:) -+ */ - -- if (timeout != 0) { -- ret = poll(&pidfd_poll, 1, timeout * 1000); -- if (ret < 0 || !(pidfd_poll.revents & POLLIN)) -- return false; -+ if (timeout != 0) { -+ ret = poll(&pidfd_poll, 1, timeout * 1000); -+ if (ret < 0 || !(pidfd_poll.revents & POLLIN)) -+ return false; - -- TRACE("Pidfd polling detected container exit"); -- } -- } else { -- killret = kill(pid, haltsignal); -- if (killret < 0) -- return log_warn(false, "Failed to send signal %d to pid %d", -- haltsignal, pid); -+ TRACE("Pidfd polling detected container exit"); -+ } -+ } else { -+ killret = kill(pid, haltsignal); -+ if (killret < 0) -+ return log_warn(false, "Failed to send signal %d to pid %d", -+ haltsignal, pid); - -- TRACE("Sent signal %d to pid %d", haltsignal, pid); -+ TRACE("Sent signal %d to pid %d", haltsignal, pid); -+ } - } - - if (timeout == 0) -@@ -2948,8 +3199,19 @@ static bool container_destroy(struct lxc_container *c, - bool bret = false; - int ret = 0; - -+#ifdef HAVE_ISULAD -+ if (!c) -+ return false; -+ // isulad: if container is not defined, we need to remove disk lock file -+ // which is created in lxc_container_new. -+ if (!do_lxcapi_is_defined(c)) { -+ container_disk_removelock(c); -+ return false; -+ } -+#else - if (!c || !do_lxcapi_is_defined(c)) - return false; -+#endif - - conf = c->lxc_conf; - if (container_disk_lock(c)) -@@ -3069,6 +3331,15 @@ static bool container_destroy(struct lxc_container *c, - if (ret < 0) { - ERROR("Failed to destroy directory \"%s\" for \"%s\"", path, - c->name); -+#ifdef HAVE_ISULAD -+ char msg[BUFSIZ] = { 0 }; -+ ret = snprintf(msg, BUFSIZ, "Failed to destroy directory \"%s\": %s", path, errno ? 
strerror(errno) : "error"); -+ if (ret < 0 || ret >= BUFSIZ) { -+ ERROR("Sprintf failed"); -+ goto out; -+ } -+ c->error_string = safe_strdup(msg); -+#endif - goto out; - } - INFO("Destroyed directory \"%s\" for \"%s\"", path, c->name); -@@ -3081,13 +3352,23 @@ out: - free(path); - - container_disk_unlock(c); -+#ifdef HAVE_ISULAD -+ if (bret && container_disk_removelock(c)) { -+ bret = false; -+ } -+#endif - return bret; - } - - static bool do_lxcapi_destroy(struct lxc_container *c) - { -+#ifdef HAVE_ISULAD -+ if (!c) -+ return false; -+#else - if (!c || !lxcapi_is_defined(c)) - return false; -+#endif - - if (c->lxc_conf && c->lxc_conf->rootfs.managed) { - if (has_snapshots(c)) { -@@ -3665,8 +3946,12 @@ static int clone_update_rootfs(struct clone_update_data *data) - return -1; - } - -- if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) -- SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing..."); -+ if (detect_shared_rootfs()) { -+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) { -+ SYSERROR("Failed to make / rslave"); -+ ERROR("Continuing..."); -+ } -+ } - - if (bdev->ops->mount(bdev) < 0) { - storage_put(bdev); -@@ -4030,8 +4315,13 @@ static int lxcapi_attach(struct lxc_container *c, - - current_config = c->lxc_conf; - -+#ifdef HAVE_ISULAD -+ ret = lxc_attach(c, exec_function, exec_payload, options, -+ attached_process, &c->lxc_conf->errmsg); -+#else - ret = lxc_attach(c, exec_function, exec_payload, options, - attached_process); -+#endif - current_config = NULL; - return ret; - } -@@ -4051,7 +4341,11 @@ static int do_lxcapi_attach_run_wait(struct lxc_container *c, - command.program = (char *)program; - command.argv = (char **)argv; - -+#ifdef HAVE_ISULAD -+ ret = lxc_attach(c, lxc_attach_run_command, &command, options, &pid, NULL); -+#else - ret = lxc_attach(c, lxc_attach_run_command, &command, options, &pid); -+#endif - if (ret < 0) - return ret; - -@@ -5230,7 +5524,223 @@ static int 
do_lxcapi_seccomp_notify_fd(struct lxc_container *c) - - WRAP_API(int, lxcapi_seccomp_notify_fd) - -+#ifdef HAVE_ISULAD -+/* isulad add set console fifos*/ -+static bool do_lxcapi_set_terminal_default_fifos(struct lxc_container *c, const char *in, const char *out, const char *err) -+{ -+ struct lxc_conf *conf = NULL; -+ -+ if (!c || !c->lxc_conf) -+ return false; -+ if (container_mem_lock(c)) { -+ ERROR("Error getting mem lock"); -+ return false; -+ } -+ -+ conf = c->lxc_conf; -+ if (in) { -+ if (conf->console.init_fifo[0]) -+ free(conf->console.init_fifo[0]); -+ conf->console.init_fifo[0] = safe_strdup(in); -+ } -+ if (out) { -+ if (conf->console.init_fifo[1]) -+ free(conf->console.init_fifo[1]); -+ conf->console.init_fifo[1] = safe_strdup(out); -+ } -+ if (err) { -+ if (conf->console.init_fifo[2]) -+ free(conf->console.init_fifo[2]); -+ conf->console.init_fifo[2] = safe_strdup(err); -+ } -+ -+ container_mem_unlock(c); -+ return true; -+} -+ -+WRAP_API_3(bool, lxcapi_set_terminal_default_fifos, const char *, const char *, const char *) -+ -+/* isulad add set info file path */ -+static bool do_lxcapi_set_container_info_file(struct lxc_container *c, const char *info_file) -+{ -+ struct lxc_conf *conf = NULL; -+ -+ if (!c || !c->lxc_conf || !info_file) -+ return false; -+ if (container_mem_lock(c)) { -+ ERROR("Error getting mem lock"); -+ return false; -+ } -+ -+ conf = c->lxc_conf; -+ if (conf->container_info_file) -+ free(conf->container_info_file); -+ conf->container_info_file = safe_strdup(info_file); -+ -+ container_mem_unlock(c); -+ return true; -+} -+ -+WRAP_API_1(bool, lxcapi_set_container_info_file, const char *) -+ -+static bool do_lxcapi_want_disable_pty(struct lxc_container *c, bool state) -+{ -+ if (!c || !c->lxc_conf) -+ return false; -+ -+ if (container_mem_lock(c)) -+ return false; -+ -+ c->disable_pty = state; -+ -+ container_mem_unlock(c); -+ -+ return true; -+} -+ -+WRAP_API_1(bool, lxcapi_want_disable_pty, bool) -+ -+static bool 
do_lxcapi_want_open_stdin(struct lxc_container *c, bool state) -+{ -+ if (!c || !c->lxc_conf) -+ return false; -+ -+ if (container_mem_lock(c)) -+ return false; -+ -+ c->open_stdin = state; -+ -+ container_mem_unlock(c); -+ -+ return true; -+} -+ -+WRAP_API_1(bool, lxcapi_want_open_stdin, bool) -+ -+/* isulad add clean resources */ -+static bool do_lxcapi_add_terminal_fifo(struct lxc_container *c, const char *in_fifo, const char *out_fifo, const char *err_fifo) -+{ -+ bool ret = true; -+ -+ if (!c || !c->lxc_conf) -+ return false; -+ if (container_mem_lock(c)) { -+ ERROR("Error getting mem lock"); -+ return false; -+ } -+ -+ if (lxc_cmd_set_terminal_fifos(c->name, c->config_path, in_fifo, out_fifo, err_fifo)) { -+ ERROR("Error set console fifos"); -+ ret = false; -+ } -+ -+ container_mem_unlock(c); -+ return ret; -+} -+ -+WRAP_API_3(bool, lxcapi_add_terminal_fifo, const char *, const char *, const char *) -+ -+static bool do_lxcapi_set_terminal_winch(struct lxc_container *c, unsigned int height, unsigned int width) -+{ -+ bool ret = true; -+ -+ if (!c || !c->lxc_conf) -+ return false; -+ if (container_mem_lock(c)) { -+ ERROR("Error getting mem lock"); -+ return false; -+ } -+ -+ if (lxc_cmd_set_terminal_winch(c->name, c->config_path, height, width)) { -+ ERROR("Error set terminal winch"); -+ ret = false; -+ } -+ -+ container_mem_unlock(c); -+ return ret; -+} -+ -+WRAP_API_2(bool, lxcapi_set_terminal_winch, unsigned int, unsigned int) -+ -+static bool do_lxcapi_set_exec_terminal_winch(struct lxc_container *c, const char *suffix, unsigned int height, unsigned int width) -+{ -+ bool ret = true; -+ -+ if (!c || !c->lxc_conf) -+ return false; -+ if (container_mem_lock(c)) { -+ ERROR("Error getting mem lock"); -+ return false; -+ } -+ -+ if (lxc_exec_cmd_set_terminal_winch(c->name, c->config_path, suffix, height, width)) { -+ ERROR("Error set terminal winch"); -+ ret = false; -+ } -+ -+ container_mem_unlock(c); -+ return ret; -+} -+ -+WRAP_API_3(bool, 
lxcapi_set_exec_terminal_winch, const char *, unsigned int, unsigned int) -+ -+/* isulad add clean resources */ -+static bool do_lxcapi_clean_container_resource(struct lxc_container *c, pid_t pid) -+{ -+ int ret; -+ -+ if (!c) -+ return false; -+ -+ ret = do_lxcapi_clean_resource(c->name, c->config_path, c->lxc_conf, pid); -+ if (ret) -+ ERROR("Failed to clean container %s resource", c->name); -+ return ret == 0; -+ -+} -+ -+WRAP_API_1(bool, lxcapi_clean_container_resource, pid_t) -+ -+/* isulad get coantainer pids */ -+static bool do_lxcapi_get_container_pids(struct lxc_container *c, pid_t **pids,size_t *pids_len) -+{ -+ int ret; -+ -+ if (!c) -+ return false; -+ -+ ret = do_lxcapi_get_pids(c->name, c->config_path, c->lxc_conf, pids,pids_len); -+ if (ret) -+ ERROR("Failed to get container %s pids", c->name); -+ return ret == 0; -+ -+} -+ -+WRAP_API_2(bool, lxcapi_get_container_pids, pid_t **,size_t *) -+ -+/* isulad add start timeout */ -+static bool do_lxcapi_set_start_timeout(struct lxc_container *c, unsigned int start_timeout) -+{ -+ if (!c || !c->lxc_conf) -+ return false; -+ if (container_mem_lock(c)) { -+ ERROR("Error getting mem lock"); -+ return false; -+ } -+ c->start_timeout = start_timeout; -+ container_mem_unlock(c); -+ return true; -+} -+ -+WRAP_API_1(bool, lxcapi_set_start_timeout, unsigned int) -+ -+#endif -+ -+#ifdef HAVE_ISULAD -+static struct lxc_container *do_lxc_container_new(const char *name, const char *configpath, bool load_config) -+#else - struct lxc_container *lxc_container_new(const char *name, const char *configpath) -+#endif - { - struct lxc_container *c; - size_t len; -@@ -5283,10 +5793,24 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath - goto err; - } - -+#ifdef HAVE_ISULAD -+ if (!set_oci_hook_config_filename(c)) { -+ fprintf(stderr, "Error allocating oci hooks file pathname\n"); -+ goto err; -+ } -+ -+ if (load_config && file_exists(c->configfile)) { -+ if (!lxcapi_load_config(c, NULL)) { -+ 
fprintf(stderr, "Failed to load config for %s\n", name); -+ goto err; -+ } -+ } -+#else - if (file_exists(c->configfile) && !lxcapi_load_config(c, NULL)) { - fprintf(stderr, "Failed to load config for %s\n", name); - goto err; - } -+#endif - - rc = ongoing_create(c); - switch (rc) { -@@ -5371,7 +5895,18 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath - c->mount = lxcapi_mount; - c->umount = lxcapi_umount; - c->seccomp_notify_fd = lxcapi_seccomp_notify_fd; -- -+#ifdef HAVE_ISULAD -+ c->set_container_info_file = lxcapi_set_container_info_file; -+ c->set_terminal_init_fifos = lxcapi_set_terminal_default_fifos; -+ c->add_terminal_fifos = lxcapi_add_terminal_fifo; -+ c->set_terminal_winch = lxcapi_set_terminal_winch; -+ c->set_exec_terminal_winch = lxcapi_set_exec_terminal_winch; -+ c->want_disable_pty = lxcapi_want_disable_pty; -+ c->want_open_stdin = lxcapi_want_open_stdin; -+ c->clean_container_resource = lxcapi_clean_container_resource; -+ c->get_container_pids = lxcapi_get_container_pids; -+ c->set_start_timeout = lxcapi_set_start_timeout; -+#endif - return c; - - err: -@@ -5379,6 +5914,19 @@ err: - return NULL; - } - -+#ifdef HAVE_ISULAD -+// isulad: new container without load config to save time -+struct lxc_container *lxc_container_without_config_new(const char *name, const char *configpath) -+{ -+ return do_lxc_container_new(name, configpath, false); -+} -+ -+struct lxc_container *lxc_container_new(const char *name, const char *configpath) -+{ -+ return do_lxc_container_new(name, configpath, true); -+} -+#endif -+ - int lxc_get_wait_states(const char **states) - { - int i; -@@ -5557,11 +6105,21 @@ int list_active_containers(const char *lxcpath, char ***nret, - continue; - } - -+#ifdef HAVE_ISULAD -+ if (ct_name && ct_name_cnt) { -+ if (array_contains(&ct_name, p, ct_name_cnt)) { -+ if (is_hashed) -+ free(p); -+ continue; -+ } -+ } -+#else - if (array_contains(&ct_name, p, ct_name_cnt)) { - if (is_hashed) - free(p); - 
continue; - } -+#endif - - if (!add_to_array(&ct_name, p, ct_name_cnt)) { - if (is_hashed) -diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h -index b4ec1d6d5..2951ac7b4 100644 ---- a/src/lxc/lxccontainer.h -+++ b/src/lxc/lxccontainer.h -@@ -90,7 +90,7 @@ struct lxc_container { - * \private - * Container configuration. - * -- * \internal TODO: do we want the whole lxc_handler? -+ * \internal FIXME: do we want the whole lxc_handler? - */ - struct lxc_conf *lxc_conf; - -@@ -107,6 +107,30 @@ struct lxc_container { - /*! Full path to configuration file */ - char *config_path; - -+ /*! isulad: -+ * \private -+ * exit FIFO File to open used monitor the state of lxc monitor process. -+ */ -+ char *exit_fifo; -+ /*! Whether container wishes to create pty or pipes for console log */ -+ bool disable_pty; -+ -+ /*! Whether container wishes to keep stdin active */ -+ bool open_stdin; -+ -+ /*! -+ * \private -+ * isulad: support oci hook from json file -+ * full path of json file -+ * */ -+ char *ocihookfile; -+ -+ /*! isulad: -+ * \private -+ * start_timeout. -+ */ -+ unsigned int start_timeout; -+ - /*! - * \brief Determine if \c /var/lib/lxc/$name/config exists. - * -@@ -563,7 +587,7 @@ struct lxc_container { - * \param c Container. - * \param[in,out] ttynum Terminal number to attempt to allocate, - * or \c -1 to allocate the first available tty. -- * \param[out] ptmxfd File descriptor referring to the ptmx side of the pty. -+ * \param[out] masterfd File descriptor referring to the master side of the pty. - * - * \return tty file descriptor number on success, or \c -1 on - * failure. -@@ -575,7 +599,7 @@ struct lxc_container { - * descriptor when no longer required so that it may be allocated - * by another caller. - */ -- int (*console_getfd)(struct lxc_container *c, int *ttynum, int *ptmxfd); -+ int (*console_getfd)(struct lxc_container *c, int *ttynum, int *masterfd); - - /*! - * \brief Allocate and run a console tty. 
-@@ -865,6 +889,93 @@ struct lxc_container { - * \return pidfd of init process of the container. - */ - int (*init_pidfd)(struct lxc_container *c); -+ -+ /*! isulad add -+ * \brief An API call to set the path of info file -+ * -+ * \param c Container. -+ * \param info_file Value of the path of info file. -+ * -+ * \return \c true on success, else \c false. -+ */ -+ bool (*set_container_info_file) (struct lxc_container *c, const char *info_file); -+ -+ /*! isulad add -+ * \brief An API call to change the path of the console default fifos -+ * -+ * \param c Container. -+ * \param path Value of the console path. -+ * -+ * \return \c true on success, else \c false. -+ */ -+ bool (*set_terminal_init_fifos)(struct lxc_container *c, const char *in, const char *out, const char *err); -+ -+ /*! isulad add -+ * \brief An API call to add the path of terminal fifos -+ * -+ * \param c Container. -+ * \param path Value of the console path.. -+ * -+ * \return \c true on success, else \c false. -+ */ -+ bool (*add_terminal_fifos)(struct lxc_container *c, const char *in, const char *out, const char *err); -+ -+ bool (*set_terminal_winch)(struct lxc_container *c, unsigned int height, unsigned int width); -+ -+ bool (*set_exec_terminal_winch)(struct lxc_container *c, const char *suffix, unsigned int height, unsigned int width); -+ -+ /*! -+ * \brief Change whether the container wants to create pty or pipes -+ * from the console log. -+ * -+ * \param c Container. -+ * \param state Value for the disable pty bit (0 or 1). -+ * -+ * \return \c true on success, else \c false. -+ */ -+ bool (*want_disable_pty)(struct lxc_container *c, bool state); -+ -+ /*! -+ * \brief Change whether the container wants to keep stdin active -+ * for parent process of container -+ * -+ * \param c Container. -+ * \param state Value for the open_stdin bit (0 or 1). -+ * -+ * \return \c true on success, else \c false. -+ */ -+ bool (*want_open_stdin)(struct lxc_container *c, bool state); -+ -+ /*! 
isulad add -+ * \brief An API call to clean resources of container -+ * -+ * \param c Container. -+ * \param pid Value of container process. -+ * -+ * \return \c true on success, else \c false. -+ */ -+ bool (*clean_container_resource) (struct lxc_container *c, pid_t pid); -+ -+ /*! isulad add -+ * \brief An API call to get container pids -+ * -+ * \param c Container. -+ * \param pids Value of container pids. -+ * \param pids_len Value of container pids len. -+ * \param pid Value of container pid. -+ * \return \c true on success, else \c false. -+ */ -+ bool (*get_container_pids)(struct lxc_container *c,pid_t **pids,size_t *pids_len); -+ -+ /*! isulad add -+ * \brief An API call to set start timeout -+ * -+ * \param c Container. -+ * \param start_timeout Value of start timeout. -+ * -+ * \return \c true on success, else \c false. -+ */ -+ bool (*set_start_timeout)(struct lxc_container *c, unsigned int start_timeout); - }; - - /*! -@@ -998,6 +1109,18 @@ struct lxc_console_log { - */ - struct lxc_container *lxc_container_new(const char *name, const char *configpath); - -+/*! -+ * \brief Create a new container without loading config. -+ * -+ * \param name Name to use for container. -+ * \param configpath Full path to configuration file to use. -+ * -+ * \return Newly-allocated container, or \c NULL on error. -+ * -+ * \note This function can only used for listing container. -+ */ -+struct lxc_container *lxc_container_without_config_new(const char *name, const char *configpath); -+ - /*! - * \brief Add a reference to the specified container. 
- * -diff --git a/src/lxc/lxclock.c b/src/lxc/lxclock.c -index 318e5bf5a..bb0dca0c9 100644 ---- a/src/lxc/lxclock.c -+++ b/src/lxc/lxclock.c -@@ -370,3 +370,30 @@ void container_disk_unlock(struct lxc_container *c) - lxcunlock(c->slock); - lxcunlock(c->privlock); - } -+ -+#ifdef HAVE_ISULAD -+static int lxc_removelock(struct lxc_lock *l) -+{ -+ int ret = 0; -+ -+ if (l->type == LXC_LOCK_FLOCK) { -+ ret = unlink(l->u.f.fname); -+ if (ret && errno != ENOENT) { -+ SYSERROR("Error unlink %s", l->u.f.fname); -+ return ret; -+ } -+ } -+ -+ return ret; -+} -+ -+int container_disk_removelock(struct lxc_container *c) -+{ -+ int ret; -+ -+ ret = lxc_removelock(c->slock); -+ if (ret) -+ return ret; -+ return lxc_removelock(c->privlock); -+} -+#endif -diff --git a/src/lxc/lxclock.h b/src/lxc/lxclock.h -index 9f9bc3bf6..6a71d7c5e 100644 ---- a/src/lxc/lxclock.h -+++ b/src/lxc/lxclock.h -@@ -154,4 +154,8 @@ extern int container_disk_lock(struct lxc_container *c); - */ - extern void container_disk_unlock(struct lxc_container *c); - -+#ifdef HAVE_ISULAD -+int container_disk_removelock(struct lxc_container *c); -+#endif -+ - #endif -diff --git a/src/lxc/macro.h b/src/lxc/macro.h -index 7b2ad79ed..3df19d6d3 100644 ---- a/src/lxc/macro.h -+++ b/src/lxc/macro.h -@@ -57,20 +57,6 @@ - #define CAP_SETGID 6 - #endif - --/* move_mount */ --#ifndef MOVE_MOUNT_F_EMPTY_PATH --#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ --#endif -- --/* open_tree */ --#ifndef OPEN_TREE_CLONE --#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ --#endif -- --#ifndef OPEN_TREE_CLOEXEC --#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ --#endif -- - /* prctl */ - #ifndef PR_CAPBSET_READ - #define PR_CAPBSET_READ 23 -@@ -433,9 +419,6 @@ enum { - - #define PTR_TO_UINT64(p) ((uint64_t)((intptr_t)(p))) - --#define UINT_TO_PTR(u) ((void *) ((uintptr_t) (u))) --#define PTR_TO_USHORT(p) ((unsigned short)((uintptr_t)(p))) -- - #define 
LXC_INVALID_UID ((uid_t)-1) - #define LXC_INVALID_GID ((gid_t)-1) - -diff --git a/src/lxc/mainloop.c b/src/lxc/mainloop.c -index d5ae2a67a..6d4c5935a 100644 ---- a/src/lxc/mainloop.c -+++ b/src/lxc/mainloop.c -@@ -59,10 +59,8 @@ int lxc_mainloop(struct lxc_epoll_descr *descr, int timeout_ms) - } - } - --int lxc_mainloop_add_handler_events(struct lxc_epoll_descr *descr, int fd, -- int events, -- lxc_mainloop_callback_t callback, -- void *data) -+int lxc_mainloop_add_handler(struct lxc_epoll_descr *descr, int fd, -+ lxc_mainloop_callback_t callback, void *data) - { - __do_free struct mainloop_handler *handler = NULL; - __do_free struct lxc_list *item = NULL; -@@ -79,7 +77,7 @@ int lxc_mainloop_add_handler_events(struct lxc_epoll_descr *descr, int fd, - handler->fd = fd; - handler->data = data; - -- ev.events = events; -+ ev.events = EPOLLIN; - ev.data.ptr = handler; - - if (epoll_ctl(descr->epfd, EPOLL_CTL_ADD, fd, &ev) < 0) -@@ -94,13 +92,6 @@ int lxc_mainloop_add_handler_events(struct lxc_epoll_descr *descr, int fd, - return 0; - } - --int lxc_mainloop_add_handler(struct lxc_epoll_descr *descr, int fd, -- lxc_mainloop_callback_t callback, void *data) --{ -- return lxc_mainloop_add_handler_events(descr, fd, EPOLLIN, callback, -- data); --} -- - int lxc_mainloop_del_handler(struct lxc_epoll_descr *descr, int fd) - { - struct mainloop_handler *handler; -diff --git a/src/lxc/mainloop.h b/src/lxc/mainloop.h -index e6ab9a6d9..8afac60d3 100644 ---- a/src/lxc/mainloop.h -+++ b/src/lxc/mainloop.h -@@ -22,10 +22,6 @@ typedef int (*lxc_mainloop_callback_t)(int fd, uint32_t event, void *data, - - extern int lxc_mainloop(struct lxc_epoll_descr *descr, int timeout_ms); - --extern int lxc_mainloop_add_handler_events(struct lxc_epoll_descr *descr, -- int fd, int events, -- lxc_mainloop_callback_t callback, -- void *data); - extern int lxc_mainloop_add_handler(struct lxc_epoll_descr *descr, int fd, - lxc_mainloop_callback_t callback, - void *data); -diff --git 
a/src/lxc/memory_utils.h b/src/lxc/memory_utils.h -index d3b68a1e9..29878fb67 100644 ---- a/src/lxc/memory_utils.h -+++ b/src/lxc/memory_utils.h -@@ -41,10 +41,10 @@ define_cleanup_function(FILE *, fclose); - define_cleanup_function(DIR *, closedir); - #define __do_closedir call_cleaner(closedir) - --#define free_disarm(ptr) \ -- ({ \ -- free(ptr); \ -- ptr = NULL; \ -+#define free_disarm(ptr) \ -+ ({ \ -+ free(ptr); \ -+ move_ptr(ptr); \ - }) - - static inline void free_disarm_function(void *ptr) -diff --git a/src/lxc/namespace.c b/src/lxc/namespace.c -index f2e017563..38d2ae5d7 100644 ---- a/src/lxc/namespace.c -+++ b/src/lxc/namespace.c -@@ -21,6 +21,33 @@ - - lxc_log_define(namespace, lxc); - -+/* -+ * Let's use the "standard stack limit" (i.e. glibc thread size default) for -+ * stack sizes: 8MB. -+ */ -+#define __LXC_STACK_SIZE (8 * 1024 * 1024) -+pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd) -+{ -+ pid_t ret; -+ void *stack; -+ -+ stack = malloc(__LXC_STACK_SIZE); -+ if (!stack) { -+ SYSERROR("Failed to allocate clone stack"); -+ return -ENOMEM; -+ } -+ -+#ifdef __ia64__ -+ ret = __clone2(fn, stack, __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd); -+#else -+ ret = clone(fn, stack + __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd); -+#endif -+ if (ret < 0) -+ SYSERROR("Failed to clone (%#x)", flags); -+ -+ return ret; -+} -+ - /* Leave the user namespace at the first position in the array of structs so - * that we always attach to it first when iterating over the struct and using - * setns() to switch namespaces. 
This especially affects lxc_attach(): Suppose -diff --git a/src/lxc/namespace.h b/src/lxc/namespace.h -index 84976f60f..a8fda783c 100644 ---- a/src/lxc/namespace.h -+++ b/src/lxc/namespace.h -@@ -7,6 +7,63 @@ - #include - #include - -+#ifndef CLONE_PARENT_SETTID -+#define CLONE_PARENT_SETTID 0x00100000 -+#endif -+ -+#ifndef CLONE_CHILD_CLEARTID -+#define CLONE_CHILD_CLEARTID 0x00200000 -+#endif -+ -+#ifndef CLONE_CHILD_SETTID -+#define CLONE_CHILD_SETTID 0x01000000 -+#endif -+ -+#ifndef CLONE_VFORK -+#define CLONE_VFORK 0x00004000 -+#endif -+ -+#ifndef CLONE_THREAD -+#define CLONE_THREAD 0x00010000 -+#endif -+ -+#ifndef CLONE_SETTLS -+#define CLONE_SETTLS 0x00080000 -+#endif -+ -+#ifndef CLONE_VM -+#define CLONE_VM 0x00000100 -+#endif -+ -+#ifndef CLONE_FILES -+#define CLONE_FILES 0x00000400 -+#endif -+ -+#ifndef CLONE_FS -+# define CLONE_FS 0x00000200 -+#endif -+#ifndef CLONE_NEWNS -+# define CLONE_NEWNS 0x00020000 -+#endif -+#ifndef CLONE_NEWCGROUP -+# define CLONE_NEWCGROUP 0x02000000 -+#endif -+#ifndef CLONE_NEWUTS -+# define CLONE_NEWUTS 0x04000000 -+#endif -+#ifndef CLONE_NEWIPC -+# define CLONE_NEWIPC 0x08000000 -+#endif -+#ifndef CLONE_NEWUSER -+# define CLONE_NEWUSER 0x10000000 -+#endif -+#ifndef CLONE_NEWPID -+# define CLONE_NEWPID 0x20000000 -+#endif -+#ifndef CLONE_NEWNET -+# define CLONE_NEWNET 0x40000000 -+#endif -+ - enum { - LXC_NS_USER, - LXC_NS_MNT, -@@ -25,6 +82,39 @@ extern const struct ns_info { - const char *env_name; - } ns_info[LXC_NS_MAX]; - -+#if defined(__ia64__) -+int __clone2(int (*__fn) (void *__arg), void *__child_stack_base, -+ size_t __child_stack_size, int __flags, void *__arg, ...); -+#else -+int clone(int (*fn)(void *), void *child_stack, -+ int flags, void *arg, ... -+ /* pid_t *ptid, struct user_desc *tls, pid_t *ctid */ ); -+#endif -+ -+/** -+ * lxc_clone() - create a new process -+ * -+ * - allocate stack: -+ * This function allocates a new stack the size of page and passes it to the -+ * kernel. 
-+ * -+ * - support all CLONE_*flags: -+ * This function supports all CLONE_* flags. If in doubt or not sufficiently -+ * familiar with process creation in the kernel and interactions with libcs -+ * this function should be used. -+ * -+ * - pthread_atfork() handlers depending on libc: -+ * Whether this function runs pthread_atfork() handlers depends on the -+ * corresponding libc wrapper. glibc currently does not run pthread_atfork() -+ * handlers but does not guarantee that they are not. Other libcs might or -+ * might not run pthread_atfork() handlers. If you require guarantees please -+ * refer to the lxc_raw_clone*() functions in raw_syscalls.{c,h}. -+ * -+ * - should call lxc_raw_getpid(): -+ * The child should use lxc_raw_getpid() to retrieve its pid. -+ */ -+extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd); -+ - extern int lxc_namespace_2_cloneflag(const char *namespace); - extern int lxc_namespace_2_ns_idx(const char *namespace); - extern int lxc_namespace_2_std_identifiers(char *namespaces); -diff --git a/src/lxc/network.c b/src/lxc/network.c -index bca044059..19adb2329 100644 ---- a/src/lxc/network.c -+++ b/src/lxc/network.c -@@ -36,7 +36,7 @@ - #include "memory_utils.h" - #include "network.h" - #include "nl.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "syscall_wrappers.h" - #include "utils.h" - -@@ -182,6 +182,11 @@ static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex) - return 0; - } - -+struct ip_proxy_args { -+ const char *ip; -+ const char *dev; -+}; -+ - static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest) - { - call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL; -@@ -319,15 +324,11 @@ static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netd - } - - if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) { -- if (!lxc_nic_exists(netdev->link)) { -- SYSERROR("Failed to attach 
\"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link); -- goto out_delete; -- } -- - err = lxc_bridge_attach(netdev->link, veth1); - if (err) { - errno = -err; -- SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link); -+ SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", -+ veth1, netdev->link); - goto out_delete; - } - INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link); -@@ -482,6 +483,8 @@ static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n - } - - strlcpy(netdev->created_name, peer, IFNAMSIZ); -+ if (is_empty_string(netdev->name)) -+ (void)strlcpy(netdev->name, peer, IFNAMSIZ); - - netdev->ifindex = if_nametoindex(peer); - if (!netdev->ifindex) { -@@ -531,7 +534,7 @@ on_error: - return -1; - } - --static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int isolation) -+static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation) - { - call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL; - struct nl_handler nlh; -@@ -540,7 +543,7 @@ static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int - struct ifinfomsg *ifi; - struct rtattr *nest, *nest2; - -- len = strlen(parent); -+ len = strlen(master); - if (len == 1 || len >= IFNAMSIZ) - return ret_errno(EINVAL); - -@@ -548,13 +551,13 @@ static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int - if (len == 1 || len >= IFNAMSIZ) - return ret_errno(EINVAL); - -- index = if_nametoindex(parent); -+ index = if_nametoindex(master); - if (!index) - return ret_errno(EINVAL); - - err = netlink_open(nlh_ptr, NETLINK_ROUTE); - if (err) -- return err; -+ return ret_errno(-err); - - nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE); - if (!nlmsg) -@@ -579,21 +582,24 @@ static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int - if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan")) - return ret_errno(EPROTO); - -- nest2 = 
nla_begin_nested(nlmsg, IFLA_INFO_DATA); -- if (!nest2) -- return ret_errno(EPROTO); -+ if (mode) { -+ nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA); -+ if (!nest2) -+ return ret_errno(EPROTO); - -- if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode)) -- return ret_errno(EPROTO); -+ if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode)) -+ return ret_errno(EPROTO); - -- /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and -- * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway -- * according to ipvlan docs. -- */ -- if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation)) -- return ret_errno(EPROTO); -+ /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0 -+ * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs. -+ */ -+ if (isolation > 0 && -+ nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation)) -+ return ret_errno(EPROTO); -+ -+ nla_end_nested(nlmsg, nest2); -+ } - -- nla_end_nested(nlmsg, nest2); - nla_end_nested(nlmsg, nest); - - if (nla_put_u32(nlmsg, IFLA_LINK, index)) -@@ -631,6 +637,8 @@ static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *ne - } - - strlcpy(netdev->created_name, peer, IFNAMSIZ); -+ if (is_empty_string(netdev->name)) -+ (void)strlcpy(netdev->name, peer, IFNAMSIZ); - - netdev->ifindex = if_nametoindex(peer); - if (!netdev->ifindex) { -@@ -704,6 +712,8 @@ static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netd - } - - strlcpy(netdev->created_name, peer, IFNAMSIZ); -+ if (is_empty_string(netdev->name)) -+ (void)strlcpy(netdev->name, peer, IFNAMSIZ); - - netdev->ifindex = if_nametoindex(peer); - if (!netdev->ifindex) { -@@ -859,7 +869,7 @@ static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = { - [LXC_NET_NONE] = instantiate_none, - }; - --static int __instantiate_ns_common(struct lxc_netdev *netdev) -+static int 
instantiate_ns_veth(struct lxc_netdev *netdev) - { - char current_ifname[IFNAMSIZ]; - -@@ -901,30 +911,33 @@ static int __instantiate_ns_common(struct lxc_netdev *netdev) - return 0; - } - --static int instantiate_ns_veth(struct lxc_netdev *netdev) -+static int __instantiate_common(struct lxc_netdev *netdev) - { -+ netdev->ifindex = if_nametoindex(netdev->name); -+ if (!netdev->ifindex) -+ return log_error_errno(-1, errno, "Failed to retrieve ifindex for network device with name %s", netdev->name); - -- return __instantiate_ns_common(netdev); -+ return 0; - } - - static int instantiate_ns_macvlan(struct lxc_netdev *netdev) - { -- return __instantiate_ns_common(netdev); -+ return __instantiate_common(netdev); - } - - static int instantiate_ns_ipvlan(struct lxc_netdev *netdev) - { -- return __instantiate_ns_common(netdev); -+ return __instantiate_common(netdev); - } - - static int instantiate_ns_vlan(struct lxc_netdev *netdev) - { -- return __instantiate_ns_common(netdev); -+ return __instantiate_common(netdev); - } - - static int instantiate_ns_phys(struct lxc_netdev *netdev) - { -- return __instantiate_ns_common(netdev); -+ return __instantiate_common(netdev); - } - - static int instantiate_ns_empty(struct lxc_netdev *netdev) -@@ -1736,7 +1749,7 @@ int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned in - } - - /* TODO: merge with lxc_macvlan_create */ --int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid) -+int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid) - { - call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL; - struct nl_handler nlh; -@@ -1749,7 +1762,7 @@ int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid) - if (err) - return err; - -- len = strlen(parent); -+ len = strlen(master); - if (len == 1 || len >= IFNAMSIZ) - return ret_errno(EINVAL); - -@@ -1765,7 +1778,7 @@ int lxc_vlan_create(const char *parent, const char *name, 
unsigned short vlanid) - if (!answer) - return ret_errno(ENOMEM); - -- lindex = if_nametoindex(parent); -+ lindex = if_nametoindex(master); - if (!lindex) - return ret_errno(EINVAL); - -@@ -1804,7 +1817,7 @@ int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid) - return netlink_transaction(nlh_ptr, nlmsg, answer); - } - --int lxc_macvlan_create(const char *parent, const char *name, int mode) -+int lxc_macvlan_create(const char *master, const char *name, int mode) - { - call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL; - struct nl_handler nlh; -@@ -1817,7 +1830,7 @@ int lxc_macvlan_create(const char *parent, const char *name, int mode) - if (err) - return err; - -- len = strlen(parent); -+ len = strlen(master); - if (len == 1 || len >= IFNAMSIZ) - return ret_errno(EINVAL); - -@@ -1833,7 +1846,7 @@ int lxc_macvlan_create(const char *parent, const char *name, int mode) - if (!answer) - return ret_errno(ENOMEM); - -- index = if_nametoindex(parent); -+ index = if_nametoindex(master); - if (!index) - return ret_errno(EINVAL); - -@@ -2834,9 +2847,6 @@ bool lxc_delete_network_unpriv(struct lxc_handler *handler) - netdev->ifindex, netdev->link); - - ret = netdev_deconf[netdev->type](handler, netdev); -- if (ret < 0) -- WARN("Failed to deconfigure interface with index %d and initial name \"%s\"", -- netdev->ifindex, netdev->link); - goto clear_ifindices; - } - -@@ -3110,9 +3120,9 @@ int lxc_network_move_created_netdev_priv(struct lxc_handler *handler) - physname = is_wlan(netdev->link); - - if (physname) -- ret = lxc_netdev_move_wlan(physname, netdev->link, pid, NULL); -+ ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->name); - else -- ret = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL); -+ ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->name); - if (ret) - return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d", - netdev->created_name, 
-@@ -3219,9 +3229,6 @@ bool lxc_delete_network_priv(struct lxc_handler *handler) - } - - ret = netdev_deconf[netdev->type](handler, netdev); -- if (ret < 0) -- WARN("Failed to deconfigure interface with index %d and initial name \"%s\"", -- netdev->ifindex, netdev->link); - goto clear_ifindices; - } - -@@ -3441,10 +3448,18 @@ static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netde - - /* set the network device up */ - if (netdev->flags & IFF_UP) { -+ -+#ifdef HAVE_ISULAD -+ if (netdev->name[0] != '\0') { -+ err = lxc_netdev_up(netdev->name); -+ if (err) -+ return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name); -+ } -+#else - err = lxc_netdev_up(netdev->name); - if (err) - return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name); -- -+#endif - /* the network is up, make the loopback up too */ - err = lxc_netdev_up("lo"); - if (err) -diff --git a/src/lxc/network.h b/src/lxc/network.h -index ba35c1253..696380c90 100644 ---- a/src/lxc/network.h -+++ b/src/lxc/network.h -@@ -205,8 +205,8 @@ extern int lxc_netdev_set_mtu(const char *name, int mtu); - /* Create a virtual network devices. */ - extern int lxc_veth_create(const char *name1, const char *name2, pid_t pid, - unsigned int mtu); --extern int lxc_macvlan_create(const char *parent, const char *name, int mode); --extern int lxc_vlan_create(const char *parent, const char *name, -+extern int lxc_macvlan_create(const char *master, const char *name, int mode); -+extern int lxc_vlan_create(const char *master, const char *name, - unsigned short vid); - - /* Set ip address. */ -diff --git a/src/lxc/path.c b/src/lxc/path.c -new file mode 100644 -index 000000000..65b8aadbf ---- /dev/null -+++ b/src/lxc/path.c -@@ -0,0 +1,655 @@ -+/* SPDX-License-Identifier: LGPL-2.1+ */ -+/****************************************************************************** -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. 
Allrights reserved -+ * Description: isulad utils -+ * Author: lifeng -+ * Create: 2020-04-11 -+******************************************************************************/ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "path.h" -+#include "log.h" -+#include "isulad_utils.h" -+ -+lxc_log_define(lxc_path_ui, lxc); -+ -+#define ISSLASH(C) ((C) == '/') -+#define IS_ABSOLUTE_FILE_NAME(F) (ISSLASH ((F)[0])) -+#define IS_RELATIVE_FILE_NAME(F) (! IS_ABSOLUTE_FILE_NAME (F)) -+ -+bool specify_current_dir(const char *path) -+{ -+ char *basec = NULL, *bname = NULL; -+ bool res = false; -+ -+ basec = safe_strdup(path); -+ -+ bname = basename(basec); -+ if (bname == NULL) { -+ free(basec); -+ ERROR("Out of memory"); -+ return false; -+ } -+ res = !strcmp(bname, "."); -+ free(basec); -+ return res; -+} -+ -+bool has_traling_path_separator(const char *path) -+{ -+ return path && strlen(path) && (path[strlen(path) - 1] == '/'); -+} -+ -+// PreserveTrailingDotOrSeparator returns the given cleaned path -+// and appends a trailing `/.` or `/` if its corresponding original -+// path ends with a trailing `/.` or `/`. If the cleaned -+// path already ends in a `.` path segment, then another is not added. If the -+// clean path already ends in a path separator, then another is not added. 
-+char *preserve_trailing_dot_or_separator(const char *cleanedpath, -+ const char *originalpath) -+{ -+ char *respath = NULL; -+ size_t len; -+ -+ if (strlen(cleanedpath) > (SIZE_MAX - 3)) { -+ return NULL; -+ } -+ -+ len = strlen(cleanedpath) + 3; -+ respath = malloc(len); -+ if (respath == NULL) { -+ ERROR("Out of memory"); -+ return NULL; -+ } -+ memset(respath, 0x00, len); -+ strcat(respath, cleanedpath); -+ -+ if (!specify_current_dir(cleanedpath) && specify_current_dir(originalpath)) { -+ if (!has_traling_path_separator(respath)) -+ strcat(respath, "/"); -+ strcat(respath, "."); -+ } -+ -+ if (!has_traling_path_separator(respath) && -+ has_traling_path_separator(originalpath)) -+ strcat(respath, "/"); -+ -+ return respath; -+} -+ -+ -+// Split splits path immediately following the final Separator, -+// separating it into a directory and file name component. -+// If there is no Separator in path, Split returns an empty dir -+// and file set to path. -+// The returned values have the property that path = dir+file. -+bool filepath_split(const char *path, char **dir, char **base) -+{ -+ ssize_t i; -+ size_t len; -+ -+ len = strlen(path); -+ if (len >= PATH_MAX) { -+ ERROR("Invalid path"); -+ return false; -+ } -+ i = len - 1; -+ while (i >= 0 && path[i] != '/') -+ i--; -+ -+ *dir = malloc(i + 2); -+ if (*dir == NULL) { -+ ERROR("Out of memory"); -+ return false; -+ } -+ memcpy(*dir, path, i + 1); -+ *(*dir + i + 1) = '\0'; -+ -+ *base = safe_strdup(path + i + 1); -+ -+ return true; -+} -+ -+ -+static bool do_clean_path_continue(const char *endpos, const char *stpos, const char *respath, char **dst) -+{ -+ if (endpos - stpos == 1 && stpos[0] == '.') { -+ return true; -+ } else if (endpos - stpos == 2 && stpos[0] == '.' 
&& stpos[1] == '.') { -+ char *dest = *dst; -+ if (dest <= respath + 1) { -+ return true; -+ } -+ for (--dest; dest > respath && !ISSLASH(dest[-1]); --dest) { -+ *dst = dest; -+ return true; -+ } -+ *dst = dest; -+ return true; -+ } -+ return false; -+} -+ -+int do_clean_path(const char *respath, const char *limit_respath, -+ const char *stpos, char **dst) -+{ -+ char *dest = *dst; -+ const char *endpos = NULL; -+ -+ for (endpos = stpos; *stpos; stpos = endpos) { -+ while (ISSLASH(*stpos)) { -+ ++stpos; -+ } -+ -+ for (endpos = stpos; *endpos && !ISSLASH(*endpos); ++endpos) { -+ } -+ -+ if (endpos - stpos == 0) { -+ break; -+ } else if (do_clean_path_continue(endpos, stpos, respath, &dest)) { -+ continue; -+ } -+ -+ if (!ISSLASH(dest[-1])) { -+ *dest++ = '/'; -+ } -+ -+ if (dest + (endpos - stpos) >= limit_respath) { -+ ERROR("Path is too long"); -+ if (dest > respath + 1) { -+ dest--; -+ } -+ *dest = '\0'; -+ return -1; -+ } -+ -+ memcpy(dest, stpos, (size_t)(endpos - stpos)); -+ dest += endpos - stpos; -+ *dest = '\0'; -+ } -+ *dst = dest; -+ return 0; -+} -+ -+char *cleanpath(const char *path, char *realpath, size_t realpath_len) -+{ -+ char *respath = NULL; -+ char *dest = NULL; -+ const char *stpos = NULL; -+ const char *limit_respath = NULL; -+ -+ if (path == NULL || path[0] == '\0' || \ -+ realpath == NULL || (realpath_len < PATH_MAX)) { -+ return NULL; -+ } -+ -+ respath = realpath; -+ -+ memset(respath, 0, realpath_len); -+ limit_respath = respath + PATH_MAX; -+ -+ if (!IS_ABSOLUTE_FILE_NAME(path)) { -+ if (!getcwd(respath, PATH_MAX)) { -+ ERROR("Failed to getcwd"); -+ respath[0] = '\0'; -+ goto error; -+ } -+ dest = strchr(respath, '\0'); -+ if (dest == NULL) { -+ ERROR("Failed to get the end of respath"); -+ goto error; -+ } -+ if (strlen(path) > (PATH_MAX - strlen(respath) - 1)) { -+ ERROR("Path is too long"); -+ goto error; -+ } -+ strcat(respath, path); -+ stpos = path; -+ } else { -+ dest = respath; -+ *dest++ = '/'; -+ stpos = path; -+ } -+ -+ if 
(do_clean_path(respath, limit_respath, stpos, &dest)) { -+ goto error; -+ } -+ -+ if (dest > respath + 1 && ISSLASH(dest[-1])) { -+ --dest; -+ } -+ *dest = '\0'; -+ -+ return respath; -+ -+error: -+ return NULL; -+} -+ -+static int do_path_realloc(const char *start, const char *end, -+ char **rpath, char **dest, const char **rpath_limit) -+{ -+ long long dest_offset = *dest - *rpath; -+ char *new_rpath = NULL; -+ size_t new_size; -+ int nret = 0; -+ size_t gap = 0; -+ -+ if (*dest + (end - start) < *rpath_limit) { -+ return 0; -+ } -+ -+ gap = (size_t)(end - start) + 1; -+ new_size = (size_t)(*rpath_limit - *rpath); -+ if (new_size > SIZE_MAX - gap) { -+ ERROR("Out of range!"); -+ return -1; -+ } -+ -+ if (gap > PATH_MAX) { -+ new_size += gap; -+ } else { -+ new_size += PATH_MAX; -+ } -+ nret = lxc_mem_realloc((void **)&new_rpath, new_size, *rpath, PATH_MAX); -+ if (nret) { -+ ERROR("Failed to realloc memory for files limit variables"); -+ return -1; -+ } -+ *rpath = new_rpath; -+ *rpath_limit = *rpath + new_size; -+ -+ *dest = *rpath + dest_offset; -+ -+ return 0; -+} -+ -+static int do_get_symlinks_copy_buf(const char *buf, const char *prefix, size_t prefix_len, -+ char **rpath, char **dest) -+{ -+ if (IS_ABSOLUTE_FILE_NAME(buf)) { -+ if (prefix_len) { -+ memcpy(*rpath, prefix, prefix_len); -+ } -+ *dest = *rpath + prefix_len; -+ *(*dest)++ = '/'; -+ } else { -+ if (*dest > *rpath + prefix_len + 1) { -+ for (--(*dest); *dest > *rpath && !ISSLASH((*dest)[-1]); --(*dest)) { -+ continue; -+ } -+ } -+ } -+ return 0; -+} -+ -+static int do_get_symlinks(const char **fullpath, const char *prefix, size_t prefix_len, -+ char **rpath, char **dest, const char **end, -+ int *num_links, char **extra_buf) -+{ -+ char *buf = NULL; -+ size_t len; -+ ssize_t n; -+ int ret = -1; -+ -+ if (++(*num_links) > MAXSYMLINKS) { -+ ERROR("Too many links in '%s'", *fullpath); -+ goto out; -+ } -+ -+ buf = lxc_common_calloc_s(PATH_MAX); -+ if (buf == NULL) { -+ ERROR("Out of memory"); -+ 
goto out; -+ } -+ -+ n = readlink(*rpath, buf, PATH_MAX - 1); -+ if (n < 0) { -+ goto out; -+ } -+ buf[n] = '\0'; -+ -+ if (*extra_buf == NULL) { -+ *extra_buf = lxc_common_calloc_s(PATH_MAX); -+ if (*extra_buf == NULL) { -+ ERROR("Out of memory"); -+ goto out; -+ } -+ } -+ -+ len = strlen(*end); -+ if (len >= PATH_MAX - n) { -+ ERROR("Path is too long"); -+ goto out; -+ } -+ -+ memmove(&(*extra_buf)[n], *end, len + 1); -+ memcpy(*extra_buf, buf, (size_t)n); -+ -+ *fullpath = *end = *extra_buf; -+ -+ if (do_get_symlinks_copy_buf(buf, prefix, prefix_len, rpath, dest) != 0) { -+ goto out; -+ } -+ -+ ret = 0; -+out: -+ free(buf); -+ return ret; -+} -+ -+static bool do_eval_symlinks_in_scope_is_symlink(const char *path) -+{ -+ struct stat st; -+ -+ if (lstat(path, &st) < 0) { -+ return true; -+ } -+ -+ if (!S_ISLNK(st.st_mode)) { -+ return true; -+ } -+ return false; -+} -+ -+static void do_eval_symlinks_skip_slash(const char **start, const char **end) -+{ -+ while (ISSLASH(**start)) { -+ ++(*start); -+ } -+ -+ for (*end = *start; **end && !ISSLASH(**end); ++(*end)) { -+ } -+} -+ -+static inline void skip_dest_traling_slash(char **dest, char **rpath, size_t prefix_len) -+{ -+ if (*dest > *rpath + prefix_len + 1) { -+ for (--(*dest); *dest > *rpath && !ISSLASH((*dest)[-1]); --(*dest)) { -+ continue; -+ } -+ } -+} -+ -+static inline bool is_current_char(const char c) -+{ -+ return c == '.'; -+} -+ -+static inline bool is_specify_current(const char *end, const char *start) -+{ -+ return (end - start == 1) && is_current_char(start[0]); -+} -+ -+static inline bool is_specify_parent(const char *end, const char *start) -+{ -+ return (end - start == 2) && is_current_char(start[0]) && is_current_char(start[1]); -+} -+ -+static int do_eval_symlinks_in_scope(const char *fullpath, const char *prefix, -+ size_t prefix_len, -+ char **rpath, char **dest, const char *rpath_limit) -+{ -+ const char *start = NULL; -+ const char *end = NULL; -+ char *extra_buf = NULL; -+ int nret = 0; -+ 
int num_links = 0; -+ -+ start = fullpath + prefix_len; -+ for (end = start; *start; start = end) { -+ do_eval_symlinks_skip_slash(&start, &end); -+ if (end - start == 0) { -+ break; -+ } else if (is_specify_current(end, start)) { -+ ; -+ } else if (is_specify_parent(end, start)) { -+ skip_dest_traling_slash(dest, rpath, prefix_len); -+ } else { -+ if (!ISSLASH((*dest)[-1])) { -+ *(*dest)++ = '/'; -+ } -+ -+ nret = do_path_realloc(start, end, rpath, dest, &rpath_limit); -+ if (nret != 0) { -+ nret = -1; -+ goto out; -+ } -+ -+ memcpy(*dest, start, (size_t)(end - start)); -+ *dest += end - start; -+ **dest = '\0'; -+ -+ if (do_eval_symlinks_in_scope_is_symlink(*rpath)) { -+ continue; -+ } -+ -+ nret = do_get_symlinks(&fullpath, prefix, prefix_len, rpath, dest, &end, &num_links, &extra_buf); -+ if (nret != 0) { -+ nret = -1; -+ goto out; -+ } -+ } -+ } -+out: -+ free(extra_buf); -+ return nret; -+} -+static char *eval_symlinks_in_scope(const char *fullpath, const char *rootpath) -+{ -+ char resroot[PATH_MAX] = {0}; -+ char *root = NULL; -+ char *rpath = NULL; -+ char *dest = NULL; -+ char *prefix = NULL; -+ const char *rpath_limit = NULL; -+ size_t prefix_len; -+ -+ if (fullpath == NULL || rootpath == NULL) { -+ return NULL; -+ } -+ -+ root = cleanpath(rootpath, resroot, sizeof(resroot)); -+ if (root == NULL) { -+ ERROR("Failed to get cleaned path"); -+ return NULL; -+ } -+ -+ if (!strcmp(fullpath, root)) { -+ return safe_strdup(fullpath); -+ } -+ -+ if (strstr(fullpath, root) == NULL) { -+ ERROR("Path '%s' is not in '%s'", fullpath, root); -+ return NULL; -+ } -+ -+ rpath = lxc_common_calloc_s(PATH_MAX); -+ if (rpath == NULL) { -+ ERROR("Out of memory"); -+ goto out; -+ } -+ rpath_limit = rpath + PATH_MAX; -+ -+ prefix = root; -+ prefix_len = (size_t)strlen(prefix); -+ if (!strcmp(prefix, "/")) { -+ prefix_len = 0; -+ } -+ -+ dest = rpath; -+ if (prefix_len) { -+ memcpy(rpath, prefix, prefix_len); -+ dest += prefix_len; -+ } -+ *dest++ = '/'; -+ -+ if 
(do_eval_symlinks_in_scope(fullpath, prefix, prefix_len, &rpath, &dest, -+ rpath_limit)) { -+ goto out; -+ } -+ -+ if (dest > rpath + prefix_len + 1 && ISSLASH(dest[-1])) { -+ --dest; -+ } -+ *dest = '\0'; -+ return rpath; -+ -+out: -+ free(rpath); -+ return NULL; -+} -+ -+// FollowSymlinkInScope is a wrapper around evalSymlinksInScope that returns an -+// absolute path. This function handles paths in a platform-agnostic manner. -+char *follow_symlink_in_scope(const char *fullpath, const char *rootpath) -+{ -+ char resfull[PATH_MAX] = {0}, *full = NULL; -+ char resroot[PATH_MAX] = {0}, *root = NULL; -+ -+ full = cleanpath(fullpath, resfull, PATH_MAX); -+ if (!full) { -+ ERROR("Failed to get cleaned path"); -+ return NULL; -+ } -+ -+ root = cleanpath(rootpath, resroot, PATH_MAX); -+ if (!root) { -+ ERROR("Failed to get cleaned path"); -+ return NULL; -+ } -+ -+ return eval_symlinks_in_scope(full, root); -+} -+ -+// GetResourcePath evaluates `path` in the scope of the container's rootpath, with proper path -+// sanitisation. Symlinks are all scoped to the rootpath of the container, as -+// though the container's rootpath was `/`. -+// -+// The BaseFS of a container is the host-facing path which is bind-mounted as -+// `/` inside the container. This method is essentially used to access a -+// particular path inside the container as though you were a process in that -+// container. 
-+int get_resource_path(const char *rootpath, const char *path, -+ char **scopepath) -+{ -+ char resolved[PATH_MAX] = {0}, *cleanedpath = NULL; -+ char *fullpath = NULL; -+ size_t len; -+ -+ if (!rootpath || !path || !scopepath) -+ return -1; -+ -+ *scopepath = NULL; -+ -+ cleanedpath = cleanpath(path, resolved, PATH_MAX); -+ if (!cleanedpath) { -+ ERROR("Failed to get cleaned path"); -+ return -1; -+ } -+ -+ len = strlen(rootpath) + strlen(cleanedpath) + 1; -+ fullpath = malloc(len); -+ if (!fullpath) { -+ ERROR("Out of memory"); -+ return -1; -+ } -+ snprintf(fullpath, len, "%s%s", rootpath, cleanedpath); -+ -+ *scopepath = follow_symlink_in_scope(fullpath, rootpath); -+ -+ free(fullpath); -+ return 0; -+} -+ -+// Rel returns a relative path that is lexically equivalent to targpath when -+// joined to basepath with an intervening separator. That is, -+// Join(basepath, Rel(basepath, targpath)) is equivalent to targpath itself. -+// On success, the returned path will always be relative to basepath, -+// even if basepath and targpath share no elements. -+// An error is returned if targpath can't be made relative to basepath or if -+// knowing the current working directory would be necessary to compute it. -+// Rel calls Clean on the result. 
-+char *path_relative(const char *basepath, const char *targpath) -+{ -+ char resbase[PATH_MAX] = {0}, *base = NULL; -+ char restarg[PATH_MAX] = {0}, *targ = NULL; -+ size_t bl = 0, tl = 0, b0 = 0, bi = 0, t0 = 0, ti = 0; -+ -+ base = cleanpath(basepath, resbase, PATH_MAX); -+ if (!base) { -+ ERROR("Failed to get cleaned path"); -+ return NULL; -+ } -+ -+ targ = cleanpath(targpath, restarg, PATH_MAX); -+ if (!targ) { -+ ERROR("Failed to get cleaned path"); -+ return NULL; -+ } -+ -+ if (strcmp(base, targ) == 0) -+ return safe_strdup("."); -+ -+ bl = strlen(base); -+ tl = strlen(targ); -+ while(true) { -+ while(bi < bl && !ISSLASH(base[bi])) -+ bi++; -+ while(ti < tl && !ISSLASH(targ[ti])) -+ ti++; -+ //not the same string -+ if (((bi - b0) != (ti - t0)) || strncmp(base + b0, targ + t0, bi - b0)) -+ break; -+ if (bi < bl) -+ bi++; -+ if (ti < tl) -+ ti++; -+ b0 = bi; -+ t0 = ti; -+ } -+ -+ if (b0 != bl) { -+ // Base elements left. Must go up before going down. -+ int seps = 0, i; -+ size_t ncopyed = 0, seps_size; -+ char *buf = NULL; -+ -+ for (bi = b0; bi < bl; bi++) { -+ if (ISSLASH(base[bi])) -+ seps++; -+ } -+ //strlen(..) + strlen(/..) + '\0' -+ seps_size = 2 + seps * 3 + 1; -+ if (t0 != tl) -+ seps_size += 1 + tl - t0; -+ -+ buf = calloc(seps_size, 1); -+ if (!buf) { -+ ERROR("Out of memory"); -+ return NULL; -+ } -+ buf[ncopyed++] = '.'; -+ buf[ncopyed++] = '.'; -+ for (i = 0; i < seps; i++) { -+ buf[ncopyed++] = '/'; -+ buf[ncopyed++] = '.'; -+ buf[ncopyed++] = '.'; -+ } -+ if (t0 != tl) { -+ buf[ncopyed++] = '/'; -+ memcpy(buf + ncopyed, targ + t0, tl - t0 + 1); -+ } -+ return buf; -+ } -+ -+ return safe_strdup(targ + t0); -+} -diff --git a/src/lxc/path.h b/src/lxc/path.h -new file mode 100644 -index 000000000..2c60fb9be ---- /dev/null -+++ b/src/lxc/path.h -@@ -0,0 +1,65 @@ -+/* SPDX-License-Identifier: LGPL-2.1+ */ -+/****************************************************************************** -+ * Copyright (c) Huawei Technologies Co., Ltd. 2020. 
Allrights reserved -+ * Description: isulad utils -+ * Author: lifeng -+ * Create: 2020-04-11 -+******************************************************************************/ -+#ifndef __ISULAD_PATH_H_ -+#define __ISULAD_PATH_H_ -+ -+#include -+ -+bool specify_current_dir(const char *path); -+ -+bool has_traling_path_separator(const char *path); -+ -+// PreserveTrailingDotOrSeparator returns the given cleaned path -+// and appends a trailing `/.` or `/` if its corresponding original -+// path ends with a trailing `/.` or `/`. If the cleaned -+// path already ends in a `.` path segment, then another is not added. If the -+// clean path already ends in a path separator, then another is not added. -+char *preserve_trailing_dot_or_separator(const char *cleanedpath, -+ const char *originalpath); -+ -+ -+// Split splits path immediately following the final Separator, -+// separating it into a directory and file name component. -+// If there is no Separator in path, Split returns an empty dir -+// and file set to path. -+// The returned values have the property that path = dir+file. -+bool filepath_split(const char *path, char **dir, char **base); -+ -+/* -+ * cleanpath is similar to realpath of glibc, but not expands symbolic links, -+ * and not check the existence of components of the path. -+ */ -+char *cleanpath(const char *path, char *realpath, size_t realpath_len); -+ -+ -+// FollowSymlinkInScope is a wrapper around evalSymlinksInScope that returns an -+// absolute path. This function handles paths in a platform-agnostic manner. -+char *follow_symlink_in_scope(const char *fullpath, const char *rootpath); -+ -+// GetResourcePath evaluates `path` in the scope of the container's rootpath, with proper path -+// sanitisation. Symlinks are all scoped to the rootpath of the container, as -+// though the container's rootpath was `/`. -+// -+// The BaseFS of a container is the host-facing path which is bind-mounted as -+// `/` inside the container. 
This method is essentially used to access a -+// particular path inside the container as though you were a process in that -+// container. -+int get_resource_path(const char *rootpath, const char *path, -+ char **scopepath); -+ -+// Rel returns a relative path that is lexically equivalent to targpath when -+// joined to basepath with an intervening separator. That is, -+// Join(basepath, Rel(basepath, targpath)) is equivalent to targpath itself. -+// On success, the returned path will always be relative to basepath, -+// even if basepath and targpath share no elements. -+// An error is returned if targpath can't be made relative to basepath or if -+// knowing the current working directory would be necessary to compute it. -+// Rel calls Clean on the result. -+char *path_relative(const char *basepath, const char *targpath); -+ -+#endif -diff --git a/src/lxc/process_utils.h b/src/lxc/process_utils.h -deleted file mode 100644 -index 4ea898a63..000000000 ---- a/src/lxc/process_utils.h -+++ /dev/null -@@ -1,290 +0,0 @@ --/* SPDX-License-Identifier: LGPL-2.1+ */ -- --#ifndef __LXC_PROCESS_UTILS_H --#define __LXC_PROCESS_UTILS_H -- --#ifndef _GNU_SOURCE --#define _GNU_SOURCE 1 --#endif --#include --#include --#include --#include --#include --#include --#include --#include -- --#include "compiler.h" --#include "config.h" --#include "syscall_numbers.h" -- --#ifndef CSIGNAL --#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ --#endif -- --#ifndef CLONE_VM --#define CLONE_VM 0x00000100 /* set if VM shared between processes */ --#endif -- --#ifndef CLONE_FS --#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ --#endif -- --#ifndef CLONE_FILES --#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ --#endif -- --#ifndef CLONE_SIGHAND --#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ --#endif -- --#ifndef CLONE_PIDFD --#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be 
placed in parent */ --#endif -- --#ifndef CLONE_PTRACE --#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ --#endif -- --#ifndef CLONE_VFORK --#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ --#endif -- --#ifndef CLONE_PARENT --#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ --#endif -- --#ifndef CLONE_THREAD --#define CLONE_THREAD 0x00010000 /* Same thread group? */ --#endif -- --#ifndef CLONE_NEWNS --#define CLONE_NEWNS 0x00020000 /* New mount namespace group */ --#endif -- --#ifndef CLONE_SYSVSEM --#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */ --#endif -- --#ifndef CLONE_SETTLS --#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */ --#endif -- --#ifndef CLONE_PARENT_SETTID --#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */ --#endif -- --#ifndef CLONE_CHILD_CLEARTID --#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */ --#endif -- --#ifndef CLONE_DETACHED --#define CLONE_DETACHED 0x00400000 /* Unused, ignored */ --#endif -- --#ifndef CLONE_UNTRACED --#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */ --#endif -- --#ifndef CLONE_CHILD_SETTID --#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */ --#endif -- --#ifndef CLONE_NEWCGROUP --#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */ --#endif -- --#ifndef CLONE_NEWUTS --#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */ --#endif -- --#ifndef CLONE_NEWIPC --#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */ --#endif -- --#ifndef CLONE_NEWUSER --#define CLONE_NEWUSER 0x10000000 /* New user namespace */ --#endif -- --#ifndef CLONE_NEWPID --#define CLONE_NEWPID 0x20000000 /* New pid namespace */ --#endif -- --#ifndef CLONE_NEWNET --#define CLONE_NEWNET 0x40000000 /* New network namespace */ --#endif -- 
--#ifndef CLONE_IO --#define CLONE_IO 0x80000000 /* Clone io context */ --#endif -- --/* Flags for the clone3() syscall. */ --#ifndef CLONE_CLEAR_SIGHAND --#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */ --#endif -- --#ifndef CLONE_INTO_CGROUP --#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ --#endif -- --/* -- * cloning flags intersect with CSIGNAL so can be used with unshare and clone3 -- * syscalls only: -- */ --#ifndef CLONE_NEWTIME --#define CLONE_NEWTIME 0x00000080 /* New time namespace */ --#endif -- --/* waitid */ --#ifndef P_PIDFD --#define P_PIDFD 3 --#endif -- --#ifndef CLONE_ARGS_SIZE_VER0 --#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ --#endif -- --#ifndef CLONE_ARGS_SIZE_VER1 --#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ --#endif -- --#ifndef CLONE_ARGS_SIZE_VER2 --#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ --#endif -- --#ifndef ptr_to_u64 --#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) --#endif --#ifndef u64_to_ptr --#define u64_to_ptr(x) ((void *)(uintptr_t)x) --#endif -- --struct lxc_clone_args { -- __aligned_u64 flags; -- __aligned_u64 pidfd; -- __aligned_u64 child_tid; -- __aligned_u64 parent_tid; -- __aligned_u64 exit_signal; -- __aligned_u64 stack; -- __aligned_u64 stack_size; -- __aligned_u64 tls; -- __aligned_u64 set_tid; -- __aligned_u64 set_tid_size; -- __aligned_u64 cgroup; --}; -- --__returns_twice static inline pid_t lxc_clone3(struct lxc_clone_args *args, size_t size) --{ -- return syscall(__NR_clone3, args, size); --} -- --#if defined(__ia64__) --int __clone2(int (*__fn)(void *__arg), void *__child_stack_base, -- size_t __child_stack_size, int __flags, void *__arg, ...); --#else --int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ... 
-- /* pid_t *ptid, struct user_desc *tls, pid_t *ctid */); --#endif -- --/** -- * lxc_clone() - create a new process -- * -- * - allocate stack: -- * This function allocates a new stack the size of page and passes it to the -- * kernel. -- * -- * - support all CLONE_*flags: -- * This function supports all CLONE_* flags. If in doubt or not sufficiently -- * familiar with process creation in the kernel and interactions with libcs -- * this function should be used. -- * -- * - pthread_atfork() handlers depending on libc: -- * Whether this function runs pthread_atfork() handlers depends on the -- * corresponding libc wrapper. glibc currently does not run pthread_atfork() -- * handlers but does not guarantee that they are not. Other libcs might or -- * might not run pthread_atfork() handlers. If you require guarantees please -- * refer to the lxc_raw_clone*() functions in process_utils.{c,h}. -- * -- * - should call lxc_raw_getpid(): -- * The child should use lxc_raw_getpid() to retrieve its pid. -- */ --extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd); -- -- --/* -- * lxc_raw_clone() - create a new process -- * -- * - fork() behavior: -- * This function returns 0 in the child and > 0 in the parent. -- * -- * - copy-on-write: -- * This function does not allocate a new stack and relies on copy-on-write -- * semantics. -- * -- * - supports subset of ClONE_* flags: -- * lxc_raw_clone() intentionally only supports a subset of the flags available -- * to the actual system call. Please refer to the implementation what flags -- * cannot be used. Also, please don't assume that just because a flag isn't -- * explicitly checked for as being unsupported that it is supported. If in -- * doubt or not sufficiently familiar with process creation in the kernel and -- * interactions with libcs this function should be used. 
-- * -- * - no pthread_atfork() handlers: -- * This function circumvents - as much as this this is possible - any libc -- * wrappers and thus does not run any pthread_atfork() handlers. Make sure -- * that this is safe to do in the context you are trying to call this -- * function. -- * -- * - must call lxc_raw_getpid(): -- * The child must use lxc_raw_getpid() to retrieve its pid. -- */ --extern pid_t lxc_raw_clone(unsigned long flags, int *pidfd); -- --/* -- * lxc_raw_clone_cb() - create a new process -- * -- * - non-fork() behavior: -- * Function does return pid of the child or -1 on error. Pass in a callback -- * function via the "fn" argument that gets executed in the child process. -- * The "args" argument is passed to "fn". -- * -- * All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb() -- * as well. -- */ --extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, -- unsigned long flags, int *pidfd); -- --#ifndef HAVE_EXECVEAT --static inline int execveat(int dirfd, const char *pathname, char *const argv[], -- char *const envp[], int flags) --{ -- return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); --} --#else --extern int execveat(int dirfd, const char *pathname, char *const argv[], -- char *const envp[], int flags); --#endif -- --/* -- * Because of older glibc's pid cache (up to 2.25) whenever clone() is called -- * the child must must retrieve it's own pid via lxc_raw_getpid(). 
-- */ --static inline pid_t lxc_raw_getpid(void) --{ -- return (pid_t)syscall(SYS_getpid); --} -- --static inline pid_t lxc_raw_gettid(void) --{ --#if __NR_gettid > 0 -- return syscall(__NR_gettid); --#else -- return lxc_raw_getpid(); --#endif --} -- --extern int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, -- unsigned int flags); -- --#endif /* __LXC_PROCESS_UTILS_H */ -diff --git a/src/lxc/process_utils.c b/src/lxc/raw_syscalls.c -similarity index 68% -rename from src/lxc/process_utils.c -rename to src/lxc/raw_syscalls.c -index 7494def46..3c6bd2506 100644 ---- a/src/lxc/process_utils.c -+++ b/src/lxc/raw_syscalls.c -@@ -13,12 +13,15 @@ - - #include "compiler.h" - #include "config.h" --#include "log.h" - #include "macro.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "syscall_numbers.h" - --lxc_log_define(process_utils, lxc); -+int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[], -+ char *const envp[], int flags) -+{ -+ return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags); -+} - - /* - * This is based on raw_clone in systemd but adapted to our needs. This uses -@@ -28,8 +31,16 @@ lxc_log_define(process_utils, lxc); - * The nice thing about this is that we get fork() behavior. That is - * lxc_raw_clone() returns 0 in the child and the child pid in the parent. - */ --__returns_twice static pid_t __lxc_raw_clone(unsigned long flags, int *pidfd) -+__returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd) - { -+ /* -+ * These flags don't interest at all so we don't jump through any hoops -+ * of retrieving them and passing them to the kernel. 
-+ */ -+ errno = EINVAL; -+ if ((flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | -+ CLONE_CHILD_CLEARTID | CLONE_SETTLS))) -+ return -EINVAL; - - #if defined(__s390x__) || defined(__s390__) || defined(__CRIS__) - /* On s390/s390x and cris the order of the first and second arguments -@@ -89,31 +100,6 @@ __returns_twice static pid_t __lxc_raw_clone(unsigned long flags, int *pidfd) - #endif - } - --__returns_twice pid_t lxc_raw_clone(unsigned long flags, int *pidfd) --{ -- pid_t pid; -- struct lxc_clone_args args = { -- .flags = flags, -- .pidfd = ptr_to_u64(pidfd), -- }; -- -- if (flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | -- CLONE_CHILD_CLEARTID | CLONE_SETTLS)) -- return ret_errno(EINVAL); -- -- /* On CLONE_PARENT we inherit the parent's exit signal. */ -- if (!(flags & CLONE_PARENT)) -- args.exit_signal = SIGCHLD; -- -- pid = lxc_clone3(&args, CLONE_ARGS_SIZE_VER0); -- if (pid < 0 && errno == ENOSYS) { -- SYSTRACE("Falling back to legacy clone"); -- return __lxc_raw_clone(flags, pidfd); -- } -- -- return pid; --} -- - pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags, - int *pidfd) - { -@@ -138,30 +124,3 @@ int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, - { - return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); - } -- --/* -- * Let's use the "standard stack limit" (i.e. glibc thread size default) for -- * stack sizes: 8MB. 
-- */ --#define __LXC_STACK_SIZE (8 * 1024 * 1024) --pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd) --{ -- pid_t ret; -- void *stack; -- -- stack = malloc(__LXC_STACK_SIZE); -- if (!stack) { -- SYSERROR("Failed to allocate clone stack"); -- return -ENOMEM; -- } -- --#ifdef __ia64__ -- ret = __clone2(fn, stack, __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd); --#else -- ret = clone(fn, stack + __LXC_STACK_SIZE, flags | SIGCHLD, arg, pidfd); --#endif -- if (ret < 0) -- SYSERROR("Failed to clone (%#x)", flags); -- -- return ret; --} -diff --git a/src/lxc/raw_syscalls.h b/src/lxc/raw_syscalls.h -new file mode 100644 -index 000000000..1219f28f4 ---- /dev/null -+++ b/src/lxc/raw_syscalls.h -@@ -0,0 +1,94 @@ -+/* SPDX-License-Identifier: LGPL-2.1+ */ -+ -+#ifndef __LXC_RAW_SYSCALL_H -+#define __LXC_RAW_SYSCALL_H -+ -+#ifndef _GNU_SOURCE -+#define _GNU_SOURCE 1 -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* clone */ -+#ifndef CLONE_PIDFD -+#define CLONE_PIDFD 0x00001000 -+#endif -+ -+/* waitid */ -+#ifndef P_PIDFD -+#define P_PIDFD 3 -+#endif -+ -+/* -+ * lxc_raw_clone() - create a new process -+ * -+ * - fork() behavior: -+ * This function returns 0 in the child and > 0 in the parent. -+ * -+ * - copy-on-write: -+ * This function does not allocate a new stack and relies on copy-on-write -+ * semantics. -+ * -+ * - supports subset of ClONE_* flags: -+ * lxc_raw_clone() intentionally only supports a subset of the flags available -+ * to the actual system call. Please refer to the implementation what flags -+ * cannot be used. Also, please don't assume that just because a flag isn't -+ * explicitly checked for as being unsupported that it is supported. If in -+ * doubt or not sufficiently familiar with process creation in the kernel and -+ * interactions with libcs this function should be used. 
-+ * -+ * - no pthread_atfork() handlers: -+ * This function circumvents - as much as this this is possible - any libc -+ * wrappers and thus does not run any pthread_atfork() handlers. Make sure -+ * that this is safe to do in the context you are trying to call this -+ * function. -+ * -+ * - must call lxc_raw_getpid(): -+ * The child must use lxc_raw_getpid() to retrieve its pid. -+ */ -+extern pid_t lxc_raw_clone(unsigned long flags, int *pidfd); -+ -+/* -+ * lxc_raw_clone_cb() - create a new process -+ * -+ * - non-fork() behavior: -+ * Function does return pid of the child or -1 on error. Pass in a callback -+ * function via the "fn" argument that gets executed in the child process. -+ * The "args" argument is passed to "fn". -+ * -+ * All other comments that apply to lxc_raw_clone() apply to lxc_raw_clone_cb() -+ * as well. -+ */ -+extern pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, -+ unsigned long flags, int *pidfd); -+ -+extern int lxc_raw_execveat(int dirfd, const char *pathname, char *const argv[], -+ char *const envp[], int flags); -+ -+/* -+ * Because of older glibc's pid cache (up to 2.25) whenever clone() is called -+ * the child must must retrieve it's own pid via lxc_raw_getpid(). 
-+ */ -+static inline pid_t lxc_raw_getpid(void) -+{ -+ return (pid_t)syscall(SYS_getpid); -+} -+ -+static inline pid_t lxc_raw_gettid(void) -+{ -+#if __NR_gettid > 0 -+ return syscall(__NR_gettid); -+#else -+ return lxc_raw_getpid(); -+#endif -+} -+ -+extern int lxc_raw_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, -+ unsigned int flags); -+ -+#endif /* __LXC_RAW_SYSCALL_H */ -diff --git a/src/lxc/rexec.c b/src/lxc/rexec.c -index cf198c021..c9c84b8c1 100644 ---- a/src/lxc/rexec.c -+++ b/src/lxc/rexec.c -@@ -13,7 +13,7 @@ - #include "file_utils.h" - #include "macro.h" - #include "memory_utils.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "string_utils.h" - #include "syscall_wrappers.h" - -@@ -88,7 +88,7 @@ static int is_memfd(void) - static void lxc_rexec_as_memfd(char **argv, char **envp, const char *memfd_name) - { - __do_close int execfd = -EBADF, fd = -EBADF, memfd = -EBADF, -- tmpfd = -EBADF; -+ tmpfd = -EBADF; - int ret; - ssize_t bytes_sent = 0; - struct stat st = {0}; -@@ -143,7 +143,7 @@ static void lxc_rexec_as_memfd(char **argv, char **envp, const char *memfd_name) - if (fcntl(memfd, F_ADD_SEALS, LXC_MEMFD_REXEC_SEALS)) - return; - -- execfd = move_fd(memfd); -+ execfd = memfd; - } else { - char procfd[LXC_PROC_PID_FD_LEN]; - -@@ -169,12 +169,13 @@ extern char **environ; - - int lxc_rexec(const char *memfd_name) - { -- __do_free_string_list char **argv = NULL; - int ret; -+ char **argv = NULL; - - ret = is_memfd(); - if (ret < 0 && ret == -ENOTRECOVERABLE) { -- fprintf(stderr, "%s - Failed to determine whether this is a memfd\n", -+ fprintf(stderr, -+ "%s - Failed to determine whether this is a memfd\n", - strerror(errno)); - return -1; - } else if (ret > 0) { -@@ -183,7 +184,8 @@ int lxc_rexec(const char *memfd_name) - - ret = parse_argv(&argv); - if (ret < 0) { -- fprintf(stderr, "%s - Failed to parse command line parameters\n", -+ fprintf(stderr, -+ "%s - Failed to parse command line parameters\n", - strerror(errno)); 
- return -1; - } -diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c -index 7820db8b2..4b9d23c55 100644 ---- a/src/lxc/seccomp.c -+++ b/src/lxc/seccomp.c -@@ -295,7 +295,11 @@ on_error: - #endif - - #if HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH -+#ifdef HAVE_ISULAD -+enum lxc_arch_t { -+#else - enum lxc_hostarch_t { -+#endif - lxc_seccomp_arch_all = 0, - lxc_seccomp_arch_native, - lxc_seccomp_arch_i386, -@@ -351,8 +355,13 @@ int get_hostarch(void) - return lxc_seccomp_arch_unknown; - } - -+#ifdef HAVE_ISULAD -+scmp_filter_ctx get_new_ctx(enum lxc_arch_t n_arch, -+ uint32_t default_policy_action, uint32_t *architectures) -+#else - scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, - uint32_t default_policy_action, bool *needs_merge) -+#endif - { - int ret; - uint32_t arch; -@@ -475,10 +484,17 @@ scmp_filter_ctx get_new_ctx(enum lxc_hostarch_t n_arch, - return NULL; - } - TRACE("Removed native arch from main seccomp context"); -- -+#ifdef HAVE_ISULAD -+ *architectures = arch; -+#else - *needs_merge = true; -+#endif - } else { -+#ifdef HAVE_ISULAD -+ *architectures = SCMP_ARCH_NATIVE; -+#else - *needs_merge = false; -+#endif - TRACE("Arch %d already present in main seccomp context", (int)n_arch); - } - -@@ -510,7 +526,11 @@ bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx, - if (ret < 0) { - errno = -ret; - SYSERROR("Failed loading rule to reject force umount"); -+#ifdef HAVE_ISULAD -+ return true; -+#else - return false; -+#endif - } - - INFO("Set seccomp rule to reject force umounts"); -@@ -519,20 +539,34 @@ bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx, - - nr = seccomp_syscall_resolve_name(line); - if (nr == __NR_SCMP_ERROR) { -+#ifdef HAVE_ISULAD -+ DEBUG("Failed to resolve syscall \"%s\"", line); -+ DEBUG("This syscall will NOT be handled by seccomp"); -+#else - WARN("Failed to resolve syscall \"%s\"", line); - WARN("This syscall will NOT be handled by seccomp"); -+#endif - return true; - } - - if (nr < 0) 
{ -+#ifdef HAVE_ISULAD -+ DEBUG("Got negative return value %d for syscall \"%s\"", nr, line); -+ DEBUG("This syscall will NOT be handled by seccomp"); -+#else - WARN("Got negative return value %d for syscall \"%s\"", nr, line); - WARN("This syscall will NOT be handled by seccomp"); -+#endif - return true; - } - - memset(&arg_cmp, 0, sizeof(arg_cmp)); - for (i = 0; i < rule->args_num; i++) { -+#ifdef HAVE_ISULAD -+ DEBUG("arg_cmp[%d]: SCMP_CMP(%u, %llu, %llu, %llu)", i, -+#else - INFO("arg_cmp[%d]: SCMP_CMP(%u, %llu, %llu, %llu)", i, -+#endif - rule->args_value[i].index, - (long long unsigned int)rule->args_value[i].op, - (long long unsigned int)rule->args_value[i].mask, -@@ -553,14 +587,43 @@ bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx, - rule->args_num, arg_cmp); - if (ret < 0) { - errno = -ret; -+#ifdef HAVE_ISULAD -+ DEBUG("Failed loading rule for %s (nr %d action %d (%s))", -+ line, nr, rule->action, get_action_name(rule->action)); -+ return true; -+#else - SYSERROR("Failed loading rule for %s (nr %d action %d (%s))", - line, nr, rule->action, get_action_name(rule->action)); - return false; -+#endif - } - - return true; - } - -+#ifdef HAVE_ISULAD -+#define SCMP_ARCH_INDEX_MAX 3 -+ -+struct scmp_ctx_info { -+ uint32_t architectures[SCMP_ARCH_INDEX_MAX]; -+ enum lxc_arch_t lxc_arch[SCMP_ARCH_INDEX_MAX]; -+ scmp_filter_ctx contexts[SCMP_ARCH_INDEX_MAX]; -+ bool needs_merge[SCMP_ARCH_INDEX_MAX]; -+}; -+ -+static int get_arch_index(enum lxc_arch_t arch, struct scmp_ctx_info *ctx) -+{ -+ int i; -+ -+ for (i = 0; i < SCMP_ARCH_INDEX_MAX; i++) { -+ if (ctx->lxc_arch[i] == arch) -+ return i; -+ } -+ -+ return -1; -+} -+#endif -+ - /* - * v2 consists of - * [x86] -@@ -575,6 +638,521 @@ bool do_resolve_add_rule(uint32_t arch, char *line, scmp_filter_ctx ctx, - * write - * close - */ -+#ifdef HAVE_ISULAD -+static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf) -+{ -+ int ret; -+ char *p; -+ enum lxc_arch_t 
cur_rule_arch, native_arch; -+ bool blacklist = false; -+ uint32_t default_policy_action = -1, default_rule_action = -1; -+ struct seccomp_v2_rule rule; -+ struct scmp_ctx_info ctx; -+ -+ if (strncmp(line, "blacklist", 9) == 0) -+ blacklist = true; -+ else if (strncmp(line, "whitelist", 9) != 0) { -+ ERROR("Bad seccomp policy style \"%s\"", line); -+ return -1; -+ } -+ -+ p = strchr(line, ' '); -+ if (p) { -+ default_policy_action = get_v2_default_action(p + 1); -+ if (default_policy_action == -2) -+ return -1; -+ } -+ -+ /* for blacklist, allow any syscall which has no rule */ -+ if (blacklist) { -+ if (default_policy_action == -1) -+ default_policy_action = SCMP_ACT_ALLOW; -+ -+ if (default_rule_action == -1) -+ default_rule_action = SCMP_ACT_KILL; -+ } else { -+ if (default_policy_action == -1) -+ default_policy_action = SCMP_ACT_KILL; -+ -+ if (default_rule_action == -1) -+ default_rule_action = SCMP_ACT_ALLOW; -+ } -+ -+ memset(&ctx, 0, sizeof(ctx)); -+ ctx.architectures[0] = SCMP_ARCH_NATIVE; -+ ctx.architectures[1] = SCMP_ARCH_NATIVE; -+ ctx.architectures[2] = SCMP_ARCH_NATIVE; -+ native_arch = get_hostarch(); -+ cur_rule_arch = native_arch; -+ if (native_arch == lxc_seccomp_arch_amd64) { -+ cur_rule_arch = lxc_seccomp_arch_all; -+ -+ ctx.lxc_arch[0] = lxc_seccomp_arch_i386; -+ ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_i386, -+ default_policy_action, &ctx.architectures[0]); -+ if (!ctx.contexts[0]) -+ goto bad; -+ -+ ctx.lxc_arch[1] = lxc_seccomp_arch_x32; -+ ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_x32, -+ default_policy_action, &ctx.architectures[1]); -+ if (!ctx.contexts[1]) -+ goto bad; -+ -+ ctx.lxc_arch[2] = lxc_seccomp_arch_amd64; -+ ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_amd64, -+ default_policy_action, &ctx.architectures[2]); -+ if (!ctx.contexts[2]) -+ goto bad; -+#ifdef SCMP_ARCH_PPC -+ } else if (native_arch == lxc_seccomp_arch_ppc64) { -+ cur_rule_arch = lxc_seccomp_arch_all; -+ -+ ctx.lxc_arch[0] = lxc_seccomp_arch_ppc; -+ 
ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_ppc, -+ default_policy_action, &ctx.architectures[0]); -+ if (!ctx.contexts[0]) -+ goto bad; -+ -+ ctx.lxc_arch[1] = lxc_seccomp_arch_ppc64; -+ ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_ppc64, -+ default_policy_action, &ctx.architectures[1]); -+ if (!ctx.contexts[1]) -+ goto bad; -+#endif -+#ifdef SCMP_ARCH_ARM -+ } else if (native_arch == lxc_seccomp_arch_arm64) { -+ cur_rule_arch = lxc_seccomp_arch_all; -+ -+ ctx.lxc_arch[0] = lxc_seccomp_arch_arm; -+ ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_arm, -+ default_policy_action, &ctx.architectures[0]); -+ if (!ctx.contexts[0]) -+ goto bad; -+ -+#ifdef SCMP_ARCH_AARCH64 -+ ctx.lxc_arch[1] = lxc_seccomp_arch_arm64; -+ ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_arm64, -+ default_policy_action, &ctx.architectures[1]); -+ if (!ctx.contexts[1]) -+ goto bad; -+#endif -+#endif -+#ifdef SCMP_ARCH_MIPS -+ } else if (native_arch == lxc_seccomp_arch_mips64) { -+ cur_rule_arch = lxc_seccomp_arch_all; -+ -+ ctx.lxc_arch[0] = lxc_seccomp_arch_mips; -+ ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mips, -+ default_policy_action, &ctx.architectures[0]); -+ if (!ctx.contexts[0]) -+ goto bad; -+ -+ ctx.lxc_arch[1] = lxc_seccomp_arch_mips64n32; -+ ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mips64n32, -+ default_policy_action, &ctx.architectures[1]); -+ if (!ctx.contexts[1]) -+ goto bad; -+ -+ ctx.lxc_arch[2] = lxc_seccomp_arch_mips64; -+ ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mips64, -+ default_policy_action, &ctx.architectures[2]); -+ if (!ctx.contexts[2]) -+ goto bad; -+ } else if (native_arch == lxc_seccomp_arch_mipsel64) { -+ cur_rule_arch = lxc_seccomp_arch_all; -+ ctx.lxc_arch[0] = lxc_seccomp_arch_mipsel; -+ ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mipsel, -+ default_policy_action, &ctx.architectures[0]); -+ if (!ctx.contexts[0]) -+ goto bad; -+ -+ ctx.lxc_arch[1] = lxc_seccomp_arch_mipsel64n32; -+ ctx.contexts[1] = 
get_new_ctx(lxc_seccomp_arch_mipsel64n32, -+ default_policy_action, &ctx.architectures[1]); -+ if (!ctx.contexts[1]) -+ goto bad; -+ -+ ctx.lxc_arch[2] = lxc_seccomp_arch_mipsel64; -+ ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mipsel64, -+ default_policy_action, &ctx.architectures[2]); -+ if (!ctx.contexts[2]) -+ goto bad; -+#endif -+ } -+ -+ if (default_policy_action != SCMP_ACT_KILL) { -+ ret = seccomp_reset(conf->seccomp.seccomp_ctx, default_policy_action); -+ if (ret != 0) { -+ ERROR("Error re-initializing Seccomp"); -+ return -1; -+ } -+ -+ ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_CTL_NNP, 0); -+ if (ret < 0) { -+ errno = -ret; -+ SYSERROR("Failed to turn off no-new-privs"); -+ return -1; -+ } -+ -+#ifdef SCMP_FLTATR_ATL_TSKIP -+ ret = seccomp_attr_set(conf->seccomp.seccomp_ctx, SCMP_FLTATR_ATL_TSKIP, 1); -+ if (ret < 0) { -+ errno = -ret; -+ SYSWARN("Failed to turn on seccomp nop-skip, continuing"); -+ } -+#endif -+ } -+ -+ while (getline(&line, line_bufsz, f) != -1) { -+ if (line[0] == '#') -+ continue; -+ -+ if (line[0] == '\0') -+ continue; -+ -+ remove_trailing_newlines(line); -+ -+#ifdef HAVE_ISULAD -+ DEBUG("Processing \"%s\"", line); -+#else -+ INFO("Processing \"%s\"", line); -+#endif -+ if (line[0] == '[') { -+ /* Read the architecture for next set of rules. 
*/ -+ if (strcmp(line, "[x86]") == 0 || -+ strcmp(line, "[X86]") == 0) { -+ if (native_arch != lxc_seccomp_arch_i386 && -+ native_arch != lxc_seccomp_arch_amd64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_i386; -+ } else if (strcmp(line, "[x32]") == 0 || -+ strcmp(line, "[X32]") == 0) { -+ if (native_arch != lxc_seccomp_arch_amd64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_x32; -+ } else if (strcmp(line, "[X86_64]") == 0 || -+ strcmp(line, "[x86_64]") == 0) { -+ if (native_arch != lxc_seccomp_arch_amd64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_amd64; -+ } else if (strcmp(line, "[all]") == 0 || -+ strcmp(line, "[ALL]") == 0) { -+ cur_rule_arch = lxc_seccomp_arch_all; -+ } -+#ifdef SCMP_ARCH_ARM -+ else if (strcmp(line, "[arm]") == 0 || -+ strcmp(line, "[ARM]") == 0) { -+ if (native_arch != lxc_seccomp_arch_arm && -+ native_arch != lxc_seccomp_arch_arm64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_arm; -+ } -+#endif -+#ifdef SCMP_ARCH_AARCH64 -+ else if (strcmp(line, "[arm64]") == 0 || -+ strcmp(line, "[ARM64]") == 0) { -+ if (native_arch != lxc_seccomp_arch_arm64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_arm64; -+ } -+#endif -+#ifdef SCMP_ARCH_PPC64LE -+ else if (strcmp(line, "[ppc64le]") == 0 || -+ strcmp(line, "[PPC64LE]") == 0) { -+ if (native_arch != lxc_seccomp_arch_ppc64le) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_ppc64le; -+ } -+#endif -+#ifdef SCMP_ARCH_PPC64 -+ else if (strcmp(line, "[ppc64]") == 0 || -+ strcmp(line, "[PPC64]") == 0) { -+ if (native_arch != lxc_seccomp_arch_ppc64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_ppc64; 
-+ } -+#endif -+#ifdef SCMP_ARCH_PPC -+ else if (strcmp(line, "[ppc]") == 0 || -+ strcmp(line, "[PPC]") == 0) { -+ if (native_arch != lxc_seccomp_arch_ppc && -+ native_arch != lxc_seccomp_arch_ppc64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_ppc; -+ } -+#endif -+#ifdef SCMP_ARCH_MIPS -+ else if (strcmp(line, "[mips64]") == 0 || -+ strcmp(line, "[MIPS64]") == 0) { -+ if (native_arch != lxc_seccomp_arch_mips64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_mips64; -+ } else if (strcmp(line, "[mips64n32]") == 0 || -+ strcmp(line, "[MIPS64N32]") == 0) { -+ if (native_arch != lxc_seccomp_arch_mips64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_mips64n32; -+ } else if (strcmp(line, "[mips]") == 0 || -+ strcmp(line, "[MIPS]") == 0) { -+ if (native_arch != lxc_seccomp_arch_mips && -+ native_arch != lxc_seccomp_arch_mips64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_mips; -+ } else if (strcmp(line, "[mipsel64]") == 0 || -+ strcmp(line, "[MIPSEL64]") == 0) { -+ if (native_arch != lxc_seccomp_arch_mipsel64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_mipsel64; -+ } else if (strcmp(line, "[mipsel64n32]") == 0 || -+ strcmp(line, "[MIPSEL64N32]") == 0) { -+ if (native_arch != lxc_seccomp_arch_mipsel64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_mipsel64n32; -+ } else if (strcmp(line, "[mipsel]") == 0 || -+ strcmp(line, "[MIPSEL]") == 0) { -+ if (native_arch != lxc_seccomp_arch_mipsel && -+ native_arch != lxc_seccomp_arch_mipsel64) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_mipsel; -+ } -+#endif -+#ifdef SCMP_ARCH_S390X -+ else if (strcmp(line, "[s390x]") == 0 || 
-+ strcmp(line, "[S390X]") == 0) { -+ if (native_arch != lxc_seccomp_arch_s390x) { -+ cur_rule_arch = lxc_seccomp_arch_unknown; -+ continue; -+ } -+ -+ cur_rule_arch = lxc_seccomp_arch_s390x; -+ } -+#endif -+ else { -+ goto bad_arch; -+ } -+ -+ continue; -+ } -+ -+ /* irrelevant arch - i.e. arm on i386 */ -+ if (cur_rule_arch == lxc_seccomp_arch_unknown) -+ continue; -+ -+ memset(&rule, 0, sizeof(rule)); -+ /* read optional action which follows the syscall */ -+ ret = parse_v2_rules(line, default_rule_action, &rule); -+ if (ret != 0) { -+ ERROR("Failed to interpret seccomp rule"); -+ goto bad_rule; -+ } -+ -+ if (cur_rule_arch == native_arch) { -+ /* add for native arch */ -+ if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line, -+ conf->seccomp.seccomp_ctx, &rule)) -+ goto bad_rule; -+ -+#ifdef HAVE_ISULAD -+ DEBUG("Added native rule for arch %d for %s action %d(%s)", -+#else -+ INFO("Added native rule for arch %d for %s action %d(%s)", -+#endif -+ SCMP_ARCH_NATIVE, line, rule.action, -+ get_action_name(rule.action)); -+ } else if (cur_rule_arch != lxc_seccomp_arch_all) { -+ /* add for compat specified arch */ -+ int arch_index = get_arch_index(cur_rule_arch, &ctx); -+ if (arch_index < 0) -+ goto bad_arch; -+ -+ if (!do_resolve_add_rule(ctx.architectures[arch_index], line, -+ ctx.contexts[arch_index], &rule)) -+ goto bad_rule; -+ -+#ifdef HAVE_ISULAD -+ DEBUG("Added compat rule for arch %d for %s action %d(%s)", -+#else -+ INFO("Added compat rule for arch %d for %s action %d(%s)", -+#endif -+ ctx.architectures[arch_index], line, rule.action, -+ get_action_name(rule.action)); -+ ctx.needs_merge[arch_index] = true; -+ } else { -+ /* add for all compat archs */ -+ if (!do_resolve_add_rule(SCMP_ARCH_NATIVE, line, -+ conf->seccomp.seccomp_ctx, &rule)) -+ goto bad_rule; -+ -+#ifdef HAVE_ISULAD -+ DEBUG("Added native rule for arch %d for %s action %d(%s)", -+#else -+ INFO("Added native rule for arch %d for %s action %d(%s)", -+#endif -+ SCMP_ARCH_NATIVE, line, rule.action, -+ 
get_action_name(rule.action)); -+ -+ if (ctx.architectures[0] != SCMP_ARCH_NATIVE) { -+ if (!do_resolve_add_rule(ctx.architectures[0], line, -+ ctx.contexts[0], &rule)) -+ goto bad_rule; -+ -+#ifdef HAVE_ISULAD -+ DEBUG("Added compat rule for arch %d for %s action %d(%s)", -+#else -+ INFO("Added compat rule for arch %d for %s action %d(%s)", -+#endif -+ ctx.architectures[0], line, rule.action, -+ get_action_name(rule.action)); -+ ctx.needs_merge[0] = true; -+ } -+ -+ if (ctx.architectures[1] != SCMP_ARCH_NATIVE) { -+ if (!do_resolve_add_rule(ctx.architectures[1], line, -+ ctx.contexts[1], &rule)) -+ goto bad_rule; -+ -+#ifdef HAVE_ISULAD -+ DEBUG("Added compat rule for arch %d for %s action %d(%s)", -+#else -+ INFO("Added compat rule for arch %d for %s action %d(%s)", -+#endif -+ ctx.architectures[1], line, rule.action, -+ get_action_name(rule.action)); -+ ctx.needs_merge[1] = true; -+ } -+ -+ if (ctx.architectures[2] != SCMP_ARCH_NATIVE) { -+ if (!do_resolve_add_rule(ctx.architectures[2], line, -+ ctx.contexts[2], &rule)) -+ goto bad_rule; -+ -+#ifdef HAVE_ISULAD -+ DEBUG("Added native rule for arch %d for %s action %d(%s)", -+#else -+ INFO("Added native rule for arch %d for %s action %d(%s)", -+#endif -+ ctx.architectures[2], line, rule.action, -+ get_action_name(rule.action)); -+ ctx.needs_merge[2] = true; -+ } -+ } -+ -+ } -+ -+ INFO("Merging compat seccomp contexts into main context"); -+ if (ctx.contexts[0]) { -+ if (ctx.needs_merge[0]) { -+ ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[0]); -+ if (ret < 0) { -+ ERROR("%s - Failed to merge first compat seccomp " -+ "context into main context", strerror(-ret)); -+ goto bad; -+ } -+ -+ TRACE("Merged first compat seccomp context into main context"); -+ } else { -+ seccomp_release(ctx.contexts[0]); -+ ctx.contexts[0] = NULL; -+ } -+ } -+ -+ if (ctx.contexts[1]) { -+ if (ctx.needs_merge[1]) { -+ ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[1]); -+ if (ret < 0) { -+ ERROR("%s - Failed 
to merge second compat seccomp " -+ "context into main context", strerror(-ret)); -+ goto bad; -+ } -+ -+ TRACE("Merged second compat seccomp context into main context"); -+ } else { -+ seccomp_release(ctx.contexts[1]); -+ ctx.contexts[1] = NULL; -+ } -+ } -+ -+ if (ctx.contexts[2]) { -+ if (ctx.needs_merge[2]) { -+ ret = seccomp_merge(conf->seccomp.seccomp_ctx, ctx.contexts[2]); -+ if (ret < 0) { -+ ERROR("%s - Failed to merge third compat seccomp " -+ "context into main context", strerror(-ret)); -+ goto bad; -+ } -+ -+ TRACE("Merged third compat seccomp context into main context"); -+ } else { -+ seccomp_release(ctx.contexts[2]); -+ ctx.contexts[2] = NULL; -+ } -+ } -+ -+ free(line); -+ return 0; -+ -+bad_arch: -+ ERROR("Unsupported architecture \"%s\"", line); -+ -+bad_rule: -+bad: -+ if (ctx.contexts[0]) -+ seccomp_release(ctx.contexts[0]); -+ -+ if (ctx.contexts[1]) -+ seccomp_release(ctx.contexts[1]); -+ -+ if (ctx.contexts[2]) -+ seccomp_release(ctx.contexts[2]); -+ -+ free(line); -+ -+ return -1; -+} -+#else - static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_conf *conf) - { - int ret; -@@ -1067,6 +1645,7 @@ bad: - - return -1; - } -+#endif - #else /* HAVE_DECL_SECCOMP_SYSCALL_RESOLVE_NAME_ARCH */ - static int parse_config_v2(FILE *f, char *line, struct lxc_conf *conf) - { -@@ -1354,7 +1933,6 @@ int seccomp_notify_handler(int fd, uint32_t events, void *data, - char *cookie = conf->seccomp.notifier.cookie; - uint64_t req_id; - -- memset(req, 0, sizeof(*req)); - ret = seccomp_notify_receive(fd, req); - if (ret) { - SYSERROR("Failed to read seccomp notification"); -diff --git a/src/lxc/start.c b/src/lxc/start.c -index fd969c433..51d13254b 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -47,7 +47,7 @@ - #include "monitor.h" - #include "namespace.h" - #include "network.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "start.h" - #include "storage/storage.h" - #include "storage/storage_utils.h" -@@ 
-212,13 +212,6 @@ int lxc_check_inherited(struct lxc_conf *conf, bool closeall, - if (conf && conf->close_all_fds) - closeall = true; - -- /* -- * Disable syslog at this point to avoid the above logging -- * function to open a new fd and make the check_inherited function -- * enter an infinite loop. -- */ -- lxc_log_syslog_disable(); -- - restart: - dir = opendir("/proc/self/fd"); - if (!dir) -@@ -279,24 +272,21 @@ restart: - - #endif - if (closeall) { -- if (close(fd)) -- SYSINFO("Closed inherited fd %d", fd); -- else -- INFO("Closed inherited fd %d", fd); -+ close(fd); - closedir(dir); -+ INFO("Closed inherited fd %d", fd); - goto restart; - } - WARN("Inherited fd %d", fd); - } -- closedir(dir); - -- /* -- * Only enable syslog at this point to avoid the above logging -- * function to open a new fd and make the check_inherited function -- * enter an infinite loop. -+ /* Only enable syslog at this point to avoid the above logging function -+ * to open a new fd and make the check_inherited function enter an -+ * infinite loop. 
- */ -- lxc_log_syslog_enable(); -+ lxc_log_enable_syslog(); - -+ closedir(dir); /* cannot fail */ - return 0; - } - -@@ -590,13 +580,23 @@ int lxc_poll(const char *name, struct lxc_handler *handler) - - TRACE("Mainloop is ready"); - -+#ifdef HAVE_ISULAD -+ // iSulad: close stdin pipe if we do not want open_stdin with container stdin -+ if (!handler->conf->console.open_stdin) { -+ if (handler->conf->console.pipes[0][1] > 0) { -+ close(handler->conf->console.pipes[0][1]); -+ handler->conf->console.pipes[0][1] = -1; -+ } -+ } -+#endif -+ - ret = lxc_mainloop(&descr, -1); - close_prot_errno_disarm(descr.epfd); - if (ret < 0 || !handler->init_died) - goto out_mainloop_console; - - if (has_console) -- ret = lxc_mainloop(&descr_console, 0); -+ ret = lxc_mainloop(&descr_console, 100); - - out_mainloop_console: - if (has_console) { -@@ -615,7 +615,32 @@ out_sigfd: - return ret; - } - --void lxc_put_handler(struct lxc_handler *handler) -+void lxc_zero_handler(struct lxc_handler *handler) -+{ -+ memset(handler, 0, sizeof(struct lxc_handler)); -+ -+ handler->state = STOPPED; -+ -+ handler->pinfd = -EBADF; -+ -+ handler->pidfd = -EBADF; -+ -+ handler->sigfd = -EBADF; -+ -+ for (int i = 0; i < LXC_NS_MAX; i++) -+ handler->nsfd[i] = -EBADF; -+ -+ handler->data_sock[0] = -EBADF; -+ handler->data_sock[1] = -EBADF; -+ -+ handler->state_socket_pair[0] = -EBADF; -+ handler->state_socket_pair[1] = -EBADF; -+ -+ handler->sync_sock[0] = -EBADF; -+ handler->sync_sock[1] = -EBADF; -+} -+ -+void lxc_free_handler(struct lxc_handler *handler) - { - close_prot_errno_disarm(handler->pinfd); - close_prot_errno_disarm(handler->pidfd); -@@ -627,27 +652,22 @@ void lxc_put_handler(struct lxc_handler *handler) - close_prot_errno_disarm(handler->state_socket_pair[0]); - close_prot_errno_disarm(handler->state_socket_pair[1]); - cgroup_exit(handler->cgroup_ops); -- if (handler->conf && handler->conf->reboot == REBOOT_NONE) -- free_disarm(handler); -- else -- handler->conf = NULL; -+ handler->conf = 
NULL; -+ free_disarm(handler); - } - --struct lxc_handler *lxc_init_handler(struct lxc_handler *old, -- const char *name, struct lxc_conf *conf, -+struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, - const char *lxcpath, bool daemonize) - { -- int nr_keep_fds = 0; - int ret; - struct lxc_handler *handler; - -- if (!old) -- handler = zalloc(sizeof(*handler)); -- else -- handler = old; -+ handler = malloc(sizeof(*handler)); - if (!handler) - return NULL; - -+ memset(handler, 0, sizeof(*handler)); -+ - /* Note that am_guest_unpriv() checks the effective uid. We - * probably don't care if we are real root only if we are running - * as root so this should be fine. -@@ -671,6 +691,11 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old, - handler->nsfd[i] = -EBADF; - - handler->name = name; -+ -+#ifdef HAVE_ISULAD -+ handler->exit_code = -1; /* isulad: record exit code of container */ -+#endif -+ - if (daemonize) - handler->transient_pid = lxc_raw_getpid(); - else -@@ -691,8 +716,6 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old, - TRACE("Created anonymous pair {%d,%d} of unix sockets", - handler->state_socket_pair[0], - handler->state_socket_pair[1]); -- handler->keep_fds[nr_keep_fds++] = handler->state_socket_pair[0]; -- handler->keep_fds[nr_keep_fds++] = handler->state_socket_pair[1]; - } - - if (handler->conf->reboot == REBOOT_NONE) { -@@ -701,7 +724,6 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old, - ERROR("Failed to set up command socket"); - goto on_error; - } -- handler->keep_fds[nr_keep_fds++] = handler->conf->maincmd_fd; - } - - TRACE("Unix domain socket %d for command server is ready", -@@ -710,7 +732,7 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old, - return handler; - - on_error: -- lxc_put_handler(handler); -+ lxc_free_handler(handler); - - return NULL; - } -@@ -721,6 +743,10 @@ int lxc_init(const char *name, struct lxc_handler *handler) - int ret; - const char 
*loglevel; - struct lxc_conf *conf = handler->conf; -+#ifdef HAVE_ISULAD -+ conf->console.disable_pty = handler->disable_pty; -+ conf->console.open_stdin = handler->open_stdin; -+#endif - - handler->monitor_pid = lxc_raw_getpid(); - status_fd = open("/proc/self/status", O_RDONLY | O_CLOEXEC); -@@ -810,6 +836,9 @@ int lxc_init(const char *name, struct lxc_handler *handler) - ret = lxc_terminal_setup(conf); - if (ret < 0) { - ERROR("Failed to create console"); -+#ifdef HAVE_ISULAD -+ lxc_write_error_message(conf->errpipe[1], "Failed to create console for container \"%s\".", name); -+#endif - goto out_restore_sigmask; - } - TRACE("Created console"); -@@ -853,6 +882,185 @@ out_restore_sigmask: - return -1; - } - -+#ifdef HAVE_ISULAD -+/* isulad: start timeout thread */ -+typedef enum { -+ START_INIT, -+ START_TIMEOUT, -+ START_MAX, -+} start_timeout_t; -+ -+static start_timeout_t global_timeout_state = START_INIT; -+static sem_t global_timeout_sem; -+ -+struct start_timeout_conf { -+ unsigned int timeout; -+ int errfd; -+}; -+ -+void trim_line(char *s) -+{ -+ size_t len; -+ -+ len = strlen(s); -+ while ((len > 1) && (s[len - 1] == '\n')) -+ s[--len] = '\0'; -+} -+ -+static int _read_procs_file(const char *path, pid_t **pids, size_t *len) -+{ -+ FILE *f; -+ char *line = NULL; -+ size_t sz = 0; -+ pid_t *tmp_pids = NULL; -+ -+ f = fopen_cloexec(path, "r"); -+ if (!f) -+ return -1; -+ -+ while (getline(&line, &sz, f) != -1) { -+ pid_t pid; -+ trim_line(line); -+ pid = (pid_t)atoll(line); -+ if (lxc_mem_realloc((void **)&tmp_pids, sizeof(pid_t) * (*len + 1), *pids, sizeof(pid_t) * (*len)) != 0) { -+ free(*pids); -+ *pids = NULL; -+ ERROR("out of memory"); -+ free(line); -+ fclose(f); -+ return -1; -+ } -+ *pids = tmp_pids; -+ -+ (*pids)[*len] = pid; -+ (*len)++; -+ } -+ -+ free(line); -+ fclose(f); -+ return 0; -+} -+ -+static int _recursive_read_cgroup_procs(const char *dirpath, pid_t **pids, size_t *len) -+{ -+ struct dirent *direntp = NULL; -+ DIR *dir = NULL; -+ int 
ret, failed = 0; -+ char pathname[PATH_MAX]; -+ -+ dir = opendir(dirpath); -+ if (dir == NULL) { -+ WARN("Failed to open \"%s\"", dirpath); -+ return 0; -+ } -+ -+ while ((direntp = readdir(dir))) { -+ struct stat mystat; -+ int rc; -+ -+ if (!strcmp(direntp->d_name, ".") || -+ !strcmp(direntp->d_name, "..")) -+ continue; -+ -+ rc = snprintf(pathname, PATH_MAX, "%s/%s", dirpath, direntp->d_name); -+ if (rc < 0 || rc >= PATH_MAX) { -+ failed = 1; -+ continue; -+ } -+ -+ if (strcmp(direntp->d_name, "cgroup.procs") == 0) { -+ if (_read_procs_file(pathname, pids, len)) { -+ failed = 1; -+ -+ } -+ continue; -+ } -+ -+ ret = lstat(pathname, &mystat); -+ if (ret) { -+ failed = 1; -+ continue; -+ } -+ -+ if (S_ISDIR(mystat.st_mode)) { -+ if (_recursive_read_cgroup_procs(pathname, pids, len) < 0) -+ failed = 1; -+ } -+ } -+ -+ ret = closedir(dir); -+ if (ret) { -+ WARN("Failed to close directory \"%s\"", dirpath); -+ failed = 1; -+ } -+ -+ return failed ? -1 : 0; -+} -+ -+int get_all_pids(struct cgroup_ops *cg_ops, pid_t **pids, size_t *len) -+{ -+ const char *devices_path = NULL; -+ -+ devices_path = cg_ops->get_cgroup_full_path(cg_ops, "devices"); -+ if (!file_exists(devices_path)) { -+ return 0; -+ } -+ -+ return _recursive_read_cgroup_procs(devices_path, pids, len); -+} -+ -+static int set_cgroup_freezer(struct cgroup_ops *cg_ops, const char *value) -+{ -+ char *fullpath; -+ int ret; -+ -+ fullpath = must_make_path(cg_ops->get_cgroup_full_path(cg_ops, "freezer"), "freezer.state", NULL); -+ ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); -+ free(fullpath); -+ return ret; -+} -+ -+/* isulad: kill all process in container cgroup path */ -+static void signal_all_processes(struct lxc_handler *handler) -+{ -+ int ret; -+ struct cgroup_ops *cg_ops = handler->cgroup_ops; -+ pid_t *pids = NULL; -+ size_t len = 0, i; -+ -+ ret = set_cgroup_freezer(cg_ops, "FROZEN"); -+ if (ret < 0 && errno != ENOENT) { -+ WARN("cgroup_set frozen failed"); -+ } -+ -+ ret = 
get_all_pids(cg_ops, &pids, &len); -+ if (ret < 0) { -+ WARN("failed to get all pids"); -+ } -+ -+ for (i = 0; i < len; i++) { -+ ret = kill(pids[i], SIGKILL); -+ if (ret < 0 && errno != ESRCH) { -+ WARN("Can not kill process (pid=%d) with SIGKILL for container %s", pids[i], handler->name); -+ } -+ } -+ -+ ret = set_cgroup_freezer(cg_ops, "THAWED"); -+ if (ret < 0 && errno != ENOENT) { -+ WARN("cgroup_set thawed failed"); -+ } -+ -+ for (i = 0; i < len; i++) { -+ ret = lxc_wait_for_pid_status(pids[i]); -+ if (ret < 0 && errno != ECHILD) { -+ WARN("Failed to wait pid %d for container %s: %s", pids[i], handler->name, strerror(errno)); -+ } -+ } -+ -+ free(pids); -+} -+#endif -+ - void lxc_end(struct lxc_handler *handler) - { - int ret; -@@ -926,11 +1134,37 @@ void lxc_end(struct lxc_handler *handler) - - lsm_process_cleanup(handler->conf, handler->lxcpath); - -+#ifdef HAVE_ISULAD -+ // close maincmd fd before destroy cgroup for isulad -+ if (handler->conf->reboot == REBOOT_NONE) { -+ /* For all new state clients simply close the command socket. -+ * This will inform all state clients that the container is -+ * STOPPED and also prevents a race between a open()/close() on -+ * the command socket causing a new process to get ECONNREFUSED -+ * because we haven't yet closed the command socket. 
-+ */ -+ close_prot_errno_disarm(handler->conf->maincmd_fd); -+ TRACE("Closed command socket"); -+ } -+ int retry_count = 0; -+ int max_retry = 10; -+retry: -+ if (cgroup_ops != NULL && !cgroup_ops->payload_destroy(cgroup_ops, handler)) { -+ TRACE("Trying to kill all subprocess"); -+ signal_all_processes(handler); -+ TRACE("Finished kill all subprocess"); -+ if (retry_count < max_retry) { -+ usleep(100 * 1000); /* 100 millisecond */ -+ retry_count++; -+ goto retry; -+ } -+ SYSERROR("Failed to destroy cgroup path for container: \"%s\"", handler->name); -+ } -+#else - if (cgroup_ops) { - cgroup_ops->payload_destroy(cgroup_ops, handler); - cgroup_ops->monitor_destroy(cgroup_ops, handler); - } -- - if (handler->conf->reboot == REBOOT_NONE) { - /* For all new state clients simply close the command socket. - * This will inform all state clients that the container is -@@ -940,12 +1174,25 @@ void lxc_end(struct lxc_handler *handler) - */ - close_prot_errno_disarm(handler->conf->maincmd_fd); - TRACE("Closed command socket"); -+ } -+#endif - -+ if (handler->conf->reboot == REBOOT_NONE) { - /* This function will try to connect to the legacy lxc-monitord - * state server and only exists for backwards compatibility. - */ - lxc_monitor_send_state(name, STOPPED, handler->lxcpath); - -+#ifdef HAVE_ISULAD -+ /* isuald: write exit code to exit fifo */ -+ if (handler->conf->exit_fd >= 0) { -+ ret = write(handler->conf->exit_fd, &handler->exit_code, sizeof(int)); -+ if (ret != sizeof(int)) { -+ SYSERROR("Failed to write to exit code to exit fifo."); -+ } -+ } -+#endif -+ - /* The command socket is closed so no one can acces the command - * socket anymore so there's no need to lock it. 
- */ -@@ -1001,7 +1248,7 @@ void lxc_end(struct lxc_handler *handler) - if (handler->conf->ephemeral == 1 && handler->conf->reboot != REBOOT_REQ) - lxc_destroy_container_on_signal(handler, name); - -- lxc_put_handler(handler); -+ lxc_free_handler(handler); - } - - void lxc_abort(struct lxc_handler *handler) -@@ -1032,16 +1279,36 @@ static int do_start(void *data) - struct lxc_handler *handler = data; - __lxc_unused __do_close int data_sock0 = handler->data_sock[0], - data_sock1 = handler->data_sock[1]; -- __do_close int devnull_fd = -EBADF, status_fd = -EBADF; -+ __do_close int status_fd = -EBADF; - int ret; - uid_t new_uid; - gid_t new_gid; - struct lxc_list *iterator; - uid_t nsuid = 0; - gid_t nsgid = 0; -+ int devnull_fd = -1; - - lxc_sync_fini_parent(handler); - -+#ifdef HAVE_ISULAD -+ sigset_t mask; -+ -+ /*isulad: restore default signal handlers and unblock all signals*/ -+ for (int i = 1; i < NSIG; i++) -+ signal(i, SIG_DFL); -+ -+ ret = sigfillset(&mask); -+ if (ret < 0) { -+ SYSERROR("Failed to fill signal mask"); -+ goto out_warn_father; -+ } -+ ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); -+ if (ret < 0) { -+ SYSERROR("Failed to set signal mask"); -+ goto out_warn_father; -+ } -+#endif -+ - if (lxc_abstract_unix_recv_fds(data_sock1, &status_fd, 1, NULL, 0) < 0) { - ERROR("Failed to receive status file descriptor to child process"); - goto out_warn_father; -@@ -1155,7 +1422,11 @@ static int do_start(void *data) - * means that migration won't work, but at least we won't spew output - * where it isn't wanted. - */ -+#ifdef HAVE_ISULAD -+ if (!handler->disable_pty && handler->daemonize && !handler->conf->autodev) { -+#else - if (handler->daemonize && !handler->conf->autodev) { -+#endif - char path[PATH_MAX]; - - ret = snprintf(path, sizeof(path), "%s/dev/null", -@@ -1221,6 +1492,9 @@ static int do_start(void *data) - /* Setup the container, ip, names, utsname, ... 
*/ - ret = lxc_setup(handler); - if (ret < 0) { -+#ifdef HAVE_ISULAD -+ lxc_write_error_message(handler->conf->errpipe[1], "Failed to setup lxc, please check the config file."); -+#endif - ERROR("Failed to setup container \"%s\"", handler->name); - goto out_warn_father; - } -@@ -1243,23 +1517,82 @@ static int do_start(void *data) - DEBUG("Set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges"); - } - -+#ifdef HAVE_ISULAD -+ /* isulad: dup2 pipe[0][0] to container stdin, pipe[1][1] to container stdout, pipe[2][1] to container stderr */ -+ if (handler->disable_pty) { -+ if (handler->conf->console.pipes[0][1] >= 0) { -+ close(handler->conf->console.pipes[0][1]); -+ handler->conf->console.pipes[0][1] = -1; -+ } -+ -+ if (handler->conf->console.pipes[0][0] >= 0) { -+ ret = dup2(handler->conf->console.pipes[0][0], STDIN_FILENO); -+ if (ret < 0) -+ goto out_warn_father; -+ } -+ -+ if (handler->conf->console.pipes[1][0] >= 0) { -+ close(handler->conf->console.pipes[1][0]); -+ handler->conf->console.pipes[1][0] = -1; -+ } -+ -+ if (handler->conf->console.pipes[1][1] >= 0) { -+ ret = dup2(handler->conf->console.pipes[1][1], STDOUT_FILENO); -+ if (ret < 0) -+ goto out_warn_father; -+ } -+ if (handler->conf->console.pipes[2][0] >= 0) { -+ close(handler->conf->console.pipes[2][0]); -+ handler->conf->console.pipes[2][0] = -1; -+ } -+ -+ if (handler->conf->console.pipes[2][1] >= 0) { -+ ret = dup2(handler->conf->console.pipes[2][1], STDERR_FILENO); -+ if (ret < 0) -+ goto out_warn_father; -+ } -+ } -+#endif -+ - /* Some init's such as busybox will set sane tty settings on stdin, - * stdout, stderr which it thinks is the console. We already set them - * the way we wanted on the real terminal, and we want init to do its - * setup on its console ie. the pty allocated in lxc_terminal_setup() so - * make sure that that pty is stdin,stdout,stderr. 
- */ -- if (handler->conf->console.pts >= 0) { -+ setsid(); -+#ifdef HAVE_ISULAD -+ if (!handler->disable_pty && handler->conf->console.slave >= 0) { -+ /* isulad:make the given terminal as controlling terminal to avoid warning -+ * sh: cannot set terminal process group (-1): Inappropriate ioctl for device -+ * sh: no job control in this shell */ -+ if (ioctl(handler->conf->console.slave, TIOCSCTTY, NULL) < 0) { -+ ERROR("Faild to make the given terminal the controlling terminal of the calling process"); -+ goto out_warn_father; -+ } -+ if (handler->daemonize || !handler->conf->is_execute) -+ ret = set_stdfds(handler->conf->console.slave); -+ else -+ ret = lxc_terminal_set_stdfds(handler->conf->console.slave); -+ if (ret < 0) { -+ ERROR("Failed to redirect std{in,out,err} to pty file " -+ "descriptor %d", handler->conf->console.slave); -+ goto out_warn_father; -+ } -+ } -+#else -+ if (handler->conf->console.slave >= 0) { - if (handler->daemonize || !handler->conf->is_execute) -- ret = set_stdfds(handler->conf->console.pts); -+ ret = set_stdfds(handler->conf->console.slave); - else -- ret = lxc_terminal_set_stdfds(handler->conf->console.pts); -+ ret = lxc_terminal_set_stdfds(handler->conf->console.slave); - if (ret < 0) { - ERROR("Failed to redirect std{in,out,err} to pty file descriptor %d", -- handler->conf->console.pts); -+ handler->conf->console.slave); - goto out_warn_father; - } - } -+#endif - - /* If we mounted a temporary proc, then unmount it now. 
*/ - tmp_proc_unmount(handler->conf); -@@ -1283,7 +1616,8 @@ static int do_start(void *data) - - close_prot_errno_disarm(handler->sigfd); - -- if (handler->conf->console.pts < 0 && handler->daemonize) { -+ #ifdef HAVE_ISULAD -+ if (!handler->disable_pty && handler->conf->console.slave < 0 && handler->daemonize) { - if (devnull_fd < 0) { - devnull_fd = open_devnull(); - if (devnull_fd < 0) -@@ -1296,12 +1630,35 @@ static int do_start(void *data) - goto out_warn_father; - } - } -+ #else -+ if (handler->conf->console.slave < 0 && handler->daemonize) { -+ if (devnull_fd < 0) { -+ devnull_fd = open_devnull(); -+ if (devnull_fd < 0) -+ goto out_warn_father; -+ } - -- close_prot_errno_disarm(devnull_fd); -+ ret = set_stdfds(devnull_fd); -+ if (ret < 0) { -+ ERROR("Failed to redirect std{in,out,err} to \"/dev/null\""); -+ goto out_warn_father; -+ } -+ } -+ #endif - -- setsid(); -+ close_prot_errno_disarm(devnull_fd); - - if (handler->conf->init_cwd) { -+#ifdef HAVE_ISULAD -+ /* try to craete workdir if not exist */ -+ struct stat st; -+ if (stat(handler->conf->init_cwd, &st) < 0 && mkdir_p(handler->conf->init_cwd, 0755) < 0) { -+ SYSERROR("Try to create directory \"%s\" as workdir failed", handler->conf->init_cwd); -+ lxc_write_error_message(handler->conf->errpipe[1], "%s:%d: Failed to create workdir: %s.", -+ __FILE__, __LINE__, strerror(errno)); -+ goto out_warn_father; -+ } -+#endif - ret = chdir(handler->conf->init_cwd); - if (ret < 0) { - SYSERROR("Could not change directory to \"%s\"", -@@ -1345,6 +1702,13 @@ static int do_start(void *data) - } - } - -+#ifdef HAVE_ISULAD -+ if (prctl(PR_SET_KEEPCAPS, 1) < 0) { -+ SYSERROR("Failed to keep permitted capabilities"); -+ goto out_warn_father; -+ } -+#endif -+ - /* The container has been setup. We can now switch to an unprivileged - * uid/gid. 
- */ -@@ -1358,6 +1722,13 @@ static int do_start(void *data) - if (new_gid == nsgid) - new_gid = LXC_INVALID_GID; - -+#ifdef HAVE_ISULAD -+ // isulad: set env home in container -+ if (lxc_setup_env_home(new_uid) < 0) { -+ goto out_warn_father; -+ } -+#endif -+ - /* Make sure that the processes STDIO is correctly owned by the user that we are switching to */ - ret = fix_stdio_permissions(new_uid); - if (ret) -@@ -1371,8 +1742,16 @@ static int do_start(void *data) - #if HAVE_LIBCAP - if (lxc_proc_cap_is_set(CAP_SETGID, CAP_EFFECTIVE)) - #endif -+ #ifdef HAVE_ISULAD -+ /* isulad: set groups for init process, and before we set uid and gid */ -+ if (!lxc_setgroups(handler->conf->init_groups_len, handler->conf->init_groups)) { -+ ERROR("Can not set groups"); -+ goto out_warn_father; -+ } -+ #else - if (!lxc_setgroups(0, NULL)) - goto out_warn_father; -+ #endif - - if (!lxc_switch_uid_gid(new_uid, new_gid)) - goto out_warn_father; -@@ -1383,6 +1762,19 @@ static int do_start(void *data) - goto out_warn_father; - } - -+#ifdef HAVE_ISULAD -+ /* isulad: drop the cap of current process */ -+ if (prctl(PR_SET_KEEPCAPS, 0) < 0) { -+ SYSERROR("Failed to clear permitted capabilities"); -+ goto out_warn_father; -+ } -+ -+ if (lxc_drop_caps(handler->conf)) { -+ SYSERROR("Failed to drop caps"); -+ goto out_warn_father; -+ } -+#endif -+ - if (handler->conf->monitor_signal_pdeath != SIGKILL) { - ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath, - handler->monitor_pid, status_fd); -@@ -1393,20 +1785,25 @@ static int do_start(void *data) - } - } - -- /* -- * After this call, we are in error because this ops should not return -+ /* After this call, we are in error because this ops should not return - * as it execs. - */ -+#ifdef HAVE_ISULAD -+ close_prot_errno_disarm(status_fd); -+ handler->ops->start(handler, handler->data, handler->daemonize ? 
handler->conf->errpipe[1] : -1); -+#else - handler->ops->start(handler, handler->data); -+#endif - - out_warn_father: -- /* -- * We want the parent to know something went wrong, so we return a -+ /* We want the parent to know something went wrong, so we return a - * special error code. - */ - lxc_sync_wake_parent(handler, LXC_SYNC_ERROR); - - out_error: -+ close_prot_errno_disarm(devnull_fd); -+ - return -1; - } - -@@ -1435,9 +1832,9 @@ static int lxc_recv_ttys_from_child(struct lxc_handler *handler) - - tty = &ttys->tty[i]; - tty->busy = -1; -- tty->ptmx = ttyfds[0]; -- tty->pts = ttyfds[1]; -- TRACE("Received pty with ptmx fd %d and pts fd %d from child", tty->ptmx, tty->pts); -+ tty->master = ttyfds[0]; -+ tty->slave = ttyfds[1]; -+ TRACE("Received pty with master fd %d and slave fd %d from child", tty->master, tty->slave); - } - - if (ret < 0) -@@ -1529,6 +1926,94 @@ static inline int do_share_ns(void *arg) - return 0; - } - -+#ifdef HAVE_ISULAD -+static int lxc_write_container_info(char *filename, pid_t pid, pid_t p_pid, -+ unsigned long long start_at, unsigned long long p_start_at) -+{ -+ FILE *pid_fp = NULL; -+ int ret = 0; -+ -+ pid_fp = lxc_fopen(filename, "w"); -+ if (pid_fp == NULL) { -+ SYSERROR("Failed to create pidfile '%s'",filename); -+ ret = -1; -+ goto out; -+ } -+ -+ if (fprintf(pid_fp, "%d %llu %d %llu\n", pid, start_at, p_pid, p_start_at) < 0) { -+ SYSERROR("Failed to write '%s'", filename); -+ ret = -1; -+ goto out; -+ } -+out: -+ if (pid_fp) -+ fclose(pid_fp); -+ pid_fp = NULL; -+ return ret; -+} -+ -+static int lxc_check_container_info(char *filename, pid_t pid, pid_t p_pid, -+ unsigned long long start_at, unsigned long long p_start_at) -+{ -+ int ret = 0; -+ int num; -+ char sbuf[1024] = {0}; /* bufs for stat */ -+ int saved_pid; /* process id */ -+ int saved_ppid; /* pid of parent process */ -+ unsigned long long saved_start_time; /* start time of process -- seconds since 1-1-70 */ -+ unsigned long long saved_pstart_time; /* start time of 
parent process -- seconds since 1-1-70 */ -+ -+ if ((lxc_file2str(filename, sbuf, sizeof(sbuf))) == -1) { -+ SYSERROR("Failed to read pidfile %s", filename); -+ ret = -1; -+ goto out; -+ } -+ -+ num = sscanf(sbuf, "%d %Lu %d %Lu", &saved_pid, &saved_start_time, &saved_ppid, &saved_pstart_time); -+ if (num != 4) { -+ SYSERROR("Call sscanf error"); -+ ret = -1; -+ goto out; -+ } -+ -+ if (pid != saved_pid || p_pid != saved_ppid -+ || start_at != saved_start_time || p_start_at != saved_pstart_time) { -+ ERROR("Check container info failed"); -+ ret = -1; -+ goto out; -+ } -+ -+out: -+ return ret; -+} -+ -+/* isuald: save pid/ppid info */ -+static int lxc_save_container_info(char *filename, pid_t pid) -+{ -+ int ret = 0; -+ pid_t p_pid = 0; -+ unsigned long long start_at = 0; -+ unsigned long long p_start_at = 0; -+ -+ start_at = lxc_get_process_startat(pid); -+ p_pid = getpid(); -+ p_start_at = lxc_get_process_startat(p_pid); -+ -+ ret = lxc_write_container_info(filename, pid, p_pid, start_at, p_start_at); -+ if (ret != 0) { -+ goto out; -+ } -+ -+ ret = lxc_check_container_info(filename, pid, p_pid, start_at, p_start_at); -+ if (ret != 0) { -+ goto out; -+ } -+ -+out: -+ return ret; -+} -+#endif -+ - /* lxc_spawn() performs crucial setup tasks and clone()s the new process which - * exec()s the requested container binary. - * Note that lxc_spawn() runs in the parent namespaces. 
Any operations performed -@@ -1640,6 +2125,32 @@ static int lxc_spawn(struct lxc_handler *handler) - } - TRACE("Cloned child process %d", handler->pid); - -+#ifdef HAVE_ISULAD -+ /* isulad: close pipe after clone */ -+ if (handler->conf->console.pipes[0][0] >= 0) { -+ close(handler->conf->console.pipes[0][0]); -+ handler->conf->console.pipes[0][0] = -1; -+ } -+ -+ if (handler->conf->console.pipes[1][1] >= 0) { -+ close(handler->conf->console.pipes[1][1]); -+ handler->conf->console.pipes[1][1] = -1; -+ } -+ -+ if (handler->conf->console.pipes[2][1] >= 0) { -+ close(handler->conf->console.pipes[2][1]); -+ handler->conf->console.pipes[2][1] = -1; -+ } -+ -+ /* isulad: save pid/ppid info into file*/ -+ if (handler->conf->container_info_file) { -+ if (lxc_save_container_info(handler->conf->container_info_file, handler->pid)) { -+ ERROR("Failed to save cloned container pid"); -+ goto out_delete_net; -+ } -+ } -+#endif -+ - /* Verify that we can actually make use of pidfds. */ - if (!lxc_can_use_pidfd(handler->pidfd)) - close_prot_errno_disarm(handler->pidfd); -@@ -1652,6 +2163,13 @@ static int lxc_spawn(struct lxc_handler *handler) - if (ret < 0) - SYSERROR("Failed to set environment variable: LXC_PID=%s", pidstr); - -+#ifdef HAVE_ISULAD -+ if (handler->cgroup_ops->container_cgroup) { -+ if (setenv("LXC_CGROUP_PATH", handler->cgroup_ops->container_cgroup, 1)) -+ SYSERROR("Failed to set environment variable: LXC_CGROUP_PATH=%s.", handler->cgroup_ops->container_cgroup); -+ } -+#endif -+ - for (i = 0; i < LXC_NS_MAX; i++) - if (handler->ns_on_clone_flags & ns_info[i].clone_flag) - INFO("Cloned %s", ns_info[i].flag_name); -@@ -1765,7 +2283,11 @@ static int lxc_spawn(struct lxc_handler *handler) - goto out_delete_net; - - if (!lxc_list_empty(&conf->limits)) { -+#ifdef HAVE_ISULAD -+ ret = setup_resource_limits(&conf->limits, handler->pid, conf->errpipe[1]); -+#else - ret = setup_resource_limits(&conf->limits, handler->pid); -+#endif - if (ret < 0) { - ERROR("Failed to setup 
resource limits"); - goto out_delete_net; -@@ -1776,12 +2298,7 @@ static int lxc_spawn(struct lxc_handler *handler) - if (ret < 0) - goto out_delete_net; - -- /* -- * with isolation the limiting devices cgroup was already setup, so -- * only setup devices here if we have no namespace directory -- */ -- if (!handler->conf->cgroup_meta.namespace_dir && -- !cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) { -+ if (!cgroup_ops->setup_limits_legacy(cgroup_ops, handler->conf, true)) { - ERROR("Failed to setup legacy device cgroup controller limits"); - goto out_delete_net; - } -@@ -1816,6 +2333,26 @@ static int lxc_spawn(struct lxc_handler *handler) - ERROR("Failed to run lxc.hook.start-host"); - goto out_delete_net; - } -+#ifdef HAVE_ISULAD -+ /* isulad: Run oci prestart hook at here */ -+ ret = run_oci_hooks(name, "oci-prestart", conf, lxcpath); -+ if (ret < 0) { -+ ERROR("Failed to run oci prestart hooks"); -+ goto out_delete_net; -+ } -+ -+ if (START_TIMEOUT == global_timeout_state) { -+ lxc_write_error_message(conf->errpipe[1], "Starting the container \"%s\" timeout.", name); -+ ERROR("Starting the container \"%s\" timeout.", name); -+ goto out_delete_net; -+ } -+ -+ /* Tell the child to continue its initialization. We'll get -+ * LXC_SYNC_POST_OCI_PRESTART_HOOK when it is ready for us to run oci prestart hooks. -+ */ -+ if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_OCI_PRESTART_HOOK)) -+ goto out_delete_net; -+#endif - - /* Tell the child to complete its initialization and wait for it to exec - * or return an error. 
(The child will never return -@@ -1859,6 +2396,22 @@ static int lxc_spawn(struct lxc_handler *handler) - if (ret < 0) - goto out_abort; - -+#ifdef HAVE_ISULAD -+ /* isulad: Run oci prestart hook at here */ -+ ret = run_oci_hooks(name, "oci-poststart", conf, lxcpath); -+ if (ret < 0) { -+ ERROR("Failed to run oci poststart hooks"); -+ goto out_abort; -+ } -+ -+ if (START_TIMEOUT == global_timeout_state) { -+ lxc_write_error_message(conf->errpipe[1], "Starting the container \"%s\" timeout.", name); -+ ERROR("Starting the container \"%s\" timeout.", name); -+ goto out_abort; -+ } -+ -+#endif -+ - ret = lxc_set_state(name, handler, RUNNING); - if (ret < 0) { - ERROR("Failed to set state to \"%s\"", lxc_state2str(RUNNING)); -@@ -1883,9 +2436,82 @@ out_sync_fini: - return -1; - } - -+#ifdef HAVE_ISULAD -+/* isulad: start timeout thread function */ -+static void* wait_start_timeout(void *arg) -+{ -+ struct start_timeout_conf *conf = (struct start_timeout_conf *)arg; -+ -+ sem_post(&global_timeout_sem); -+ -+ if (!conf || conf->timeout < 1) -+ goto out; -+ -+ sleep(conf->timeout); -+ -+ global_timeout_state = START_TIMEOUT; -+ -+out: -+ free(conf); -+ return ((void *)0); -+} -+ -+/* isulad: create start timeout thread */ -+static int create_start_timeout_thread(struct lxc_conf *conf, unsigned int start_timeout) -+{ -+ int ret = 0; -+ pthread_t ptid; -+ pthread_attr_t attr; -+ struct start_timeout_conf *timeout_conf = NULL; -+ -+ if (sem_init(&global_timeout_sem, 0, 0)) { -+ ERROR("Failed to init start timeout semaphore");/*lint !e613*/ -+ ret = -1; -+ return ret; -+ } -+ -+ timeout_conf = malloc(sizeof(struct start_timeout_conf)); -+ if (timeout_conf == NULL) { -+ ERROR("Failed to malloc start timeout conf"); -+ ret = -1; -+ goto out; -+ } -+ -+ memset(timeout_conf, 0, sizeof(struct start_timeout_conf)); -+ timeout_conf->errfd = conf->errpipe[1]; -+ timeout_conf->timeout = start_timeout; -+ -+ pthread_attr_init(&attr); -+ pthread_attr_setdetachstate(&attr, 
PTHREAD_CREATE_DETACHED); -+ ret = pthread_create(&ptid, &attr, wait_start_timeout, timeout_conf); -+ if (ret != 0) { -+ ERROR("Create start wait timeout thread failed"); -+ free(timeout_conf); -+ goto out; -+ } -+ -+ sem_wait(&global_timeout_sem); -+out: -+ sem_destroy(&global_timeout_sem); -+ return ret; -+} -+ -+// isulad: send '128 + signal' if container is killed by signal. -+#define EXIT_SIGNAL_OFFSET 128 -+#endif -+ -+ -+#ifdef HAVE_ISULAD -+int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, -+ void *data, const char *lxcpath, bool daemonize, int *error_num, -+ unsigned int start_timeout) -+{ -+ int exit_code; -+#else - int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, - void *data, const char *lxcpath, bool daemonize, int *error_num) - { -+#endif - int ret, status; - const char *name = handler->name; - struct lxc_conf *conf = handler->conf; -@@ -1901,6 +2527,16 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, - handler->daemonize = daemonize; - cgroup_ops = handler->cgroup_ops; - -+#ifdef HAVE_ISULAD -+ /* isulad: add start timeout limit */ -+ if (start_timeout > 0) { -+ ret = create_start_timeout_thread(conf, start_timeout); -+ if (ret) { -+ ERROR("Failed to create start timeout thread for container \"%s\".", name); -+ goto out_abort; -+ } -+ } -+#endif - if (!attach_block_device(handler->conf)) { - ERROR("Failed to attach block device"); - ret = -1; -@@ -1935,7 +2571,7 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, - } - INFO("Unshared CLONE_NEWNS"); - -- turn_into_dependent_mounts(); -+ remount_all_slave(); - ret = lxc_setup_rootfs_prepare_root(conf, name, lxcpath); - if (ret < 0) { - ERROR("Error setting up rootfs mount as root before spawn"); -@@ -1959,11 +2595,13 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, - goto out_delete_network; - } - -+#ifndef HAVE_ISULAD - if (!handler->init_died && handler->pid > 0) { - 
ERROR("Child process is not killed"); - ret = -1; - goto out_delete_network; - } -+#endif - - status = lxc_wait_for_pid_status(handler->pid); - if (status < 0) -@@ -1973,6 +2611,21 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, - * reboot. This should mean it was an lxc-execute which simply exited. - * In any case, treat it as a 'halt'. - */ -+#ifdef HAVE_ISULAD -+ // isulad: recored log for container init exit -+ if (WIFSIGNALED(status)) { -+ int signal = WTERMSIG(status); -+ signal = WTERMSIG(status); -+ exit_code = EXIT_SIGNAL_OFFSET + signal; -+ ERROR("Container \"%s\" init exited with signal %d", name, signal); -+ } else if (WIFEXITED(status)) { -+ exit_code = WEXITSTATUS(status); -+ ERROR("Container \"%s\" init exited with status %d", name, exit_code); -+ } else { -+ exit_code = -1; -+ ERROR("Container \"%s\" init exited with unknown status", name); -+ } -+#else - if (WIFSIGNALED(status)) { - switch(WTERMSIG(status)) { - case SIGINT: /* halt */ -@@ -1990,6 +2643,7 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, - break; - } - } -+#endif - - ret = lxc_restore_phys_nics_to_netns(handler); - if (ret < 0) -@@ -1997,11 +2651,20 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops, - - close_prot_errno_disarm(handler->pinfd); - -+#ifdef HAVE_ISULAD -+ lxc_monitor_send_exit_code(name, exit_code, handler->lxcpath); -+#else - lxc_monitor_send_exit_code(name, status, handler->lxcpath); -+#endif -+ - lxc_error_set_and_log(handler->pid, status); - if (error_num) - *error_num = handler->exit_status; - -+#ifdef HAVE_ISULAD -+ handler->exit_code = exit_code; /* record exit code */ -+#endif -+ - /* These are not the droids you are looking for. 
*/ - __private_goto1: - lxc_delete_network(handler); -@@ -2032,7 +2695,11 @@ struct start_args { - char *const *argv; - }; - -+#ifdef HAVE_ISULAD -+static int start(struct lxc_handler *handler, void* data, int fd) -+#else - static int start(struct lxc_handler *handler, void* data) -+#endif - { - struct start_args *arg = data; - -@@ -2040,6 +2707,9 @@ static int start(struct lxc_handler *handler, void* data) - - execvp(arg->argv[0], arg->argv); - SYSERROR("Failed to exec \"%s\"", arg->argv[0]); -+#ifdef HAVE_ISULAD -+ lxc_write_error_message(fd, "exec: \"%s\": %s.", arg->argv[0], strerror(errno)); -+#endif - return 0; - } - -@@ -2057,14 +2727,18 @@ static struct lxc_operations start_ops = { - }; - - int lxc_start(char *const argv[], struct lxc_handler *handler, -- const char *lxcpath, bool daemonize, int *error_num) -+ const char *lxcpath, bool daemonize, int *error_num, unsigned int start_timeout) - { - struct start_args start_arg = { - .argv = argv, - }; - - TRACE("Doing lxc_start"); -+#ifdef HAVE_ISULAD -+ return __lxc_start(handler, &start_ops, &start_arg, lxcpath, daemonize, error_num, start_timeout); -+#else - return __lxc_start(handler, &start_ops, &start_arg, lxcpath, daemonize, error_num); -+#endif - } - - static void lxc_destroy_container_on_signal(struct lxc_handler *handler, -@@ -2136,3 +2810,261 @@ static bool do_destroy_container(struct lxc_handler *handler) - - return storage_destroy(handler->conf); - } -+ -+#ifdef HAVE_ISULAD -+/*isulad: set env for clean resources */ -+static int clean_resource_set_env(struct lxc_handler *handler) -+{ -+ const char *name = handler->name; -+ struct lxc_conf *conf = handler->conf; -+ char bufstr[PATH_MAX + 1]; -+ int i = 0; -+ int j = 0; -+ int len = 2; //set "LXC_PID" and "LXC_CGNS_AWARE" -+ -+ if (conf == NULL || conf->ocihooks == NULL || conf->ocihooks->poststop_len == 0) { -+ return 0; -+ } -+ -+ if (name) { -+ len++; -+ } -+ if (conf->rcfile) { -+ len++; -+ } -+ if (conf->rootfs.mount) { -+ len++; -+ } -+ if 
(conf->rootfs.path) { -+ len++; -+ } -+ if (conf->console.path) { -+ len++; -+ } -+ if (conf->console.log_path) { -+ len++; -+ } -+ if (handler->cgroup_ops->container_cgroup) { -+ len++; -+ } -+ -+ for (; i < conf->ocihooks->poststop_len; i++) { -+ size_t cap = conf->ocihooks->poststop[i]->env_len; -+ size_t newcap = cap + len + 1; -+ if (lxc_grow_array((void ***)&(conf->ocihooks->poststop[i]->env), &cap, newcap, 1) != 0) { -+ return -1; -+ } -+ j = conf->ocihooks->poststop[i]->env_len; -+ /* Start of environment variable setup for hooks. */ -+ if (name) { -+ snprintf(bufstr, PATH_MAX + 1, "LXC_NAME=%s", name); -+ conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); -+ } -+ if (conf->rcfile) { -+ snprintf(bufstr, PATH_MAX + 1, "LXC_CONFIG_FILE=%s", conf->rcfile); -+ conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); -+ } -+ if (conf->rootfs.mount) { -+ snprintf(bufstr, PATH_MAX + 1, "LXC_ROOTFS_MOUNT=%s", conf->rootfs.mount); -+ conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); -+ } -+ if (conf->rootfs.path) { -+ snprintf(bufstr, PATH_MAX + 1, "LXC_ROOTFS_PATH=%s", conf->rootfs.path); -+ conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); -+ } -+ if (conf->console.path) { -+ snprintf(bufstr, PATH_MAX + 1, "LXC_CONSOLE=%s", conf->console.path); -+ conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); -+ } -+ if (conf->console.log_path) { -+ snprintf(bufstr, PATH_MAX + 1, "LXC_CONSOLE_LOGPATH=%s", conf->console.log_path); -+ conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); -+ } -+ conf->ocihooks->poststop[i]->env[j++] = safe_strdup("LXC_CGNS_AWARE=1"); -+ -+ snprintf(bufstr, PATH_MAX + 1, "LXC_PID=%d", handler->pid); -+ conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); -+ if (handler->cgroup_ops->container_cgroup) { -+ snprintf(bufstr, PATH_MAX + 1, "LXC_CGROUP_PATH=%s", handler->cgroup_ops->container_cgroup); -+ conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr); -+ } -+ 
conf->ocihooks->poststop[i]->env_len = j; -+ /* End of environment variable setup for hooks. */ -+ } -+ return 0; -+} -+ -+/*isulad: init handler for clean */ -+static struct lxc_handler *lxc_init_clean_handler(char *name, char *lxcpath, struct lxc_conf *conf, pid_t pid) -+{ -+ int i; -+ struct lxc_handler *handler; -+ -+ handler = malloc(sizeof(*handler)); -+ if (handler == NULL) -+ return NULL; -+ -+ memset(handler, 0, sizeof(*handler)); -+ -+ /* Note that am_guest_unpriv() checks the effective uid. We -+ * probably don't care if we are real root only if we are running -+ * as root so this should be fine. -+ */ -+ handler->am_root = !am_guest_unpriv(); -+ handler->data_sock[0] = handler->data_sock[1] = -1; -+ handler->conf = conf; -+ handler->lxcpath = lxcpath; -+ handler->pinfd = -1; -+ handler->sigfd = -EBADF; -+ handler->pidfd = -EBADF; -+ handler->init_died = false; -+ handler->monitor_status_fd = -EBADF; -+ handler->pid = pid; -+ handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1; -+ if (handler->conf->reboot == REBOOT_NONE) -+ lxc_list_init(&handler->conf->state_clients); -+ -+ for (i = 0; i < LXC_NS_MAX; i++) -+ handler->nsfd[i] = -1; -+ -+ handler->name = name; -+ handler->exit_code = -1; /* isulad: record exit code of container */ -+ -+ handler->cgroup_ops = cgroup_init(conf); -+ if (!handler->cgroup_ops) { -+ ERROR("Failed to initialize cgroup driver"); -+ goto on_error; -+ } -+ -+ INFO("Container \"%s\" 's clean handler is initialized.", name); -+ -+ return handler; -+ -+on_error: -+ lxc_free_handler(handler); -+ -+ return NULL; -+} -+ -+/*isulad: init handler for clean */ -+static struct lxc_handler *lxc_init_pids_handler(char *name, char *lxcpath, struct lxc_conf *conf) -+{ -+ int i; -+ struct lxc_handler *handler; -+ -+ handler = malloc(sizeof(*handler)); -+ if (handler == NULL) -+ return NULL; -+ -+ memset(handler, 0, sizeof(*handler)); -+ -+ /* Note that am_guest_unpriv() checks the effective uid. 
We -+ * probably don't care if we are real root only if we are running -+ * as root so this should be fine. -+ */ -+ handler->am_root = !am_guest_unpriv(); -+ handler->data_sock[0] = handler->data_sock[1] = -1; -+ handler->conf = conf; -+ handler->lxcpath = lxcpath; -+ handler->pinfd = -1; -+ handler->sigfd = -EBADF; -+ handler->init_died = false; -+ handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1; -+ handler->monitor_status_fd = -EBADF; -+ handler->pidfd = -EBADF; -+ if (handler->conf->reboot == REBOOT_NONE) -+ lxc_list_init(&handler->conf->state_clients); -+ -+ for (i = 0; i < LXC_NS_MAX; i++) -+ handler->nsfd[i] = -1; -+ -+ handler->name = name; -+ handler->exit_code = -1; /* isulad: record exit code of container */ -+ -+ handler->cgroup_ops = cgroup_init(conf); -+ if (!handler->cgroup_ops) { -+ ERROR("Failed to initialize cgroup driver"); -+ goto on_error; -+ } -+ -+ INFO("Container \"%s\" 's clean handler is initialized.", name); -+ -+ return handler; -+ -+on_error: -+ lxc_free_handler(handler); -+ -+ return NULL; -+} -+ -+/*isulad: do_lxcapi_clean_resource */ -+int do_lxcapi_clean_resource(char *name, char *lxcpath, struct lxc_conf *conf, pid_t pid) -+{ -+ int ret = 0; -+ struct lxc_handler *handler = NULL; -+ int retry_count = 0; -+ int max_retry = 10; -+ -+ handler = lxc_init_clean_handler(name, lxcpath, conf, pid); -+ if (!handler) { -+ ERROR("Failed to init container %s clean handler", name); -+ ret = -1; -+ goto out; -+ } -+ -+ if (clean_resource_set_env(handler) != 0) { -+ ERROR("Failed to set env for poststop hooks"); -+ ret = -1; -+ goto out; -+ } -+ -+ if (run_oci_hooks(handler->name, "oci-poststop", handler->conf, handler->lxcpath)) { -+ ERROR("Failed to run lxc.hook.post-stop for container \"%s\".", handler->name); -+ ret = -1; -+ } -+ -+retry: -+ if (!handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler)) { -+ TRACE("Trying to kill all subprocess"); -+ signal_all_processes(handler); -+ TRACE("Finished kill all 
subprocess"); -+ if (retry_count < max_retry) { -+ usleep(100 * 1000); /* 100 millisecond */ -+ retry_count++; -+ goto retry; -+ } -+ SYSERROR("Failed to destroy cgroup path for container: \"%s\"", handler->name); -+ ret = -1; -+ } -+ -+out: -+ lxc_free_handler(handler); -+ return ret; -+} -+ -+/*isulad: do_lxcapi_get_pids */ -+int do_lxcapi_get_pids(char *name, char *lxcpath, struct lxc_conf *conf, pid_t **pids,size_t *pids_len) -+{ -+ int ret = 0; -+ struct lxc_handler *handler = NULL; -+ struct cgroup_ops *cg_ops = NULL; -+ -+ handler = lxc_init_pids_handler(name, lxcpath, conf); -+ if (!handler) { -+ ERROR("Failed to init container %s clean handler", name); -+ ret = -1; -+ goto out; -+ } -+ -+ cg_ops = handler->cgroup_ops; -+ ret = get_all_pids(cg_ops, pids, pids_len); -+ if (ret < 0) { -+ WARN("failed to get all pids"); -+ } -+ -+out: -+ lxc_free_handler(handler); -+ return ret; -+} -+ -+#endif -diff --git a/src/lxc/start.h b/src/lxc/start.h -index ece4aac47..ebeeb72ea 100644 ---- a/src/lxc/start.h -+++ b/src/lxc/start.h -@@ -10,7 +10,6 @@ - #include - - #include "conf.h" --#include "macro.h" - #include "namespace.h" - #include "state.h" - -@@ -124,8 +123,14 @@ struct lxc_handler { - - struct cgroup_ops *cgroup_ops; - -- /* Internal fds that always need to stay open. */ -- int keep_fds[3]; -+#ifdef HAVE_ISULAD -+ int exit_code;/* isulad: record the exit code of container */ -+ /* Indicates whether should we using pipes or pty dup to std{in,out,err} for console log. */ -+ bool disable_pty; -+ /* Indicates whether should we keep stdin active. 
*/ -+ bool open_stdin; -+#endif -+ - }; - - struct execute_args { -@@ -136,7 +141,11 @@ struct execute_args { - }; - - struct lxc_operations { -+#ifdef HAVE_ISULAD -+ int (*start)(struct lxc_handler *, void *, int); -+#else - int (*start)(struct lxc_handler *, void *); -+#endif - int (*post_start)(struct lxc_handler *, void *); - }; - -@@ -147,11 +156,12 @@ extern int lxc_serve_state_clients(const char *name, - struct lxc_handler *handler, - lxc_state_t state); - extern void lxc_abort(struct lxc_handler *handler); --extern struct lxc_handler *lxc_init_handler(struct lxc_handler *old, -- const char *name, -+extern struct lxc_handler *lxc_init_handler(const char *name, - struct lxc_conf *conf, -- const char *lxcpath, bool daemonize); --extern void lxc_put_handler(struct lxc_handler *handler); -+ const char *lxcpath, -+ bool daemonize); -+extern void lxc_zero_handler(struct lxc_handler *handler); -+extern void lxc_free_handler(struct lxc_handler *handler); - extern int lxc_init(const char *name, struct lxc_handler *handler); - extern void lxc_end(struct lxc_handler *handler); - -@@ -164,14 +174,23 @@ extern void lxc_end(struct lxc_handler *handler); - */ - extern int lxc_check_inherited(struct lxc_conf *conf, bool closeall, - int *fds_to_ignore, size_t len_fds); --static inline int inherit_fds(struct lxc_handler *handler, bool closeall) --{ -- return lxc_check_inherited(handler->conf, closeall, handler->keep_fds, -- ARRAY_SIZE(handler->keep_fds)); --} -+#ifdef HAVE_ISULAD -+extern int __lxc_start(struct lxc_handler *handler, -+ struct lxc_operations* ops, void *data, const char *lxcpath, -+ bool daemonize, int *error_num, unsigned int start_timeout); -+#else - extern int __lxc_start(struct lxc_handler *, struct lxc_operations *, void *, - const char *, bool, int *); -+#endif - - extern int resolve_clone_flags(struct lxc_handler *handler); - -+#ifdef HAVE_ISULAD -+/*isulad: do_lxcapi_clean_resource */ -+extern int do_lxcapi_clean_resource(char *name, char *lxcpath, 
struct lxc_conf *conf, pid_t pid); -+ -+/*isulad: do_lxcapi_get_pids */ -+extern int do_lxcapi_get_pids(char *name, char *lxcpath, struct lxc_conf *conf, pid_t **pids,size_t *pids_len); -+#endif -+ - #endif -diff --git a/src/lxc/storage/block.c b/src/lxc/storage/block.c -new file mode 100644 -index 000000000..eb75e7065 ---- /dev/null -+++ b/src/lxc/storage/block.c -@@ -0,0 +1,86 @@ -+/* -+ * lxc: linux Container library -+ * -+ * (C) Copyright IBM Corp. 2007, 2008 -+ * -+ * Authors: -+ * Daniel Lezcano -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef _GNU_SOURCE -+#define _GNU_SOURCE 1 -+#endif -+#include -+#include -+ -+#include "config.h" -+#include "log.h" -+#include "storage.h" -+#include "storage_utils.h" -+#include "utils.h" -+ -+lxc_log_define(blk, lxc); -+ -+int blk_destroy(struct lxc_storage *orig) -+{ -+ return 0; -+} -+ -+bool blk_detect(const char *path) -+{ -+ struct stat statbuf; -+ int ret; -+ -+ if (!strncmp(path, "blk:", 4)) -+ return true; -+ -+ ret = stat(path, &statbuf); -+ if (ret == -1 && errno == EPERM) { -+ SYSERROR("blk_detect: failed to look at \"%s\"", path); -+ return false; -+ } -+ -+ if (ret == 0 && S_ISBLK(statbuf.st_mode)) -+ return true; -+ -+ return false; -+} -+ -+int blk_mount(struct lxc_storage *bdev) -+{ -+ const char *src; -+ if (strcmp(bdev->type, "blk")) -+ return -22; -+ -+ if (!bdev->src || !bdev->dest) -+ return -22; -+ -+ src = lxc_storage_get_path(bdev->src, bdev->type); -+ -+ return mount_unknown_fs(src, bdev->dest, bdev->mntopts); -+} -+ -+int blk_umount(struct lxc_storage *bdev) -+{ -+ if (strcmp(bdev->type, "blk")) -+ return -22; -+ -+ if (!bdev->src || !bdev->dest) -+ return -22; -+ -+ return umount(bdev->dest); -+} -diff --git a/src/lxc/storage/block.h b/src/lxc/storage/block.h -new file mode 100644 -index 000000000..2fa7565fb ---- /dev/null -+++ b/src/lxc/storage/block.h -@@ -0,0 +1,41 @@ -+/* -+ * lxc: linux Container library -+ * -+ * (C) Copyright IBM Corp. 2007, 2008 -+ * -+ * Authors: -+ * Daniel Lezcano -+ * -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. 
-+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#ifndef __LXC_BLK_H -+#define __LXC_BLK_H -+ -+#include -+#include -+ -+struct lxc_storage; -+ -+struct bdev_specs; -+ -+struct lxc_conf; -+ -+extern int blk_destroy(struct lxc_storage *orig); -+extern bool blk_detect(const char *path); -+extern int blk_mount(struct lxc_storage *bdev); -+extern int blk_umount(struct lxc_storage *bdev); -+ -+#endif /* __LXC_BLK_H */ -diff --git a/src/lxc/storage/btrfs.c b/src/lxc/storage/btrfs.c -index 92a4a6def..069a9dd84 100644 ---- a/src/lxc/storage/btrfs.c -+++ b/src/lxc/storage/btrfs.c -@@ -197,16 +197,27 @@ int btrfs_mount(struct lxc_storage *bdev) - const char *src; - int ret; - -+#ifdef HAVE_ISULAD -+ unsigned long pflags = 0; -+#endif -+ - if (strcmp(bdev->type, "btrfs")) - return -22; - - if (!bdev->src || !bdev->dest) - return -22; - -+#ifdef HAVE_ISULAD -+ if (parse_mntopts(bdev->mntopts, &mntflags, &pflags, &mntdata) < 0) { -+ free(mntdata); -+ return -22; -+ } -+#else - if (parse_mntopts(bdev->mntopts, &mntflags, &mntdata) < 0) { - free(mntdata); - return -22; - } -+#endif - - src = lxc_storage_get_path(bdev->src, "btrfs"); - -diff --git a/src/lxc/storage/dir.c b/src/lxc/storage/dir.c -index 18a10a42f..485572a0b 100644 ---- a/src/lxc/storage/dir.c -+++ b/src/lxc/storage/dir.c -@@ -94,6 +94,9 @@ int dir_create(struct lxc_storage *bdev, const char *dest, const char *n, - - int dir_destroy(struct lxc_storage *orig) - { -+#ifdef HAVE_ISULAD -+ // isulad: do not destroy rootfs for directory, it should be managed by caller 
-+#else - int ret; - const char *src; - -@@ -102,6 +105,7 @@ int dir_destroy(struct lxc_storage *orig) - ret = lxc_rmdir_onedev(src, NULL); - if (ret < 0) - return log_error_errno(ret, errno, "Failed to delete \"%s\"", src); -+#endif - - return 0; - } -@@ -124,6 +128,35 @@ bool dir_detect(const char *path) - return false; - } - -+#ifdef HAVE_ISULAD -+int dir_mount(struct lxc_storage *bdev) -+{ -+ __do_free char *mntdata = NULL; -+ unsigned long mntflags = 0, pflags = 0; -+ int ret; -+ const char *src; -+ -+ if (strcmp(bdev->type, "dir")) -+ return -22; -+ -+ if (!bdev->src || !bdev->dest) -+ return -22; -+ -+ ret = parse_mntopts(bdev->mntopts, &mntflags, &pflags, &mntdata); -+ if (ret < 0) -+ return log_error_errno(ret, errno, "Failed to parse mount options \"%s\"", bdev->mntopts); -+ -+ src = lxc_storage_get_path(bdev->src, bdev->type); -+ -+ ret = mount(src, bdev->dest, "bind", MS_BIND | MS_REC | (mntflags & ~MS_RDONLY) | pflags, mntdata); -+ if (ret < 0) { -+ return log_error_errno(-errno, errno, "Failed to mount \"%s\" on \"%s\"", src, bdev->dest); -+ } -+ TRACE("Mounted \"%s\" on \"%s\"", src, bdev->dest); -+ -+ return 0; -+} -+#else - int dir_mount(struct lxc_storage *bdev) - { - __do_free char *mntdata = NULL; -@@ -161,11 +194,12 @@ int dir_mount(struct lxc_storage *bdev) - DEBUG("Remounted \"%s\" on \"%s\" read-only with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"", - src ? src : "(none)", bdev->dest ? bdev->dest : "(none)", mntdata, mflags, pflags); - } -- - TRACE("Mounted \"%s\" on \"%s\" with options \"%s\", mount flags \"%lu\", and propagation flags \"%lu\"", - src ? src : "(none)", bdev->dest ? 
bdev->dest : "(none)", mntdata, mflags, pflags); -+ - return 0; - } -+#endif - - int dir_umount(struct lxc_storage *bdev) - { -diff --git a/src/lxc/storage/loop.c b/src/lxc/storage/loop.c -index eebc1b67c..345be503b 100644 ---- a/src/lxc/storage/loop.c -+++ b/src/lxc/storage/loop.c -@@ -21,6 +21,7 @@ - #include "memory_utils.h" - #include "storage.h" - #include "storage_utils.h" -+#include "lxclock.h" - #include "utils.h" - - lxc_log_define(loop, lxc); -@@ -216,9 +217,11 @@ bool loop_detect(const char *path) - - int loop_mount(struct lxc_storage *bdev) - { -- int ret, loopfd; -+ int ret = 0; -+ int loopfd, lret; - char loname[PATH_MAX]; - const char *src; -+ struct lxc_lock *l = NULL; - - if (strcmp(bdev->type, "loop")) - return -22; -@@ -226,13 +229,29 @@ int loop_mount(struct lxc_storage *bdev) - if (!bdev->src || !bdev->dest) - return -22; - -+ /* isulad: do lock before mount, so we can avoid use loop which is used by -+ * other starting contianers */ -+ l = lxc_newlock("mount_lock", "mount_lock"); -+ if (!l) { -+ SYSERROR("create file lock error when mount fs"); -+ return -1; -+ } -+ -+ lret = lxclock(l, 0); -+ if (lret) { -+ SYSERROR("try to lock failed when mount fs"); -+ ret = -1; -+ goto out; -+ } -+ - /* skip prefix */ - src = lxc_storage_get_path(bdev->src, bdev->type); - - loopfd = lxc_prepare_loop_dev(src, loname, LO_FLAGS_AUTOCLEAR); - if (loopfd < 0) { - ERROR("Failed to prepare loop device for loop file \"%s\"", src); -- return -1; -+ ret = -1; -+ goto out; - } - DEBUG("Prepared loop device \"%s\"", loname); - -@@ -241,14 +260,21 @@ int loop_mount(struct lxc_storage *bdev) - ERROR("Failed to mount rootfs \"%s\" on \"%s\" via loop device \"%s\"", - bdev->src, bdev->dest, loname); - close(loopfd); -- return -1; -+ ret = -1; -+ goto out; - } - - bdev->lofd = loopfd; - DEBUG("Mounted rootfs \"%s\" on \"%s\" via loop device \"%s\"", - bdev->src, bdev->dest, loname); -- -- return 0; -+out: -+ lret = lxcunlock(l); -+ if (lret) { -+ SYSERROR("try to unlock 
failed when mount fs"); -+ ret = -1; -+ } -+ lxc_putlock(l); -+ return ret; - } - - int loop_umount(struct lxc_storage *bdev) -diff --git a/src/lxc/storage/overlay.c b/src/lxc/storage/overlay.c -index 770785cfd..75a81de15 100644 ---- a/src/lxc/storage/overlay.c -+++ b/src/lxc/storage/overlay.c -@@ -349,6 +349,9 @@ int ovl_mount(struct lxc_storage *bdev) - char *work, *lastslash; - size_t len, len2; - int ret, ret2; -+#ifdef HAVE_ISULAD -+ unsigned long pflags = 0; -+#endif - - if (strcmp(bdev->type, "overlay") && strcmp(bdev->type, "overlayfs")) - return -22; -@@ -414,7 +417,12 @@ int ovl_mount(struct lxc_storage *bdev) - work = must_make_path(upper, LXC_OVERLAY_WORK_DIR, NULL); - upper[lastslash - upper] = '/'; - -+#ifdef HAVE_ISULAD -+ ret = parse_mntopts(bdev->mntopts, &mntflags, &pflags, &mntdata); -+#else - ret = parse_mntopts(bdev->mntopts, &mntflags, &mntdata); -+#endif -+ - if (ret < 0) { - ERROR("Failed to parse mount options"); - free(mntdata); -diff --git a/src/lxc/storage/rsync.c b/src/lxc/storage/rsync.c -index 2e4df2537..97678dea2 100644 ---- a/src/lxc/storage/rsync.c -+++ b/src/lxc/storage/rsync.c -@@ -78,8 +78,12 @@ int lxc_rsync(struct rsync_data *data) - return -1; - } - -- if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) -- SYSERROR("Failed to recursively turn root mount tree into dependent mount"); -+ ret = detect_shared_rootfs(); -+ if (ret) { -+ ret = mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL); -+ if (ret < 0) -+ SYSERROR("Failed to make \"/\" a slave mount"); -+ } - - ret = orig->ops->mount(orig); - if (ret < 0) { -diff --git a/src/lxc/storage/storage.c b/src/lxc/storage/storage.c -index 3f1b713f6..5291b244b 100644 ---- a/src/lxc/storage/storage.c -+++ b/src/lxc/storage/storage.c -@@ -41,6 +41,7 @@ - #include "storage_utils.h" - #include "utils.h" - #include "zfs.h" -+#include "block.h" - - #ifndef HAVE_STRLCPY - #include "include/strlcpy.h" -@@ -94,6 +95,22 @@ static const struct lxc_storage_ops loop_ops = { 
- .can_backup = true, - }; - -+#ifdef HAVE_ISULAD -+/* block */ -+static const struct lxc_storage_ops blk_ops = { -+ .detect = &blk_detect, -+ .mount = &blk_mount, -+ .umount = &blk_umount, -+ .clone_paths = NULL, -+ .destroy = &blk_destroy, -+ .create = NULL, -+ .copy = NULL, -+ .snapshot = NULL, -+ .can_snapshot = false, -+ .can_backup = true, -+}; -+#endif -+ - /* lvm */ - static const struct lxc_storage_ops lvm_ops = { - .detect = &lvm_detect, -@@ -179,6 +196,10 @@ static const struct lxc_storage_type bdevs[] = { - { .name = "overlayfs", .ops = &ovl_ops, }, - { .name = "loop", .ops = &loop_ops, }, - { .name = "nbd", .ops = &nbd_ops, }, -+#ifdef HAVE_ISULAD -+ //isulad: block device -+ { .name = "blk", .ops = &blk_ops, } -+#endif - }; - - static const size_t numbdevs = sizeof(bdevs) / sizeof(struct lxc_storage_type); -@@ -570,9 +591,15 @@ bool storage_destroy(struct lxc_conf *conf) - int destroy_rv = 0; - - r = storage_init(conf); -+#ifdef HAVE_ISULAD -+ if (r == NULL) { -+ WARN("%s 's storage init failed, the storage may be deleted already", conf->name); -+ return true; -+ } -+#else - if (!r) - return ret; -- -+#endif - destroy_rv = r->ops->destroy(r); - if (destroy_rv == 0) - ret = true; -diff --git a/src/lxc/storage/storage_utils.c b/src/lxc/storage/storage_utils.c -index f96bd520b..6fec638ea 100644 ---- a/src/lxc/storage/storage_utils.c -+++ b/src/lxc/storage/storage_utils.c -@@ -165,8 +165,11 @@ int detect_fs(struct lxc_storage *bdev, char *type, int len) - if (unshare(CLONE_NEWNS) < 0) - _exit(EXIT_FAILURE); - -- if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) -- SYSERROR("Failed to recursively turn root mount tree into dependent mount. 
Continuing..."); -+ if (detect_shared_rootfs()) -+ if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) { -+ SYSERROR("Failed to make / rslave"); -+ ERROR("Continuing..."); -+ } - - ret = mount_unknown_fs(srcdev, bdev->dest, bdev->mntopts); - if (ret < 0) { -@@ -256,10 +259,14 @@ int is_blktype(struct lxc_storage *b) - return 0; - } - -+// isulad: recored error -+static char **mount_errors = NULL; -+ - int mount_unknown_fs(const char *rootfs, const char *target, - const char *options) - { - size_t i; -+ char *errs = NULL; - int ret; - struct cbarg { - const char *rootfs; -@@ -288,15 +295,30 @@ int mount_unknown_fs(const char *rootfs, const char *target, - ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg); - if (ret < 0) { - ERROR("Failed to parse \"%s\"", fsfile[i]); -+ lxc_free_array((void**)mount_errors, free); -+ mount_errors = NULL; - return -1; - } - -- if (ret) -+ if (ret) { -+ lxc_free_array((void**)mount_errors, free); -+ mount_errors = NULL; - return 0; -+ } - } - -- ERROR("Failed to determine FSType for \"%s\"", rootfs); -+ if (mount_errors != NULL) { -+ errs = lxc_string_join("\n", (const char **)mount_errors, false); -+ if (errs == NULL) { -+ ERROR("failed to join mount errors"); -+ } -+ } - -+ ERROR("Failed to determine FSType for \"%s\": %s", rootfs, errs ? 
errs : "unknown reason"); -+ -+ free(errs); -+ lxc_free_array((void**)mount_errors, free); -+ mount_errors = NULL; - return -1; - } - -@@ -315,6 +337,12 @@ int find_fstype_cb(char *buffer, void *data) - unsigned long mntflags = 0; - char *mntdata = NULL; - char *fstype; -+ char mount_err[BUFSIZ] = {0}; -+ int ret; -+ -+#ifdef HAVE_ISULAD -+ unsigned long pflags = 0; -+#endif - - /* we don't try 'nodev' entries */ - if (strstr(buffer, "nodev")) -@@ -327,14 +355,34 @@ int find_fstype_cb(char *buffer, void *data) - DEBUG("Trying to mount \"%s\"->\"%s\" with FSType \"%s\"", cbarg->rootfs, - cbarg->target, fstype); - -+#ifdef HAVE_ISULAD -+ if (parse_mntopts(cbarg->options, &mntflags, &pflags, &mntdata) < 0) { -+ free(mntdata); -+ return 0; -+ } -+ -+ if (mount(cbarg->rootfs, cbarg->target, fstype, (mntflags & ~MS_RDONLY), mntdata)) { -+#else - if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) { - free(mntdata); - return 0; - } - - if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) { -+#endif - SYSDEBUG("Failed to mount"); - free(mntdata); -+ // isulad: recored error -+ ret = snprintf(mount_err, BUFSIZ, "\t\tmount %s onto %s with FSType %s failed: %s", -+ cbarg->rootfs, cbarg->target, fstype, strerror(errno)); -+ if (ret < 0 || (size_t)ret >= BUFSIZ) { -+ ERROR("failed to format output mount error"); -+ return 0; -+ } -+ -+ if (lxc_append_string(&mount_errors, mount_err) < 0) { -+ ERROR("failed to append mount error"); -+ } - return 0; - } - -diff --git a/src/lxc/storage/zfs.c b/src/lxc/storage/zfs.c -index ee9e32d0a..025cf956f 100644 ---- a/src/lxc/storage/zfs.c -+++ b/src/lxc/storage/zfs.c -@@ -159,23 +159,33 @@ bool zfs_detect(const char *path) - - int zfs_mount(struct lxc_storage *bdev) - { -- __do_free char *mntdata = NULL; - unsigned long mntflags = 0; -+ char *mntdata = NULL; - int ret; - size_t oldlen, newlen, totallen; - char *tmp; - const char *src; - char cmd_output[PATH_MAX] = {0}; - -+#ifdef HAVE_ISULAD -+ unsigned long pflags = 
0; -+#endif -+ - if (strcmp(bdev->type, "zfs")) - return -22; - - if (!bdev->src || !bdev->dest) - return -22; - -+#ifdef HAVE_ISULAD -+ ret = parse_mntopts(bdev->mntopts, &mntflags, &pflags, &mntdata); -+#else - ret = parse_mntopts(bdev->mntopts, &mntflags, &mntdata); -+#endif -+ - if (ret < 0) { - ERROR("Failed to parse mount options"); -+ free(mntdata); - return -22; - } - -@@ -220,6 +230,7 @@ int zfs_mount(struct lxc_storage *bdev) - tmp = realloc(mntdata, totallen); - if (!tmp) { - ERROR("Failed to reallocate memory"); -+ free(mntdata); - return -1; - } - mntdata = tmp; -@@ -227,10 +238,12 @@ int zfs_mount(struct lxc_storage *bdev) - ret = snprintf((mntdata + oldlen), newlen, ",zfsutil,mntpoint=%s", src); - if (ret < 0 || (size_t)ret >= newlen) { - ERROR("Failed to create string"); -+ free(mntdata); - return -1; - } - - ret = mount(src, bdev->dest, "zfs", mntflags, mntdata); -+ free(mntdata); - if (ret < 0 && errno != EBUSY) { - SYSERROR("Failed to mount \"%s\" on \"%s\"", src, bdev->dest); - return -1; -diff --git a/src/lxc/string_utils.c b/src/lxc/string_utils.c -index dcb1160e4..9118add02 100644 ---- a/src/lxc/string_utils.c -+++ b/src/lxc/string_utils.c -@@ -501,6 +501,7 @@ int lxc_grow_array(void ***array, size_t *capacity, size_t new_size, size_t capa - /* first time around, catch some trivial mistakes of the user - * only initializing one of these */ - if (!*array || !*capacity) { -+ free(*array); - *array = NULL; - *capacity = 0; - } -diff --git a/src/lxc/sync.h b/src/lxc/sync.h -index ff7a1eb18..56c1dfcfd 100644 ---- a/src/lxc/sync.h -+++ b/src/lxc/sync.h -@@ -11,6 +11,10 @@ enum { - LXC_SYNC_POST_CONFIGURE, - LXC_SYNC_CGROUP, - LXC_SYNC_CGROUP_UNSHARE, -+#ifdef HAVE_ISULAD -+ LXC_SYNC_OCI_PRESTART_HOOK, -+ LXC_SYNC_POST_OCI_PRESTART_HOOK, -+#endif - LXC_SYNC_CGROUP_LIMITS, - LXC_SYNC_READY_START, - LXC_SYNC_RESTART, -diff --git a/src/lxc/syscall_numbers.h b/src/lxc/syscall_numbers.h -index bfd0e57ab..42609d43f 100644 ---- a/src/lxc/syscall_numbers.h 
-+++ b/src/lxc/syscall_numbers.h -@@ -35,12 +35,10 @@ - #define __NR_keyctl 280 - #elif defined __powerpc__ - #define __NR_keyctl 271 -- #elif defined __riscv -- #define __NR_keyctl 219 - #elif defined __sparc__ - #define __NR_keyctl 283 - #elif defined __ia64__ -- #define __NR_keyctl (249 + 1024) -+ #define __NR_keyctl 249 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_keyctl 4282 -@@ -70,8 +68,6 @@ - #define __NR_memfd_create 350 - #elif defined __powerpc__ - #define __NR_memfd_create 360 -- #elif defined __riscv -- #define __NR_memfd_create 279 - #elif defined __sparc__ - #define __NR_memfd_create 348 - #elif defined __blackfin__ -@@ -107,12 +103,10 @@ - #define __NR_pivot_root 217 - #elif defined __powerpc__ - #define __NR_pivot_root 203 -- #elif defined __riscv -- #define __NR_pivot_root 41 - #elif defined __sparc__ - #define __NR_pivot_root 146 - #elif defined __ia64__ -- #define __NR_pivot_root (183 + 1024) -+ #define __NR_pivot_root 183 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_pivot_root 4216 -@@ -142,12 +136,10 @@ - #define __NR_setns 339 - #elif defined __powerpc__ - #define __NR_setns 350 -- #elif defined __riscv -- #define __NR_setns 268 - #elif defined __sparc__ - #define __NR_setns 337 - #elif defined __ia64__ -- #define __NR_setns (306 + 1024) -+ #define __NR_setns 306 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_setns 4344 -@@ -177,12 +169,10 @@ - #define __NR_sethostname 74 - #elif defined __powerpc__ - #define __NR_sethostname 74 -- #elif defined __riscv -- #define __NR_sethostname 161 - #elif defined __sparc__ - #define __NR_sethostname 88 - #elif defined __ia64__ -- #define __NR_sethostname (59 + 1024) -+ #define __NR_sethostname 59 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_sethostname 474 -@@ -212,12 +202,10 @@ - #define __NR_signalfd 316 - #elif defined __powerpc__ - #define 
__NR_signalfd 305 -- #elif defined __riscv -- #define __NR_signalfd 74 - #elif defined __sparc__ - #define __NR_signalfd 311 - #elif defined __ia64__ -- #define __NR_signalfd (283 + 1024) -+ #define __NR_signalfd 283 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_signalfd 4317 -@@ -247,12 +235,10 @@ - #define __NR_signalfd4 322 - #elif defined __powerpc__ - #define __NR_signalfd4 313 -- #elif defined __riscv -- #define __NR_signalfd4 74 - #elif defined __sparc__ - #define __NR_signalfd4 317 - #elif defined __ia64__ -- #define __NR_signalfd4 (289 + 1024) -+ #define __NR_signalfd4 289 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_signalfd4 4324 -@@ -282,12 +268,10 @@ - #define __NR_unshare 303 - #elif defined __powerpc__ - #define __NR_unshare 282 -- #elif defined __riscv -- #define __NR_unshare 97 - #elif defined __sparc__ - #define __NR_unshare 299 - #elif defined __ia64__ -- #define __NR_unshare (272 + 1024) -+ #define __NR_unshare 272 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_unshare 4303 -@@ -317,12 +301,10 @@ - #define __NR_bpf 351 - #elif defined __powerpc__ - #define __NR_bpf 361 -- #elif defined __riscv -- #define __NR_bpf 280 - #elif defined __sparc__ - #define __NR_bpf 349 - #elif defined __ia64__ -- #define __NR_bpf (317 + 1024) -+ #define __NR_bpf 317 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_bpf 4355 -@@ -352,12 +334,10 @@ - #define __NR_faccessat 300 - #elif defined __powerpc__ - #define __NR_faccessat 298 -- #elif defined __riscv -- #define __NR_faccessat 48 - #elif defined __sparc__ - #define __NR_faccessat 296 - #elif defined __ia64__ -- #define __NR_faccessat (269 + 1024) -+ #define __NR_faccessat 269 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_faccessat 4300 -@@ -387,8 +367,6 @@ - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define 
__NR_pidfd_send_signal 5424 - #endif -- #elif defined __ia64__ -- #define __NR_pidfd_send_signal (424 + 1024) - #else - #define __NR_pidfd_send_signal 424 - #endif -@@ -407,12 +385,10 @@ - #define __NR_seccomp 348 - #elif defined __powerpc__ - #define __NR_seccomp 358 -- #elif defined __riscv -- #define __NR_seccomp 277 - #elif defined __sparc__ - #define __NR_seccomp 346 - #elif defined __ia64__ -- #define __NR_seccomp (329 + 1024) -+ #define __NR_seccomp 329 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_seccomp 4352 -@@ -442,12 +418,10 @@ - #define __NR_gettid 236 - #elif defined __powerpc__ - #define __NR_gettid 207 -- #elif defined __riscv -- #define __NR_gettid 178 - #elif defined __sparc__ - #define __NR_gettid 143 - #elif defined __ia64__ -- #define __NR_gettid (81 + 1024) -+ #define __NR_gettid 81 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_gettid 4222 -@@ -481,12 +455,10 @@ - #define __NR_execveat 354 - #elif defined __powerpc__ - #define __NR_execveat 362 -- #elif defined __riscv -- #define __NR_execveat 281 - #elif defined __sparc__ - #define __NR_execveat 350 - #elif defined __ia64__ -- #define __NR_execveat (318 + 1024) -+ #define __NR_execveat 318 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_execveat 4356 -@@ -503,64 +475,4 @@ - #endif - #endif - --#ifndef __NR_move_mount -- #if defined __alpha__ -- #define __NR_move_mount 539 -- #elif defined _MIPS_SIM -- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ -- #define __NR_move_mount 4429 -- #endif -- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ -- #define __NR_move_mount 6429 -- #endif -- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ -- #define __NR_move_mount 5429 -- #endif -- #elif defined __ia64__ -- #define __NR_move_mount (428 + 1024) -- #else -- #define __NR_move_mount 429 -- #endif --#endif -- --#ifndef __NR_open_tree -- #if defined __alpha__ -- #define __NR_open_tree 538 -- #elif 
defined _MIPS_SIM -- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ -- #define __NR_open_tree 4428 -- #endif -- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ -- #define __NR_open_tree 6428 -- #endif -- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ -- #define __NR_open_tree 5428 -- #endif -- #elif defined __ia64__ -- #define __NR_open_tree (428 + 1024) -- #else -- #define __NR_open_tree 428 -- #endif --#endif -- --#ifndef __NR_clone3 -- #if defined __alpha__ -- #define __NR_clone3 545 -- #elif defined _MIPS_SIM -- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ -- #define __NR_clone3 4435 -- #endif -- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ -- #define __NR_clone3 6435 -- #endif -- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ -- #define __NR_clone3 5435 -- #endif -- #elif defined __ia64__ -- #define __NR_clone3 (435 + 1024) -- #else -- #define __NR_clone3 435 -- #endif --#endif -- - #endif /* __LXC_SYSCALL_NUMBERS_H */ -diff --git a/src/lxc/syscall_wrappers.h b/src/lxc/syscall_wrappers.h -index 041daf357..1cef21585 100644 ---- a/src/lxc/syscall_wrappers.h -+++ b/src/lxc/syscall_wrappers.h -@@ -137,28 +137,4 @@ static int faccessat(int __fd, const char *__file, int __type, int __flag) - } - #endif - --#ifndef HAVE_MOVE_MOUNT --static inline int move_mount_lxc(int from_dfd, const char *from_pathname, -- int to_dfd, const char *to_pathname, -- unsigned int flags) --{ -- return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, -- to_pathname, flags); --} --#define move_mount move_mount_lxc --#else --extern int move_mount(int from_dfd, const char *from_pathname, int to_dfd, -- const char *to_pathname, unsigned int flags); --#endif -- --#ifndef HAVE_OPEN_TREE --static inline int open_tree_lxc(int dfd, const char *filename, unsigned int flags) --{ -- return syscall(__NR_open_tree, dfd, filename, flags); --} --#define open_tree open_tree_lxc --#else --extern int open_tree(int dfd, const char *filename, unsigned int flags); --#endif -- - #endif /* __LXC_SYSCALL_WRAPPER_H */ -diff 
--git a/src/lxc/terminal.c b/src/lxc/terminal.c -index e58db5c46..7441de791 100644 ---- a/src/lxc/terminal.c -+++ b/src/lxc/terminal.c -@@ -28,6 +28,10 @@ - #include "syscall_wrappers.h" - #include "terminal.h" - #include "utils.h" -+#ifdef HAVE_ISULAD -+#include "logger_json_file.h" -+#include "include/strlcpy.h" -+#endif - - #if HAVE_PTY_H - #include -@@ -65,7 +69,7 @@ void lxc_terminal_winsz(int srcfd, int dstfd) - - static void lxc_terminal_winch(struct lxc_terminal_state *ts) - { -- lxc_terminal_winsz(ts->stdinfd, ts->ptmxfd); -+ lxc_terminal_winsz(ts->stdinfd, ts->masterfd); - } - - int lxc_terminal_signalfd_cb(int fd, uint32_t events, void *cbdata, -@@ -105,7 +109,7 @@ struct lxc_terminal_state *lxc_terminal_signal_init(int srcfd, int dstfd) - - memset(ts, 0, sizeof(*ts)); - ts->stdinfd = srcfd; -- ts->ptmxfd = dstfd; -+ ts->masterfd = dstfd; - ts->sigfd = -1; - - ret = sigemptyset(&mask); -@@ -183,6 +187,69 @@ static int lxc_terminal_truncate_log_file(struct lxc_terminal *terminal) - return lxc_unpriv(ftruncate(terminal->log_fd, 0)); - } - -+#ifdef HAVE_ISULAD -+ -+int lxc_set_terminal_winsz(struct lxc_terminal *terminal, unsigned int height, unsigned int width) -+{ -+ int ret = 0; -+ struct winsize wsz; -+ -+ if (terminal->master < 0) { -+ return 0; -+ } -+ -+ ret = ioctl(terminal->master, TIOCGWINSZ, &wsz); -+ if (ret < 0) { -+ WARN("Failed to get window size"); -+ return -1; -+ } -+ wsz.ws_col = width; -+ wsz.ws_row = height; -+ -+ ret = ioctl(terminal->master, TIOCSWINSZ, &wsz); -+ if (ret < 0) -+ WARN("Failed to set window size"); -+ else -+ DEBUG("Set window size to %d columns and %d rows", wsz.ws_col, -+ wsz.ws_row); -+ return ret; -+} -+ -+/* -+ * isulad: support mult-logfiles -+ * */ -+static int lxc_terminal_rename_old_log_file(struct lxc_terminal *terminal) -+{ -+ int ret; -+ unsigned int i; -+ char tmp[PATH_MAX] = {0}; -+ char *rename_fname = NULL; -+ -+ for (i = terminal->log_rotate - 1; i > 1; i--) { -+ ret = snprintf(tmp, PATH_MAX, "%s.%u", 
terminal->log_path, i); -+ if (ret < 0 || ret >= PATH_MAX) { -+ free(rename_fname); -+ return -EFBIG; -+ } -+ free(rename_fname); -+ rename_fname = safe_strdup(tmp); -+ ret = snprintf(tmp, PATH_MAX, "%s.%u", terminal->log_path, (i - 1)); -+ if (ret < 0 || ret >= PATH_MAX) { -+ free(rename_fname); -+ return -EFBIG; -+ } -+ ret = lxc_unpriv(rename(tmp, rename_fname)); -+ if (ret < 0 && errno != ENOENT) { -+ free(rename_fname); -+ return ret; -+ } -+ } -+ -+ free(rename_fname); -+ return 0; -+} -+#endif -+ - static int lxc_terminal_rotate_log_file(struct lxc_terminal *terminal) - { - __do_free char *tmp = NULL; -@@ -196,6 +263,15 @@ static int lxc_terminal_rotate_log_file(struct lxc_terminal *terminal) - if (terminal->log_fd < 0) - return -EBADF; - -+#ifdef HAVE_ISULAD -+ /* isuald: rotate old log file first */ -+ ret = lxc_terminal_rename_old_log_file(terminal); -+ if(ret != 0) { -+ ERROR("Rename old log file failed"); -+ return ret; -+ } -+#endif -+ - len = strlen(terminal->log_path) + sizeof(".1"); - tmp = must_realloc(NULL, len); - -@@ -212,6 +288,7 @@ static int lxc_terminal_rotate_log_file(struct lxc_terminal *terminal) - return lxc_terminal_create_log_file(terminal); - } - -+#ifndef HAVE_ISULAD - static int lxc_terminal_write_log_file(struct lxc_terminal *terminal, char *buf, - int bytes_read) - { -@@ -317,7 +394,456 @@ static int lxc_terminal_write_log_file(struct lxc_terminal *terminal, char *buf, - bytes_read -= ret; - return bytes_read; - } -+#endif -+ -+#ifdef HAVE_ISULAD -+/* get time buffer */ -+static bool get_time_buffer(struct timespec *timestamp, char *timebuffer, -+ size_t maxsize) -+{ -+ struct tm tm_utc = { 0 }; -+ int32_t nanos = 0; -+ time_t seconds; -+ size_t len = 0; -+ int ret = 0; -+ -+ if (!timebuffer || !maxsize) { -+ return false; -+ } -+ -+ seconds = (time_t)timestamp->tv_sec; -+ gmtime_r(&seconds, &tm_utc); -+ strftime(timebuffer, maxsize, "%Y-%m-%dT%H:%M:%S", &tm_utc); -+ -+ nanos = (int32_t)timestamp->tv_nsec; -+ len = 
strlen(timebuffer); -+ ret = snprintf(timebuffer + len, (maxsize - len), ".%09dZ", nanos); -+ if (ret < 0 || ret >= (maxsize - len)) { -+ return false; -+ } -+ -+ return true; -+} -+ -+/* get now time buffer */ -+static bool get_now_time_buffer(char *timebuffer, size_t maxsize) -+{ -+ int err = 0; -+ struct timespec ts; -+ -+ err = clock_gettime(CLOCK_REALTIME, &ts); -+ if (err != 0) { -+ ERROR("failed to get time"); -+ return false; -+ } -+ -+ return get_time_buffer(&ts, timebuffer, maxsize); -+} -+ -+static int isulad_lxc_terminal_rotate_write_data(struct lxc_terminal *terminal, const char *buf, -+ int bytes_read) -+{ -+ int ret; -+ struct stat st; -+ int64_t space_left = -1; -+ -+ if (terminal->log_fd < 0) -+ return 0; -+ -+ /* A log size <= 0 means that there's no limit on the size of the log -+ * file at which point we simply ignore whether the log is supposed to -+ * be rotated or not. -+ */ -+ if (terminal->log_size <= 0) -+ return lxc_write_nointr(terminal->log_fd, buf, bytes_read); -+ -+ /* Get current size of the log file. */ -+ ret = fstat(terminal->log_fd, &st); -+ if (ret < 0) { -+ SYSERROR("Failed to stat the terminal log file descriptor"); -+ return -1; -+ } -+ -+ /* handle non-regular files */ -+ if ((st.st_mode & S_IFMT) != S_IFREG) { -+ /* This isn't a regular file. so rotating the file seems a -+ * dangerous thing to do, size limits are also very -+ * questionable. Let's not risk anything and tell the user that -+ * he's requesting us to do weird stuff. -+ */ -+ if (terminal->log_rotate > 0 || terminal->log_size > 0) -+ return -EINVAL; -+ -+ /* I mean, sure log wherever you want to. */ -+ return lxc_write_nointr(terminal->log_fd, buf, bytes_read); -+ } -+ -+ space_left = terminal->log_size - st.st_size; -+ -+ /* User doesn't want to rotate the log file and there's no more space -+ * left so simply truncate it. 
-+ */ -+ if (space_left <= 0 && terminal->log_rotate <= 0) { -+ ret = lxc_terminal_truncate_log_file(terminal); -+ if (ret < 0) -+ return ret; -+ -+ if (bytes_read <= terminal->log_size) -+ return lxc_write_nointr(terminal->log_fd, buf, bytes_read); -+ -+ /* Write as much as we can into the buffer and loose the rest. */ -+ return lxc_write_nointr(terminal->log_fd, buf, terminal->log_size); -+ } -+ -+ /* There's enough space left. */ -+ if (bytes_read <= space_left) -+ return lxc_write_nointr(terminal->log_fd, buf, bytes_read); -+ -+ /* There'd be more to write but we aren't instructed to rotate the log -+ * file so simply return. There's no error on our side here. -+ */ -+ if (terminal->log_rotate > 0) -+ ret = lxc_terminal_rotate_log_file(terminal); -+ else -+ ret = lxc_terminal_truncate_log_file(terminal); -+ if (ret < 0) -+ return ret; -+ -+ if (terminal->log_size < bytes_read) { -+ /* Well, this is unfortunate because it means that there is more -+ * to write than the user has granted us space. There are -+ * multiple ways to handle this but let's use the simplest one: -+ * write as much as we can, tell the user that there was more -+ * stuff to write and move on. -+ * Note that this scenario shouldn't actually happen with the -+ * standard pty-based terminal that LXC allocates since it will -+ * be switched into raw mode. In raw mode only 1 byte at a time -+ * should be read and written. -+ */ -+ WARN("Size of terminal log file is smaller than the bytes to write"); -+ ret = lxc_write_nointr(terminal->log_fd, buf, terminal->log_size); -+ if (ret < 0) -+ return -1; -+ bytes_read -= ret; -+ return bytes_read; -+ } -+ -+ /* Yay, we made it. 
*/ -+ ret = lxc_write_nointr(terminal->log_fd, buf, bytes_read); -+ if (ret < 0) -+ return -1; -+ bytes_read -= ret; -+ return bytes_read; -+} -+ -+static ssize_t isulad_logger_json_write(struct lxc_terminal *terminal, const char *type, const char *buf, -+ int bytes_read) -+{ -+ logger_json_file *msg = NULL; -+ ssize_t ret = -1; -+ size_t len; -+ char *json = NULL; -+ char timebuffer[64] = { 0 }; -+ parser_error err = NULL; -+ struct parser_context ctx = { GEN_OPTIONS_SIMPLIFY | GEN_OPTIONS_NOT_VALIDATE_UTF8, stderr }; -+ -+ if (bytes_read < 0 || bytes_read >= INT_MAX) { -+ return -1; -+ } -+ msg = calloc(sizeof(logger_json_file), 1); -+ if (msg == NULL) { -+ return -errno; -+ } -+ msg->log = calloc(bytes_read, 1); -+ if (!msg->log) { -+ goto cleanup; -+ } -+ memcpy(msg->log, buf, bytes_read); -+ msg->log_len = bytes_read; -+ msg->stream = type ? safe_strdup(type) : safe_strdup("stdout"); -+ -+ get_now_time_buffer(timebuffer, sizeof(timebuffer)); -+ msg->time = safe_strdup(timebuffer); -+ -+ json = logger_json_file_generate_json(msg, &ctx, &err); -+ if (!json) { -+ ERROR("Failed to generate json: %s", err); -+ goto cleanup; -+ } -+ len = strlen(json); -+ json[len] = '\n'; -+ ret = isulad_lxc_terminal_rotate_write_data(terminal, json, len + 1); -+cleanup: -+ free(json); -+ free_logger_json_file(msg); -+ free(err); -+ return ret; -+} -+ -+static ssize_t isulad_logger_syslog_write(struct lxc_terminal *terminal, const char *buf) -+{ -+ syslog(LOG_INFO, "%s", buf); -+ return 0; -+} -+ -+static inline bool is_syslog(const char *driver) -+{ -+ if (driver == NULL) { -+ return false; -+ } -+ -+ return (strcmp("syslog", driver) == 0); -+} -+ -+static inline ssize_t isulad_logger_write(struct lxc_terminal *terminal, const char *type, const char *buf, -+ int bytes_read) -+{ -+ if (is_syslog(terminal->log_driver)) { -+ return isulad_logger_syslog_write(terminal, buf); -+ } -+ -+ return isulad_logger_json_write(terminal, type, buf, bytes_read); -+} -+ -+static int 
isulad_lxc_terminal_write_log_file(struct lxc_terminal *terminal, const char *type, char *buf, -+ int bytes_read) -+{ -+#define __BUF_CACHE_SIZE (16 * LXC_TERMINAL_BUFFER_SIZE) -+ static char cache[__BUF_CACHE_SIZE]; -+ static int size = 0; -+ int upto, index; -+ int begin = 0, buf_readed = 0, buf_left = 0; -+ int ret; -+ -+ if (buf != NULL && bytes_read > 0) { -+ /* Work out how much more data we are okay with reading this time. */ -+ upto = size + bytes_read; -+ if (upto > __BUF_CACHE_SIZE) { -+ upto = __BUF_CACHE_SIZE; -+ } -+ -+ if (upto > size) { -+ buf_readed = upto - size; -+ memcpy(cache + size, buf, buf_readed); -+ buf_left = bytes_read - buf_readed; -+ size += buf_readed; -+ } -+ } -+ -+ // If we have no data to log, and there's no more coming, we're done. -+ if (size == 0) -+ return 0; -+ -+ // Break up the data that we've buffered up into lines, and log each in turn. -+ for (index = 0; index < size; index++) { -+ if (cache[index] == '\n') { -+ ret = isulad_logger_write(terminal, type, cache + begin, index - begin + 1); -+ if (ret < 0) { -+ WARN("Failed to log msg"); -+ } -+ begin = index + 1; -+ } -+ } -+ /* If there's no more coming, or the buffer is full but -+ * has no newlines, log whatever we haven't logged yet, -+ * noting that it's a partial log line. */ -+ if (buf == NULL || (begin == 0 && size == __BUF_CACHE_SIZE)) { -+ if (begin < size) { -+ ret = isulad_logger_write(terminal, type, cache + begin, size - begin); -+ if (ret < 0) { -+ WARN("Failed to log msg"); -+ } -+ begin = 0; -+ size = 0; -+ } -+ if (buf == NULL) { -+ return 0; -+ } -+ } -+ /* Move any unlogged data to the front of the buffer in preparation for another read. 
*/ -+ if (begin > 0) { -+ memcpy(cache, cache + begin, size - begin); -+ size -= begin; -+ } -+ /* Move left data to cache buffer */ -+ if (buf_left > 0) { -+ memcpy(cache + size, buf + buf_readed, buf_left); -+ size += buf_left; -+ } -+ return 0; -+} -+ -+/* isulad: forward data to all fifos */ -+static void lxc_forward_data_to_fifo(struct lxc_list *list, bool is_err, const char *buf, int r) -+{ -+ struct lxc_list *it = NULL; -+ struct lxc_list *next = NULL; -+ struct lxc_fifos_fd *elem = NULL; -+ -+ lxc_list_for_each_safe(it, list, next) { -+ elem = it->elem; -+ if (is_err) { -+ if (elem->err_fd >= 0) -+ lxc_write_nointr(elem->err_fd, buf, r); -+ } else { -+ if (elem->out_fd >= 0) -+ lxc_write_nointr(elem->out_fd, buf, r); -+ } -+ } -+ -+ return; -+} -+ -+/* isulad: judge the fd whether is fifo */ -+static bool lxc_terminal_is_fifo(int fd, struct lxc_list *list) -+{ -+ struct lxc_list *it = NULL; -+ struct lxc_list *next = NULL; -+ struct lxc_fifos_fd *elem = NULL; -+ -+ lxc_list_for_each_safe(it, list, next) { -+ elem = it->elem; -+ if (elem->in_fd == fd) -+ return true; -+ } -+ -+ return false; -+} -+ -+/* isulad: if fd == -1, means delete all the fifos*/ -+int lxc_terminal_delete_fifo(int fd, struct lxc_list *list) -+{ -+ struct lxc_list *it = NULL; -+ struct lxc_list *next = NULL; -+ struct lxc_fifos_fd *elem = NULL; -+ -+ lxc_list_for_each_safe(it, list, next) { -+ elem = it->elem; -+ if (elem->in_fd == fd || -1 == fd) { -+ INFO("Delete fifo fd %d", fd); -+ lxc_list_del(it); -+ if (elem->in_fifo) -+ free(elem->in_fifo); -+ if (elem->out_fifo) -+ free(elem->out_fifo); -+ if (elem->err_fifo) -+ free(elem->err_fifo); -+ if (elem->in_fd >= 0) -+ close(elem->in_fd); -+ if (elem->out_fd >= 0) -+ close(elem->out_fd); -+ if (elem->err_fd >= 0) -+ close(elem->err_fd); -+ free(elem); -+ } -+ } -+ -+ return 0; -+} -+ -+int lxc_terminal_io_cb(int fd, uint32_t events, void *data, -+ struct lxc_epoll_descr *descr) -+{ -+ struct lxc_terminal *terminal = data; -+ char buf[2 
* LXC_TERMINAL_BUFFER_SIZE]; -+ int r, w, w_log, w_rbuf; -+ -+ w = r = lxc_read_nointr(fd, buf, sizeof(buf)); -+ if (r <= 0) { -+ INFO("Terminal client on fd %d has exited", fd); -+ lxc_mainloop_del_handler(descr, fd); -+ -+ if (fd == terminal->master) { -+ terminal->master = -EBADF; -+ /* write remained buffer to terminal log */ -+ if (terminal->log_fd >= 0) { -+ w_log = isulad_lxc_terminal_write_log_file(terminal, "stdout", NULL, 0); -+ if (w_log < 0) -+ TRACE("Failed to write %d bytes to terminal log", r); -+ } -+ /* notes: do not close the master fd due to if we close the fd, the process may -+ * recive SIGHUP and the exit code will be 129 (128 + 1) -+ */ -+ return LXC_MAINLOOP_CLOSE; -+ } else if (fd == terminal->peer) { -+ lxc_terminal_signal_fini(terminal); -+ terminal->peer = -EBADF; -+ close(fd); -+ return LXC_MAINLOOP_CONTINUE; /* isulad: do not close mainloop when peer close*/ -+ } else if (lxc_terminal_is_fifo(fd, &terminal->fifos)) { -+ /* isulad: delete fifos when the client close */ -+ lxc_terminal_delete_fifo(fd, &terminal->fifos); -+ return LXC_MAINLOOP_CONTINUE; -+ } else if (fd == terminal->pipes[1][0] || fd == terminal->pipes[2][0]) { -+ if (fd == terminal->pipes[1][0]) { -+ if (terminal->log_fd >= 0) { -+ w_log = isulad_lxc_terminal_write_log_file(terminal, "stdout", NULL, 0); -+ } -+ terminal->pipes[1][0] = -EBADF; -+ } else if (fd == terminal->pipes[2][0]) { -+ if (terminal->log_fd >= 0) { -+ w_log = isulad_lxc_terminal_write_log_file(terminal, "stderr", NULL, 0); -+ } -+ terminal->pipes[2][0] = -EBADF; -+ } -+ /* notes: do not close the master fd due to if we close the fd, the process may -+ * recive SIGHUP and the exit code will be 141 (128 + 13) -+ */ -+ return LXC_MAINLOOP_CONTINUE; -+ } else if (fd == terminal->pipes[0][1]) { -+ TRACE("closed stdin pipe of container stdin"); -+ terminal->pipes[0][1] = -EBADF; -+ return LXC_MAINLOOP_CONTINUE; -+ } else { -+ ERROR("Handler received unexpected file descriptor"); -+ } -+ close(fd); -+ return 
LXC_MAINLOOP_CLOSE; -+ } -+ -+ if (fd == terminal->peer || lxc_terminal_is_fifo(fd, &terminal->fifos)) { -+ if (terminal->master > 0) -+ w = lxc_write_nointr(terminal->master, buf, r); -+ if (terminal->pipes[0][1] > 0) -+ w = lxc_write_nointr(terminal->pipes[0][1], buf, r); -+ } -+ -+ w_rbuf = w_log = 0; -+ if (fd == terminal->master || fd == terminal->pipes[1][0] || fd == terminal->pipes[2][0]) { -+ /* write to peer first */ -+ if (terminal->peer >= 0) -+ w = lxc_write_nointr(terminal->peer, buf, r); -+ -+ /* isulad: forward data to fifos */ -+ lxc_forward_data_to_fifo(&terminal->fifos, fd == terminal->pipes[2][0], buf, r); -+ -+ /* write to terminal ringbuffer */ -+ if (terminal->buffer_size > 0) -+ w_rbuf = lxc_ringbuf_write(&terminal->ringbuf, buf, r); -+ -+ /* write to terminal log */ -+ if (terminal->log_fd >= 0) { -+ if (fd == terminal->master || fd == terminal->pipes[1][0]) -+ w_log = isulad_lxc_terminal_write_log_file(terminal, "stdout", buf, r); -+ else if (fd == terminal->pipes[2][0]) -+ w_log = isulad_lxc_terminal_write_log_file(terminal, "stderr", buf, r); -+ } -+ } -+ -+ if (w != r) -+ WARN("Short write on terminal r:%d != w:%d", r, w); -+ -+ if (w_rbuf < 0) { -+ errno = -w_rbuf; -+ SYSTRACE("Failed to write %d bytes to terminal ringbuffer", r); -+ } - -+ if (w_log < 0) -+ TRACE("Failed to write %d bytes to terminal log", r); -+ -+ return LXC_MAINLOOP_CONTINUE; -+} -+#else - int lxc_terminal_io_cb(int fd, uint32_t events, void *data, - struct lxc_epoll_descr *descr) - { -@@ -330,8 +856,8 @@ int lxc_terminal_io_cb(int fd, uint32_t events, void *data, - INFO("Terminal client on fd %d has exited", fd); - lxc_mainloop_del_handler(descr, fd); - -- if (fd == terminal->ptmx) { -- terminal->ptmx = -EBADF; -+ if (fd == terminal->master) { -+ terminal->master = -EBADF; - } else if (fd == terminal->peer) { - lxc_terminal_signal_fini(terminal); - terminal->peer = -EBADF; -@@ -344,10 +870,10 @@ int lxc_terminal_io_cb(int fd, uint32_t events, void *data, - } - - if 
(fd == terminal->peer) -- w = lxc_write_nointr(terminal->ptmx, buf, r); -+ w = lxc_write_nointr(terminal->master, buf, r); - - w_rbuf = w_log = 0; -- if (fd == terminal->ptmx) { -+ if (fd == terminal->master) { - /* write to peer first */ - if (terminal->peer >= 0) - w = lxc_write_nointr(terminal->peer, buf, r); -@@ -374,6 +900,7 @@ int lxc_terminal_io_cb(int fd, uint32_t events, void *data, - - return LXC_MAINLOOP_CONTINUE; - } -+#endif - - static int lxc_terminal_mainloop_add_peer(struct lxc_terminal *terminal) - { -@@ -401,21 +928,125 @@ static int lxc_terminal_mainloop_add_peer(struct lxc_terminal *terminal) - return 0; - } - -+#ifdef HAVE_ISULAD -+/* isulad add pipes to mainloop */ -+static int lxc_terminal_mainloop_add_pipes(struct lxc_terminal *terminal) -+{ -+ int ret = 0; -+ -+ // parent read data from fifo, and send to stdin of container -+ if (terminal->pipes[0][1] > 0) { -+ ret = lxc_mainloop_add_handler(terminal->descr, terminal->pipes[0][1], -+ lxc_terminal_io_cb, terminal); -+ if (ret) { -+ ERROR("pipe fd %d not added to mainloop", terminal->pipes[0][1]); -+ return -1; -+ } -+ } -+ // parent read data from stdout of container, and send to fifo -+ if (terminal->pipes[1][0] > 0) { -+ ret = lxc_mainloop_add_handler(terminal->descr, terminal->pipes[1][0], -+ lxc_terminal_io_cb, terminal); -+ if (ret) { -+ ERROR("pipe fd %d not added to mainloop", terminal->pipes[1][0]); -+ return -1; -+ } -+ } -+ // parent read data from stderr of container, and send to fifo -+ if (terminal->pipes[2][0] > 0) { -+ ret = lxc_mainloop_add_handler(terminal->descr, terminal->pipes[2][0], -+ lxc_terminal_io_cb, terminal); -+ if (ret) { -+ ERROR("pipe fd %d not added to mainloop", terminal->pipes[2][0]); -+ return -1; -+ } -+ } -+ return ret; -+} -+ -+/* isulad add fifo to mainloop */ -+static int lxc_terminal_mainloop_add_fifo(struct lxc_terminal *terminal) -+{ -+ int ret = 0; -+ struct lxc_list *it = NULL; -+ struct lxc_list *next = NULL; -+ struct lxc_fifos_fd *elem = NULL; 
-+ -+ lxc_list_for_each_safe(it, &terminal->fifos, next) { -+ elem = it->elem; -+ if (elem->in_fd >= 0) { -+ ret = lxc_mainloop_add_handler(terminal->descr, elem->in_fd, -+ lxc_terminal_io_cb, terminal); -+ if (ret) { -+ ERROR("console fifo %s not added to mainloop", elem->in_fifo); -+ return -1; -+ } -+ } -+ } -+ return ret; -+} -+ -+int lxc_terminal_mainloop_add(struct lxc_epoll_descr *descr, -+ struct lxc_terminal *terminal) -+{ -+ int ret; -+ -+ /* We cache the descr so that we can add an fd to it when someone -+ * does attach to it in lxc_terminal_allocate(). -+ */ -+ terminal->descr = descr; -+ -+ ret = lxc_terminal_mainloop_add_peer(terminal); -+ if (ret < 0) { -+ ERROR("Failed to add handler for terminal peer to mainloop"); -+ return -1; -+ } -+ -+ /* isulad add pipes to mainloop */ -+ ret = lxc_terminal_mainloop_add_pipes(terminal); -+ if (ret < 0) { -+ ERROR("Failed to add handler for terminal fifos to mainloop"); -+ return -1; -+ } -+ -+ /* isulad add fifo to mainloop */ -+ ret = lxc_terminal_mainloop_add_fifo(terminal); -+ if (ret < 0) { -+ ERROR("Failed to add handler for terminal fifos to mainloop"); -+ return -1; -+ } -+ -+ if (terminal->master < 0) { -+ INFO("Terminal is not initialized"); -+ return 0; -+ } -+ -+ ret = lxc_mainloop_add_handler(descr, terminal->master, -+ lxc_terminal_io_cb, terminal); -+ if (ret < 0) { -+ ERROR("Failed to add handler for terminal master fd %d to " -+ "mainloop", terminal->master); -+ return -1; -+ } -+ -+ return 0; -+} -+#else - int lxc_terminal_mainloop_add(struct lxc_epoll_descr *descr, - struct lxc_terminal *terminal) - { - int ret; - -- if (terminal->ptmx < 0) { -+ if (terminal->master < 0) { - INFO("Terminal is not initialized"); - return 0; - } - -- ret = lxc_mainloop_add_handler(descr, terminal->ptmx, -+ ret = lxc_mainloop_add_handler(descr, terminal->master, - lxc_terminal_io_cb, terminal); - if (ret < 0) { -- ERROR("Failed to add handler for terminal ptmx fd %d to " -- "mainloop", terminal->ptmx); -+ 
ERROR("Failed to add handler for terminal master fd %d to " -+ "mainloop", terminal->master); - return -1; - } - -@@ -426,6 +1057,7 @@ int lxc_terminal_mainloop_add(struct lxc_epoll_descr *descr, - - return lxc_terminal_mainloop_add_peer(terminal); - } -+#endif - - int lxc_setup_tios(int fd, struct termios *oldtios) - { -@@ -483,11 +1115,11 @@ static void lxc_terminal_peer_proxy_free(struct lxc_terminal *terminal) - { - lxc_terminal_signal_fini(terminal); - -- close(terminal->proxy.ptmx); -- terminal->proxy.ptmx = -1; -+ close(terminal->proxy.master); -+ terminal->proxy.master = -1; - -- close(terminal->proxy.pts); -- terminal->proxy.pts = -1; -+ close(terminal->proxy.slave); -+ terminal->proxy.slave = -1; - - terminal->proxy.busy = -1; - -@@ -503,7 +1135,7 @@ static int lxc_terminal_peer_proxy_alloc(struct lxc_terminal *terminal, - struct termios oldtermio; - struct lxc_terminal_state *ts; - -- if (terminal->ptmx < 0) { -+ if (terminal->master < 0) { - ERROR("Terminal not set up"); - return -1; - } -@@ -519,51 +1151,51 @@ static int lxc_terminal_peer_proxy_alloc(struct lxc_terminal *terminal, - } - - /* This is the proxy terminal that will be given to the client, and -- * that the real terminal ptmx will send to / recv from. -+ * that the real terminal master will send to / recv from. 
- */ -- ret = openpty(&terminal->proxy.ptmx, &terminal->proxy.pts, NULL, -+ ret = openpty(&terminal->proxy.master, &terminal->proxy.slave, NULL, - NULL, NULL); - if (ret < 0) { - SYSERROR("Failed to open proxy terminal"); - return -1; - } - -- ret = ttyname_r(terminal->proxy.pts, terminal->proxy.name, -+ ret = ttyname_r(terminal->proxy.slave, terminal->proxy.name, - sizeof(terminal->proxy.name)); - if (ret < 0) { -- SYSERROR("Failed to retrieve name of proxy terminal pts"); -+ SYSERROR("Failed to retrieve name of proxy terminal slave"); - goto on_error; - } - -- ret = fd_cloexec(terminal->proxy.ptmx, true); -+ ret = fd_cloexec(terminal->proxy.master, true); - if (ret < 0) { -- SYSERROR("Failed to set FD_CLOEXEC flag on proxy terminal ptmx"); -+ SYSERROR("Failed to set FD_CLOEXEC flag on proxy terminal master"); - goto on_error; - } - -- ret = fd_cloexec(terminal->proxy.pts, true); -+ ret = fd_cloexec(terminal->proxy.slave, true); - if (ret < 0) { -- SYSERROR("Failed to set FD_CLOEXEC flag on proxy terminal pts"); -+ SYSERROR("Failed to set FD_CLOEXEC flag on proxy terminal slave"); - goto on_error; - } - -- ret = lxc_setup_tios(terminal->proxy.pts, &oldtermio); -+ ret = lxc_setup_tios(terminal->proxy.slave, &oldtermio); - if (ret < 0) - goto on_error; - -- ts = lxc_terminal_signal_init(terminal->proxy.ptmx, terminal->ptmx); -+ ts = lxc_terminal_signal_init(terminal->proxy.master, terminal->master); - if (!ts) - goto on_error; - - terminal->tty_state = ts; -- terminal->peer = terminal->proxy.pts; -+ terminal->peer = terminal->proxy.slave; - terminal->proxy.busy = sockfd; - ret = lxc_terminal_mainloop_add_peer(terminal); - if (ret < 0) - goto on_error; - -- NOTICE("Opened proxy terminal with ptmx fd %d and pts fd %d", -- terminal->proxy.ptmx, terminal->proxy.pts); -+ NOTICE("Opened proxy terminal with master fd %d and slave fd %d", -+ terminal->proxy.master, terminal->proxy.slave); - return 0; - - on_error: -@@ -574,7 +1206,7 @@ on_error: - int 
lxc_terminal_allocate(struct lxc_conf *conf, int sockfd, int *ttyreq) - { - int ttynum; -- int ptmxfd = -1; -+ int masterfd = -1; - struct lxc_tty_info *ttys = &conf->ttys; - struct lxc_terminal *terminal = &conf->console; - -@@ -585,7 +1217,7 @@ int lxc_terminal_allocate(struct lxc_conf *conf, int sockfd, int *ttyreq) - if (ret < 0) - goto out; - -- ptmxfd = terminal->proxy.ptmx; -+ masterfd = terminal->proxy.master; - goto out; - } - -@@ -614,10 +1246,10 @@ int lxc_terminal_allocate(struct lxc_conf *conf, int sockfd, int *ttyreq) - - out_tty: - ttys->tty[ttynum - 1].busy = sockfd; -- ptmxfd = ttys->tty[ttynum - 1].ptmx; -+ masterfd = ttys->tty[ttynum - 1].master; - - out: -- return ptmxfd; -+ return masterfd; - } - - void lxc_terminal_free(struct lxc_conf *conf, int fd) -@@ -633,20 +1265,37 @@ void lxc_terminal_free(struct lxc_conf *conf, int fd) - if (terminal->proxy.busy != fd) - return; - -- lxc_mainloop_del_handler(terminal->descr, terminal->proxy.pts); -+ lxc_mainloop_del_handler(terminal->descr, terminal->proxy.slave); - lxc_terminal_peer_proxy_free(terminal); - } - - static int lxc_terminal_peer_default(struct lxc_terminal *terminal) - { -- struct lxc_terminal_state *ts; -- const char *path; -+ struct lxc_terminal_state *ts = NULL; -+ const char *path = NULL; - int ret = 0; - - if (terminal->path) - path = terminal->path; -- else -- path = "/dev/tty"; -+ -+#ifdef HAVE_ISULAD -+ /* isulad: if no console was given, try current controlling terminal, there -+ * won't be one if we were started as a daemon (-d) -+ */ -+ if (!path && !access("/dev/tty", F_OK)) { -+ int fd; -+ fd = open("/dev/tty", O_RDWR); -+ if (fd >= 0) { -+ close(fd); -+ path = "/dev/tty"; -+ } -+ } -+ -+ if (!path) { -+ DEBUG("Not have a controlling terminal"); -+ return 0; -+ } -+#endif - - terminal->peer = lxc_unpriv(open(path, O_RDWR | O_CLOEXEC)); - if (terminal->peer < 0) { -@@ -666,14 +1315,14 @@ static int lxc_terminal_peer_default(struct lxc_terminal *terminal) - goto 
on_error_free_tios; - } - -- ts = lxc_terminal_signal_init(terminal->peer, terminal->ptmx); -+ ts = lxc_terminal_signal_init(terminal->peer, terminal->master); - terminal->tty_state = ts; - if (!ts) { - WARN("Failed to install signal handler"); - goto on_error_free_tios; - } - -- lxc_terminal_winsz(terminal->peer, terminal->ptmx); -+ lxc_terminal_winsz(terminal->peer, terminal->master); - - terminal->tios = malloc(sizeof(*terminal->tios)); - if (!terminal->tios) -@@ -749,22 +1398,51 @@ void lxc_terminal_delete(struct lxc_terminal *terminal) - close(terminal->peer); - terminal->peer = -1; - -- if (terminal->ptmx >= 0) -- close(terminal->ptmx); -- terminal->ptmx = -1; -+ if (terminal->master >= 0) -+ close(terminal->master); -+ terminal->master = -1; - -- if (terminal->pts >= 0) -- close(terminal->pts); -- terminal->pts = -1; -+ if (terminal->slave >= 0) -+ close(terminal->slave); -+ terminal->slave = -1; - - if (terminal->log_fd >= 0) - close(terminal->log_fd); - terminal->log_fd = -1; -+ -+#ifdef HAVE_ISULAD -+ if (is_syslog(terminal->log_driver)) { -+ closelog(); -+ free(terminal->log_driver); -+ } -+ /* isulad: close all pipes */ -+ if (terminal->pipes[0][0] >= 0) -+ close(terminal->pipes[0][0]); -+ terminal->pipes[0][0] = -1; -+ if (terminal->pipes[0][1] >= 0) -+ close(terminal->pipes[0][1]); -+ terminal->pipes[0][1] = -1; -+ if (terminal->pipes[1][0] >= 0) -+ close(terminal->pipes[1][0]); -+ terminal->pipes[1][0] = -1; -+ if (terminal->pipes[1][1] >= 0) -+ close(terminal->pipes[1][1]); -+ terminal->pipes[1][1] = -1; -+ if (terminal->pipes[2][0] >= 0) -+ close(terminal->pipes[2][0]); -+ terminal->pipes[2][0] = -1; -+ if (terminal->pipes[2][1] >= 0) -+ close(terminal->pipes[2][1]); -+ terminal->pipes[2][1] = -1; -+ -+ /* isulad: delete all fifos */ -+ lxc_terminal_delete_fifo(-1, &terminal->fifos); -+#endif - } - - /** - * Note that this function needs to run before the mainloop starts. 
Since we -- * register a handler for the terminal's ptmxfd when we create the mainloop -+ * register a handler for the terminal's masterfd when we create the mainloop - * the terminal handler needs to see an allocated ringbuffer. - */ - static int lxc_terminal_create_ringbuf(struct lxc_terminal *terminal) -@@ -828,31 +1506,275 @@ int lxc_terminal_create_log_file(struct lxc_terminal *terminal) - return 0; - } - -+#ifdef HAVE_ISULAD -+/* isulad: fd_nonblock */ -+static int fd_nonblock(int fd) -+{ -+ int flags; -+ -+ flags = fcntl(fd, F_GETFL); -+ -+ return fcntl(fd, F_SETFL, (int)((unsigned int)flags | O_NONBLOCK)); -+} -+ -+static int terminal_fifo_open(const char *fifo_path, int flags) -+{ -+ int fd = -1; -+ -+ fd = lxc_open(fifo_path, flags, 0); -+ if (fd < 0) { -+ WARN("Failed to open fifo %s to send message: %s.", fifo_path, -+ strerror(errno)); -+ return -1; -+ } -+ -+ return fd; -+} -+ -+bool fifo_exists(const char *path) -+{ -+ struct stat sb; -+ int ret; -+ -+ ret = stat(path, &sb); -+ if (ret < 0) -+ // could be something other than eexist, just say no -+ return false; -+ return S_ISFIFO(sb.st_mode); -+} -+ -+/* isulad: set terminal fifos */ -+static int lxc_terminal_set_fifo(struct lxc_terminal *console, const char *in, const char *out, const char *err, int *input_fd) -+{ -+ int fifofd_in = -1, fifofd_out = -1, fifofd_err = -1; -+ struct lxc_fifos_fd *fifo_elem = NULL; -+ -+ if ((in && !fifo_exists(in)) || (out && !fifo_exists(out)) || (err && !fifo_exists(err))) { -+ ERROR("File %s or %s or %s does not refer to a FIFO", in, out, err); -+ return -1; -+ } -+ -+ if (in) { -+ fifofd_in = terminal_fifo_open(in, O_RDONLY | O_NONBLOCK | O_CLOEXEC); -+ if (fifofd_in < 0) { -+ SYSERROR("Failed to open FIFO: %s", in); -+ return -1; -+ } -+ } -+ -+ if (out) { -+ fifofd_out = terminal_fifo_open(out, O_WRONLY | O_NONBLOCK | O_CLOEXEC); -+ if (fifofd_out < 0) { -+ SYSERROR("Failed to open FIFO: %s", out); -+ if (fifofd_in >= 0) -+ close(fifofd_in); -+ return -1; -+ } 
-+ } -+ -+ if (err) { -+ fifofd_err = terminal_fifo_open(err, O_WRONLY | O_NONBLOCK | O_CLOEXEC); -+ if (fifofd_err < 0) { -+ SYSERROR("Failed to open FIFO: %s", err); -+ if (fifofd_in >= 0) -+ close(fifofd_in); -+ if (fifofd_out >= 0) -+ close(fifofd_out); -+ return -1; -+ } -+ } -+ -+ fifo_elem = malloc(sizeof(*fifo_elem)); -+ if (fifo_elem == NULL) { -+ if (fifofd_in >= 0) -+ close(fifofd_in); -+ if (fifofd_out >= 0) -+ close(fifofd_out); -+ if (fifofd_err >= 0) -+ close(fifofd_err); -+ return -1; -+ } -+ memset(fifo_elem, 0, sizeof(*fifo_elem)); -+ -+ fifo_elem->in_fifo = safe_strdup(in ? in : ""); -+ fifo_elem->out_fifo = safe_strdup(out ? out : ""); -+ fifo_elem->err_fifo = safe_strdup(err ? err : ""); -+ fifo_elem->in_fd = fifofd_in; -+ fifo_elem->out_fd = fifofd_out; -+ fifo_elem->err_fd = fifofd_err; -+ lxc_list_add_elem(&fifo_elem->node, fifo_elem); -+ lxc_list_add_tail(&console->fifos, &fifo_elem->node); -+ -+ if (input_fd) -+ *input_fd = fifofd_in; -+ -+ return 0; -+} -+ -+/* isulad: add default fifos */ -+static int lxc_terminal_fifo_default(struct lxc_terminal *terminal) -+{ -+ if (terminal->init_fifo[0] || terminal->init_fifo[1] || terminal->init_fifo[2]) -+ return lxc_terminal_set_fifo(terminal, terminal->init_fifo[0], terminal->init_fifo[1], terminal->init_fifo[2], NULL); -+ return 0; -+} -+ - int lxc_terminal_create(struct lxc_terminal *terminal) - { - int ret; - -- ret = openpty(&terminal->ptmx, &terminal->pts, NULL, NULL, NULL); -+ if (!terminal->disable_pty) { -+ ret = openpty(&terminal->master, &terminal->slave, NULL, NULL, NULL); -+ if (ret < 0) { -+ SYSERROR("Failed to open terminal"); -+ return -1; -+ } -+ -+ ret = ttyname_r(terminal->slave, terminal->name, sizeof(terminal->name)); -+ if (ret < 0) { -+ SYSERROR("Failed to retrieve name of terminal slave"); -+ goto err; -+ } -+ -+ ret = fd_cloexec(terminal->master, true); -+ if (ret < 0) { -+ SYSERROR("Failed to set FD_CLOEXEC flag on terminal master"); -+ goto err; -+ } -+ -+ /* isulad: 
make master NONBLOCK */ -+ ret = fd_nonblock(terminal->master); -+ if (ret < 0) { -+ SYSERROR("Failed to set O_NONBLOCK flag on terminal master"); -+ goto err; -+ } -+ -+ ret = fd_cloexec(terminal->slave, true); -+ if (ret < 0) { -+ SYSERROR("Failed to set FD_CLOEXEC flag on terminal slave"); -+ goto err; -+ } -+ -+ ret = lxc_terminal_peer_default(terminal); -+ if (ret < 0) { -+ ERROR("Failed to allocate proxy terminal"); -+ goto err; -+ } -+ } else { -+ /* isulad: create 3 pipes */ -+ /* for stdin */ -+ if (pipe2(terminal->pipes[0], O_CLOEXEC)) { -+ ERROR("Failed to create stdin pipe"); -+ goto err; -+ } -+ /* for stdout */ -+ if (pipe2(terminal->pipes[1], O_CLOEXEC)) { -+ ERROR("Failed to create stdout pipe"); -+ goto err; -+ } -+ /* for stderr */ -+ if (pipe2(terminal->pipes[2], O_CLOEXEC)) { -+ ERROR("Failed to create stderr pipe"); -+ goto err; -+ } -+ } -+ -+ /* isulad: open fifos */ -+ ret = lxc_terminal_fifo_default(terminal); -+ if (ret < 0) { -+ ERROR("Failed to allocate fifo terminal"); -+ goto err; -+ } -+ -+ return 0; -+ -+err: -+ lxc_terminal_delete(terminal); -+ return -ENODEV; -+} -+ -+/* isulad: add fifos dynamic*/ -+int lxc_terminal_add_fifos(struct lxc_conf *conf, const char *fifonames) -+{ -+ int ret = 0; -+ struct lxc_terminal *terminal = &conf->console; -+ int fifofd_in = -1; -+ char *tmp = NULL, *saveptr = NULL, *in = NULL, *out = NULL, *err = NULL; -+ const char *none_fifo_name = "none"; -+ -+ tmp = safe_strdup(fifonames); -+ -+ in = strtok_r(tmp, "&&&&", &saveptr); -+ if (!in) { -+ ret = -1; -+ goto free_out; -+ } -+ if (strcmp(in, none_fifo_name) == 0) -+ in = NULL; -+ -+ out = strtok_r(NULL, "&&&&", &saveptr); -+ if (!out) { -+ ret = -1; -+ goto free_out; -+ } -+ if (strcmp(out, none_fifo_name) == 0) -+ out = NULL; -+ -+ err = strtok_r(NULL, "&&&&", &saveptr); -+ if (!err) { -+ ret = -1; -+ goto free_out; -+ } -+ if (strcmp(err, none_fifo_name) == 0) -+ err = NULL; -+ -+ ret = lxc_terminal_set_fifo(terminal, in, out, err, &fifofd_in); -+ 
if (ret < 0) { -+ ERROR("Faild to set fifos to console config"); -+ ret = -1; -+ goto free_out; -+ } -+ -+ if (lxc_mainloop_add_handler(terminal->descr, fifofd_in, -+ lxc_terminal_io_cb, terminal)) { -+ ERROR("console fifo not added to mainloop"); -+ lxc_terminal_delete_fifo(fifofd_in, &terminal->fifos); -+ ret = -1; -+ goto free_out; -+ } -+ -+free_out: -+ if (tmp) -+ free(tmp); -+ return ret; -+} -+ -+#else -+int lxc_terminal_create(struct lxc_terminal *terminal) -+{ -+ int ret; -+ -+ ret = openpty(&terminal->master, &terminal->slave, NULL, NULL, NULL); - if (ret < 0) { - SYSERROR("Failed to open terminal"); - return -1; - } - -- ret = ttyname_r(terminal->pts, terminal->name, sizeof(terminal->name)); -+ ret = ttyname_r(terminal->slave, terminal->name, sizeof(terminal->name)); - if (ret < 0) { -- SYSERROR("Failed to retrieve name of terminal pts"); -+ SYSERROR("Failed to retrieve name of terminal slave"); - goto err; - } - -- ret = fd_cloexec(terminal->ptmx, true); -+ ret = fd_cloexec(terminal->master, true); - if (ret < 0) { -- SYSERROR("Failed to set FD_CLOEXEC flag on terminal ptmx"); -+ SYSERROR("Failed to set FD_CLOEXEC flag on terminal master"); - goto err; - } - -- ret = fd_cloexec(terminal->pts, true); -+ ret = fd_cloexec(terminal->slave, true); - if (ret < 0) { -- SYSERROR("Failed to set FD_CLOEXEC flag on terminal pts"); -+ SYSERROR("Failed to set FD_CLOEXEC flag on terminal slave"); - goto err; - } - -@@ -868,6 +1790,7 @@ err: - lxc_terminal_delete(terminal); - return -ENODEV; - } -+#endif - - int lxc_terminal_setup(struct lxc_conf *conf) - { -@@ -883,6 +1806,18 @@ int lxc_terminal_setup(struct lxc_conf *conf) - if (ret < 0) - return -1; - -+#ifdef HAVE_ISULAD -+ if (is_syslog(terminal->log_driver)) { -+ if (terminal->log_syslog_tag == NULL) { -+ terminal->log_syslog_tag = malloc(16 * sizeof(char)); -+ (void)strlcpy(terminal->log_syslog_tag, conf->name, 16); -+ } -+ if (terminal->log_syslog_facility <= 0) { -+ terminal->log_syslog_facility = LOG_DAEMON; 
-+ } -+ openlog(terminal->log_syslog_tag, LOG_PID, terminal->log_syslog_facility); -+ } -+#endif - ret = lxc_terminal_create_log_file(terminal); - if (ret < 0) - goto err; -@@ -956,21 +1891,21 @@ int lxc_terminal_stdin_cb(int fd, uint32_t events, void *cbdata, - ts->saw_escape = 0; - } - -- ret = lxc_write_nointr(ts->ptmxfd, &c, 1); -+ ret = lxc_write_nointr(ts->masterfd, &c, 1); - if (ret <= 0) - return LXC_MAINLOOP_CLOSE; - - return LXC_MAINLOOP_CONTINUE; - } - --int lxc_terminal_ptmx_cb(int fd, uint32_t events, void *cbdata, -+int lxc_terminal_master_cb(int fd, uint32_t events, void *cbdata, - struct lxc_epoll_descr *descr) - { - int r, w; - char buf[LXC_TERMINAL_BUFFER_SIZE]; - struct lxc_terminal_state *ts = cbdata; - -- if (fd != ts->ptmxfd) -+ if (fd != ts->masterfd) - return LXC_MAINLOOP_CLOSE; - - r = lxc_read_nointr(fd, buf, sizeof(buf)); -@@ -984,16 +1919,16 @@ int lxc_terminal_ptmx_cb(int fd, uint32_t events, void *cbdata, - return LXC_MAINLOOP_CONTINUE; - } - --int lxc_terminal_getfd(struct lxc_container *c, int *ttynum, int *ptmxfd) -+int lxc_terminal_getfd(struct lxc_container *c, int *ttynum, int *masterfd) - { -- return lxc_cmd_console(c->name, ttynum, ptmxfd, c->config_path); -+ return lxc_cmd_console(c->name, ttynum, masterfd, c->config_path); - } - - int lxc_console(struct lxc_container *c, int ttynum, - int stdinfd, int stdoutfd, int stderrfd, - int escape) - { -- int ptmxfd, ret, ttyfd; -+ int masterfd, ret, ttyfd; - struct lxc_epoll_descr descr; - struct termios oldtios; - struct lxc_terminal_state *ts; -@@ -1002,7 +1937,7 @@ int lxc_console(struct lxc_container *c, int ttynum, - }; - int istty = 0; - -- ttyfd = lxc_cmd_console(c->name, &ttynum, &ptmxfd, c->config_path); -+ ttyfd = lxc_cmd_console(c->name, &ttynum, &masterfd, c->config_path); - if (ttyfd < 0) - return -1; - -@@ -1010,7 +1945,7 @@ int lxc_console(struct lxc_container *c, int ttynum, - if (ret < 0) - TRACE("Process is already group leader"); - -- ts = 
lxc_terminal_signal_init(stdinfd, ptmxfd); -+ ts = lxc_terminal_signal_init(stdinfd, masterfd); - if (!ts) { - ret = -1; - goto close_fds; -@@ -1021,8 +1956,8 @@ int lxc_console(struct lxc_container *c, int ttynum, - - istty = isatty(stdinfd); - if (istty) { -- lxc_terminal_winsz(stdinfd, ptmxfd); -- lxc_terminal_winsz(ts->stdinfd, ts->ptmxfd); -+ lxc_terminal_winsz(stdinfd, masterfd); -+ lxc_terminal_winsz(ts->stdinfd, ts->masterfd); - } else { - INFO("File descriptor %d does not refer to a terminal", stdinfd); - } -@@ -1049,10 +1984,10 @@ int lxc_console(struct lxc_container *c, int ttynum, - goto close_mainloop; - } - -- ret = lxc_mainloop_add_handler(&descr, ts->ptmxfd, -- lxc_terminal_ptmx_cb, ts); -+ ret = lxc_mainloop_add_handler(&descr, ts->masterfd, -+ lxc_terminal_master_cb, ts); - if (ret < 0) { -- ERROR("Failed to add ptmx handler"); -+ ERROR("Failed to add master handler"); - goto close_mainloop; - } - -@@ -1093,7 +2028,7 @@ sigwinch_fini: - lxc_terminal_signal_fini(&terminal); - - close_fds: -- close(ptmxfd); -+ close(masterfd); - close(ttyfd); - - return ret; -@@ -1120,9 +2055,15 @@ int lxc_terminal_prepare_login(int fd) - if (ret < 0) - return -1; - -+#ifdef HAVE_ISULAD -+ ret = set_stdfds(fd); -+ if (ret < 0) -+ return -1; -+#else - ret = lxc_terminal_set_stdfds(fd); - if (ret < 0) - return -1; -+#endif - - if (fd > STDERR_FILENO) - close(fd); -@@ -1133,19 +2074,31 @@ int lxc_terminal_prepare_login(int fd) - void lxc_terminal_info_init(struct lxc_terminal_info *terminal) - { - terminal->name[0] = '\0'; -- terminal->ptmx = -EBADF; -- terminal->pts = -EBADF; -+ terminal->master = -EBADF; -+ terminal->slave = -EBADF; - terminal->busy = -1; - } - - void lxc_terminal_init(struct lxc_terminal *terminal) - { - memset(terminal, 0, sizeof(*terminal)); -- terminal->pts = -EBADF; -- terminal->ptmx = -EBADF; -+ terminal->slave = -EBADF; -+ terminal->master = -EBADF; - terminal->peer = -EBADF; - terminal->log_fd = -EBADF; - 
lxc_terminal_info_init(&terminal->proxy); -+#ifdef HAVE_ISULAD -+ terminal->init_fifo[0] = NULL; -+ terminal->init_fifo[1] = NULL; -+ terminal->init_fifo[2] = NULL; -+ terminal->pipes[0][0] = -1; -+ terminal->pipes[0][1] = -1; -+ terminal->pipes[1][0] = -1; -+ terminal->pipes[1][1] = -1; -+ terminal->pipes[2][0] = -1; -+ terminal->pipes[2][1] = -1; -+ lxc_list_init(&terminal->fifos); -+#endif - } - - void lxc_terminal_conf_free(struct lxc_terminal *terminal) -@@ -1155,6 +2108,15 @@ void lxc_terminal_conf_free(struct lxc_terminal *terminal) - if (terminal->buffer_size > 0 && terminal->ringbuf.addr) - lxc_ringbuf_release(&terminal->ringbuf); - lxc_terminal_signal_fini(terminal); -+#ifdef HAVE_ISULAD -+ /*isulad: free console fifos */ -+ free(terminal->init_fifo[0]); -+ free(terminal->init_fifo[1]); -+ free(terminal->init_fifo[2]); -+ lxc_terminal_delete_fifo(-1, &terminal->fifos); -+ free(terminal->log_driver); -+ free(terminal->log_syslog_tag); -+#endif - } - - int lxc_terminal_map_ids(struct lxc_conf *c, struct lxc_terminal *terminal) -@@ -1167,13 +2129,14 @@ int lxc_terminal_map_ids(struct lxc_conf *c, struct lxc_terminal *terminal) - if (strcmp(terminal->name, "") == 0) - return 0; - -- ret = userns_exec_mapped_root(terminal->name, terminal->pts, c); -+ ret = chown_mapped_root(terminal->name, c); - if (ret < 0) { -- return log_error(-1, "Failed to chown terminal %d(%s)", -- terminal->pts, terminal->name); -+ ERROR("Failed to chown terminal \"%s\"", terminal->name); -+ return -1; - } - -- TRACE("Chowned terminal %d(%s)", terminal->pts, terminal->name); -+ TRACE("Chowned terminal \"%s\"", terminal->name); - - return 0; - } -+ -diff --git a/src/lxc/terminal.h b/src/lxc/terminal.h -index 4d21f33d9..9de4cd055 100644 ---- a/src/lxc/terminal.h -+++ b/src/lxc/terminal.h -@@ -15,14 +15,14 @@ struct lxc_conf; - struct lxc_epoll_descr; - - struct lxc_terminal_info { -- /* the path name of the pts side */ -+ /* the path name of the slave side */ - char name[PATH_MAX]; - -- 
/* the file descriptor of the ptmx */ -- int ptmx; -+ /* the file descriptor of the master */ -+ int master; - -- /* the file descriptor of the pts */ -- int pts; -+ /* the file descriptor of the slave */ -+ int slave; - - /* whether the terminal is currently used */ - int busy; -@@ -32,7 +32,7 @@ struct lxc_terminal_state { - struct lxc_list node; - int stdinfd; - int stdoutfd; -- int ptmxfd; -+ int masterfd; - - /* Escape sequence to use for exiting the terminal. A single char can - * be specified. The terminal can then exited by doing: Ctrl + -@@ -57,8 +57,8 @@ struct lxc_terminal_state { - }; - - struct lxc_terminal { -- int pts; -- int ptmx; -+ int slave; -+ int master; - int peer; - struct lxc_terminal_info proxy; - struct lxc_epoll_descr *descr; -@@ -79,6 +79,16 @@ struct lxc_terminal { - - /* whether the log file will be rotated */ - unsigned int log_rotate; -+#ifdef HAVE_ISULAD -+ /* driver of log, support file and syslog */ -+ char *log_driver; -+ -+ /* syslog tag for every log */ -+ char *log_syslog_tag; -+ -+ /* syslog facility */ -+ int log_syslog_facility; -+#endif - }; - - struct /* lxc_terminal_ringbuf */ { -@@ -88,7 +98,27 @@ struct lxc_terminal { - /* the in-memory ringbuffer */ - struct lxc_ringbuf ringbuf; - }; -+#ifdef HAVE_ISULAD -+ char *init_fifo[3]; /* isulad: default fifos for the start */ -+ struct lxc_list fifos; /* isulad: fifos used to forward teminal */ -+ bool disable_pty; -+ bool open_stdin; -+ int pipes[3][2]; /* isulad: pipes for dup to container fds of stdin,stdout,stderr on daemonize mode*/ -+#endif -+}; -+ -+#ifdef HAVE_ISULAD -+/* isulad: fifo struct */ -+struct lxc_fifos_fd { -+ char *in_fifo; -+ char *out_fifo; -+ char *err_fifo; -+ int in_fd; -+ int out_fd; -+ int err_fd; -+ struct lxc_list node; - }; -+#endif - - /** - * lxc_terminal_allocate: allocate the console or a tty -@@ -102,10 +132,10 @@ extern int lxc_terminal_allocate(struct lxc_conf *conf, int sockfd, int *ttynum - - /** - * Create a new terminal: -- * - calls 
openpty() to allocate a ptmx/pts pair -- * - sets the FD_CLOEXEC flag on the ptmx/pts fds -+ * - calls openpty() to allocate a master/slave pair -+ * - sets the FD_CLOEXEC flag on the master/slave fds - * - allocates either the current controlling terminal (default) or a user -- * specified terminal as proxy for the newly created ptmx/pts pair -+ * specified terminal as proxy for the newly created master/slave pair - * - sets up SIGWINCH handler, winsz, and new terminal settings - * (Handlers for SIGWINCH and I/O are not registered in a mainloop.) - */ -@@ -164,7 +194,7 @@ extern int lxc_console(struct lxc_container *c, int ttynum, - * the range specified by lxc.tty.max to allocate a specific tty. - */ - extern int lxc_terminal_getfd(struct lxc_container *c, int *ttynum, -- int *ptmxfd); -+ int *masterfd); - - /** - * Make fd a duplicate of the standard file descriptors. The fd is made a -@@ -183,12 +213,12 @@ extern int lxc_terminal_stdin_cb(int fd, uint32_t events, void *cbdata, - struct lxc_epoll_descr *descr); - - /** -- * Handler for events on the ptmx fd of the terminal. To be registered via -+ * Handler for events on the master fd of the terminal. To be registered via - * the corresponding functions declared and defined in mainloop.{c,h} or - * lxc_terminal_mainloop_add(). - * This function exits the loop cleanly when an EPOLLHUP event is received. 
- */ --extern int lxc_terminal_ptmx_cb(int fd, uint32_t events, void *cbdata, -+extern int lxc_terminal_master_cb(int fd, uint32_t events, void *cbdata, - struct lxc_epoll_descr *descr); - - /** -@@ -202,9 +232,9 @@ extern int lxc_setup_tios(int fd, struct termios *oldtios); - * lxc_terminal_winsz: propagate winsz from one terminal to another - * - * @srcfd -- * - terminal to get size from (typically a pts pty) -+ * - terminal to get size from (typically a slave pty) - * @dstfd -- * - terminal to set size on (typically a ptmx pty) -+ * - terminal to set size on (typically a master pty) - */ - extern void lxc_terminal_winsz(int srcfd, int dstfd); - -@@ -254,4 +284,9 @@ extern void lxc_terminal_init(struct lxc_terminal *terminal); - extern int lxc_terminal_map_ids(struct lxc_conf *c, - struct lxc_terminal *terminal); - -+#ifdef HAVE_ISULAD -+int lxc_terminal_add_fifos(struct lxc_conf *conf, const char *fifonames); -+int lxc_set_terminal_winsz(struct lxc_terminal *terminal, unsigned int height, unsigned int width); -+#endif -+ - #endif /* __LXC_TERMINAL_H */ -diff --git a/src/lxc/tools/arguments.h b/src/lxc/tools/arguments.h -index cb0ba744d..41ea1097a 100644 ---- a/src/lxc/tools/arguments.h -+++ b/src/lxc/tools/arguments.h -@@ -40,6 +40,16 @@ struct lxc_arguments { - - /* for lxc-start */ - const char *share_ns[32]; /* size must be greater than LXC_NS_MAX */ -+#ifdef HAVE_ISULAD -+ const char *container_info; /* isulad: file used to store pid and ppid info of container */ -+ char *terminal_fifos[3]; /* isulad add, fifos used to redirct stdin/out/err */ -+ const char *exit_monitor_fifo; /* isulad: fifo used to monitor state of monitor process */ -+ const char *suffix; /* isulad add, suffix used for connect with parent of execed process*/ -+ int disable_pty; -+ int open_stdin; -+ unsigned int start_timeout; /* isulad: Seconds for waiting on a container to start before it is killed*/ -+ int64_t attach_timeout; /* for lxc-attach */ -+#endif - - /* for lxc-console */ - 
unsigned int ttynum; -@@ -152,6 +162,19 @@ struct lxc_arguments { - #define OPT_SHARE_UTS OPT_USAGE - 5 - #define OPT_SHARE_PID OPT_USAGE - 6 - -+#ifdef HAVE_ISULAD -+#define OPT_INPUT_FIFO OPT_USAGE - 7 -+#define OPT_OUTPUT_FIFO OPT_USAGE - 8 -+#define OPT_STDERR_FIFO OPT_USAGE - 9 -+#define OPT_CONTAINER_INFO OPT_USAGE - 10 -+#define OPT_EXIT_FIFO OPT_USAGE - 11 -+#define OPT_START_TIMEOUT OPT_USAGE - 12 -+#define OPT_DISABLE_PTY OPT_USAGE - 13 -+#define OPT_OPEN_STDIN OPT_USAGE - 14 -+#define OPT_ATTACH_TIMEOUT OPT_USAGE - 15 -+#define OPT_ATTACH_SUFFIX OPT_USAGE - 16 -+#endif -+ - extern int lxc_arguments_parse(struct lxc_arguments *args, int argc, - char *const argv[]); - -diff --git a/src/lxc/tools/lxc_attach.c b/src/lxc/tools/lxc_attach.c -index a8f493aa7..dbddc2a51 100644 ---- a/src/lxc/tools/lxc_attach.c -+++ b/src/lxc/tools/lxc_attach.c -@@ -72,8 +72,19 @@ static const struct option my_longopts[] = { - {"set-var", required_argument, 0, 'v'}, - {"pty-log", required_argument, 0, 'L'}, - {"rcfile", required_argument, 0, 'f'}, -+#ifndef HAVE_ISULAD - {"uid", required_argument, 0, 'u'}, - {"gid", required_argument, 0, 'g'}, -+#else -+ {"user", required_argument, 0, 'u'}, -+ {"in-fifo", required_argument, 0, OPT_INPUT_FIFO}, /* isulad add terminal fifos*/ -+ {"out-fifo", required_argument, 0, OPT_OUTPUT_FIFO}, -+ {"err-fifo", required_argument, 0, OPT_STDERR_FIFO}, -+ {"suffix", required_argument, 0, OPT_ATTACH_SUFFIX}, -+ {"timeout", required_argument, 0, OPT_ATTACH_TIMEOUT}, -+ {"disable-pty", no_argument, 0, OPT_DISABLE_PTY}, -+ {"open-stdin", no_argument, 0, OPT_OPEN_STDIN}, -+#endif - LXC_COMMON_OPTIONS - }; - -@@ -124,9 +135,19 @@ Options :\n\ - multiple times.\n\ - -f, --rcfile=FILE\n\ - Load configuration file FILE\n\ -+" -+#ifndef HAVE_ISULAD -+"\ - -u, --uid=UID Execute COMMAND with UID inside the container\n\ - -g, --gid=GID Execute COMMAND with GID inside the container\n\ --", -+" -+#else -+"\ -+ --user User ID (format: UID[:GID])\n\ -+ --timeout 
Timeout in seconds (default: 0)\n\ -+" -+#endif -+, - .options = my_longopts, - .parser = my_parser, - .checker = NULL, -@@ -136,6 +157,71 @@ Options :\n\ - .gid = LXC_INVALID_GID, - }; - -+#ifdef HAVE_ISULAD -+static int parse_user_id(const char *username, char **uid, char **gid, char **tmp_dup) -+{ -+ char *tmp = NULL; -+ char *pdot = NULL; -+ -+ if (uid == NULL || gid == NULL || tmp_dup == NULL) { -+ return -1; -+ } -+ -+ if (username != NULL) { -+ tmp = strdup(username); -+ if (tmp == NULL) { -+ ERROR("Failed to duplicate user name"); -+ return -1; -+ } -+ -+ // for free tmp in caller -+ *tmp_dup = tmp; -+ pdot = strstr(tmp, ":"); -+ if (pdot != NULL) { -+ *pdot = '\0'; -+ if (pdot != tmp) { -+ // uid found -+ *uid = tmp; -+ } -+ -+ if (*(pdot + 1) != '\0') { -+ // gid found -+ *gid = pdot + 1; -+ } -+ } else { -+ // No : found -+ if (*tmp != '\0') { -+ *uid = tmp; -+ } -+ } -+ } -+ -+ return 0; -+} -+ -+static int get_attach_uid_gid(const char *username, uid_t *user_id, gid_t *group_id) -+{ -+ char *tmp = NULL; -+ char *uid = NULL; -+ char *gid = NULL; -+ -+ // parse uid and gid by username -+ if (parse_user_id(username, &uid, &gid, &tmp) != 0) { -+ return -1; -+ } -+ -+ if (uid != NULL) { -+ *user_id = (unsigned int)atoll(uid); -+ } -+ if (gid != NULL) { -+ *group_id = (unsigned int)atoll(gid); -+ } -+ -+ free(tmp); -+ return 0; -+} -+#endif -+ - static int my_parser(struct lxc_arguments *args, int c, char *arg) - { - int ret; -@@ -193,6 +279,7 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) - case 'f': - args->rcfile = arg; - break; -+#ifndef HAVE_ISULAD - case 'u': - if (lxc_safe_uint(arg, &args->uid) < 0) - return -1; -@@ -201,8 +288,40 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) - if (lxc_safe_uint(arg, &args->gid) < 0) - return -1; - break; -+#else -+ case 'u': -+ if (get_attach_uid_gid(arg, &args->uid, &args->gid) != 0) { -+ ERROR("Failed to get attach user U/GID"); -+ return -1; -+ } -+ break; -+ case 
OPT_INPUT_FIFO: -+ args->terminal_fifos[0] = arg; -+ break; -+ case OPT_OUTPUT_FIFO: -+ args->terminal_fifos[1] = arg; -+ break; -+ case OPT_STDERR_FIFO: -+ args->terminal_fifos[2] = arg; -+ break; -+ case OPT_ATTACH_SUFFIX: -+ args->suffix = arg; -+ break; -+ case OPT_ATTACH_TIMEOUT: -+ if(!is_non_negative_num(arg)) { -+ ERROR("Error attach timeout parameter:%s.\n", arg); -+ return -1; -+ } -+ args->attach_timeout = (unsigned int)atoll(arg); -+ break; -+ case OPT_DISABLE_PTY: -+ args->disable_pty = 1; -+ break; -+ case OPT_OPEN_STDIN: -+ args->open_stdin = 1; -+ break; -+#endif - } -- - return 0; - } - -@@ -264,6 +383,281 @@ static int lxc_attach_create_log_file(const char *log_file) - return fd; - } - -+#ifdef HAVE_ISULAD -+// isulad: send '128 + signal' if container is killed by signal. -+#define EXIT_SIGNAL_OFFSET 128 -+ -+/*isulad: attach with terminal*/ -+static int do_attach_foreground(struct lxc_container *c, lxc_attach_command_t *command, -+ lxc_attach_options_t *attach_options, -+ char **errmsg) -+{ -+ int ret = 0; -+ pid_t pid; -+ int wexit = -1; -+ int signal; -+ -+ if (command->program) -+ ret = c->attach(c, lxc_attach_run_command, command, attach_options, &pid); -+ else -+ ret = c->attach(c, lxc_attach_run_shell, NULL, attach_options, &pid); -+ if (ret < 0) { -+ *errmsg = safe_strdup("Internal error, failed to call attach"); -+ goto out; -+ } -+ -+ ret = lxc_wait_for_pid_status(pid); -+ if (ret < 0) { -+ free(*errmsg); -+ *errmsg = safe_strdup("Internal error, failed to wait attached process"); -+ goto out; -+ } -+ -+ if (WIFEXITED(ret)) -+ wexit = WEXITSTATUS(ret); -+ else -+ wexit = -1; -+ -+ if (WIFSIGNALED(ret)) { -+ signal = WTERMSIG(ret); -+ wexit = EXIT_SIGNAL_OFFSET + signal; -+ } -+ -+ ERROR("Execd pid %d exit with %d", pid, wexit); -+ -+out: -+ if (c->lxc_conf->errmsg) { -+ free(*errmsg); -+ *errmsg = safe_strdup(c->lxc_conf->errmsg); -+ } -+ return wexit; -+} -+ -+static void close_msg_pipe(int *errpipe) -+{ -+ if (errpipe[0] >= 0) { -+ 
close(errpipe[0]); -+ errpipe[0] = -1; -+ } -+ if (errpipe[1] >= 0) { -+ close(errpipe[1]); -+ errpipe[1] = -1; -+ } -+} -+ -+/*isulad: attach without terminal in background */ -+static int do_attach_background(struct lxc_container *c, lxc_attach_command_t *command, -+ lxc_attach_options_t *attach_options, -+ char **errmsg) -+{ -+ int ret = 0; -+ int msgpipe[2]; -+ pid_t pid = 0; -+ ssize_t size_read; -+ char msgbuf[BUFSIZ + 1] = {0}; -+ -+ //pipdfd for get error message of child or grandchild process. -+ if (pipe2(msgpipe, O_CLOEXEC) != 0) { -+ SYSERROR("Failed to init msgpipe"); -+ return -1; -+ } -+ -+ pid = fork(); -+ if (pid < 0) { -+ close_msg_pipe(msgpipe); -+ return -1; -+ } -+ -+ if (pid != 0) { -+ close(msgpipe[1]); -+ msgpipe[1] = -1; -+ size_read = read(msgpipe[0], msgbuf, BUFSIZ); -+ if (size_read > 0) { -+ *errmsg = safe_strdup(msgbuf); -+ ret = -1; -+ } -+ -+ close(msgpipe[0]); -+ msgpipe[0] = -1; -+ -+ return ret; -+ } -+ -+ /* second fork to be reparented by init */ -+ pid = fork(); -+ if (pid < 0) { -+ SYSERROR("Error doing dual-fork"); -+ close_msg_pipe(msgpipe); -+ exit(1); -+ } -+ if (pid != 0) { -+ close_msg_pipe(msgpipe); -+ exit(0); -+ } -+ -+ close(msgpipe[0]); -+ msgpipe[0] = -1; -+ -+ if (null_stdfds() < 0) { -+ ERROR("failed to close fds"); -+ exit(1); -+ } -+ setsid(); -+ -+ if (command->program) -+ ret = c->attach(c, lxc_attach_run_command, command, attach_options, &pid); -+ else -+ ret = c->attach(c, lxc_attach_run_shell, NULL, attach_options, &pid); -+ if (ret < 0) { -+ if (c->lxc_conf->errmsg) -+ lxc_write_error_message(msgpipe[1], "%s", c->lxc_conf->errmsg); -+ else -+ lxc_write_error_message(msgpipe[1], "Failed to attach container"); -+ close(msgpipe[1]); -+ msgpipe[1] = -1; -+ ret = -1; -+ goto out; -+ } -+ -+ close(msgpipe[1]); -+ msgpipe[1] = -1; -+ -+ ret = wait_for_pid(pid); -+out: -+ lxc_container_put(c); -+ if (ret) -+ exit(EXIT_FAILURE); -+ else -+ exit(0); -+} -+ -+int main(int argc, char *argv[]) -+{ -+ int wexit = 0; -+ 
struct lxc_log log; -+ char *errmsg = NULL; -+ lxc_attach_options_t attach_options = LXC_ATTACH_OPTIONS_DEFAULT; -+ lxc_attach_command_t command = (lxc_attach_command_t){.program = NULL}; -+ -+ if (lxc_caps_init()) -+ exit(EXIT_FAILURE); -+ -+ if (lxc_arguments_parse(&my_args, argc, argv)) -+ exit(EXIT_FAILURE); -+ -+ log.name = my_args.name; -+ log.file = my_args.log_file; -+ log.level = my_args.log_priority; -+ log.prefix = my_args.progname; -+ log.quiet = my_args.quiet; -+ log.lxcpath = my_args.lxcpath[0]; -+ -+ if (lxc_log_init(&log)) -+ exit(EXIT_FAILURE); -+ -+ if (geteuid()) -+ if (access(my_args.lxcpath[0], O_RDONLY) < 0) { -+ ERROR("You lack access to %s", my_args.lxcpath[0]); -+ exit(EXIT_FAILURE); -+ } -+ -+ struct lxc_container *c = lxc_container_new(my_args.name, my_args.lxcpath[0]); -+ if (!c) -+ exit(EXIT_FAILURE); -+ -+ if (my_args.rcfile) { -+ c->clear_config(c); -+ if (!c->load_config(c, my_args.rcfile)) { -+ ERROR("Failed to load rcfile"); -+ lxc_container_put(c); -+ exit(EXIT_FAILURE); -+ } -+ -+ c->configfile = strdup(my_args.rcfile); -+ if (!c->configfile) { -+ ERROR("Out of memory setting new config filename"); -+ lxc_container_put(c); -+ exit(EXIT_FAILURE); -+ } -+ } -+ -+ if (!c->may_control(c)) { -+ ERROR("Insufficent privileges to control %s", c->name); -+ lxc_container_put(c); -+ exit(EXIT_FAILURE); -+ } -+ -+ if (remount_sys_proc) -+ attach_options.attach_flags |= LXC_ATTACH_REMOUNT_PROC_SYS; -+ -+ if (elevated_privileges) -+ attach_options.attach_flags &= ~(elevated_privileges); -+ -+ if (my_args.terminal_fifos[0] || my_args.terminal_fifos[1] || my_args.terminal_fifos[2]) { -+ attach_options.init_fifo[0] = my_args.terminal_fifos[0]; -+ attach_options.init_fifo[1] = my_args.terminal_fifos[1]; -+ attach_options.init_fifo[2] = my_args.terminal_fifos[2]; -+ attach_options.attach_flags |= LXC_ATTACH_TERMINAL; -+ } else if (stdfd_is_pty()) { -+ attach_options.attach_flags |= LXC_ATTACH_TERMINAL; -+ } -+ -+ attach_options.namespaces = 
namespace_flags; -+ attach_options.personality = new_personality; -+ attach_options.env_policy = env_policy; -+ attach_options.extra_env_vars = extra_env; -+ attach_options.extra_keep_env = extra_keep; -+ attach_options.timeout = my_args.attach_timeout; -+ -+ if (my_args.argc > 0) { -+ command.program = my_args.argv[0]; -+ command.argv = (char**)my_args.argv; -+ } -+ -+ if (my_args.console_log) { -+ attach_options.log_fd = lxc_attach_create_log_file(my_args.console_log); -+ if (attach_options.log_fd < 0) { -+ ERROR("Failed to create log file for %s", c->name); -+ lxc_container_put(c); -+ exit(EXIT_FAILURE); -+ } -+ } -+ -+ if (my_args.uid != LXC_INVALID_UID) -+ attach_options.uid = my_args.uid; -+ -+ if (my_args.gid != LXC_INVALID_GID) -+ attach_options.gid = my_args.gid; -+ -+ attach_options.suffix = my_args.suffix; -+ -+ if (my_args.disable_pty) { -+ attach_options.disable_pty = true; -+ } -+ -+ if (my_args.open_stdin) { -+ attach_options.open_stdin = true; -+ } -+ -+ /* isulad: add do attach background */ -+ if (attach_options.attach_flags & LXC_ATTACH_TERMINAL) -+ wexit = do_attach_foreground(c, &command, &attach_options, &errmsg); -+ else -+ wexit = do_attach_background(c, &command, &attach_options, &errmsg); -+ -+ if (errmsg) { -+ fprintf(stderr, "%s:%s:%s:%d starting container process caused \"%s\"", c->name, -+ __FILE__, __func__, __LINE__, errmsg); -+ free(errmsg); -+ } -+ -+ lxc_container_put(c); -+ if (wexit >= 0) -+ exit(wexit); -+ -+ exit(EXIT_FAILURE); -+} -+#else - int main(int argc, char *argv[]) - { - int ret = -1; -@@ -377,3 +771,4 @@ out: - - exit(EXIT_FAILURE); - } -+#endif -diff --git a/src/lxc/tools/lxc_ls.c b/src/lxc/tools/lxc_ls.c -index 0abcd7a63..e601f9d70 100644 ---- a/src/lxc/tools/lxc_ls.c -+++ b/src/lxc/tools/lxc_ls.c -@@ -106,7 +106,11 @@ struct wrapargs { - /* - * Takes struct wrapargs as argument. 
- */ -+#ifdef HAVE_ISULAD -+static int ls_get_wrapper(void *wrap, int msgfd); -+#else - static int ls_get_wrapper(void *wrap); -+#endif - - /* - * To calculate swap usage we should not simply check memory.usage_in_bytes and -@@ -1005,7 +1009,11 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) - return 0; - } - -+#ifdef HAVE_ISULAD -+static int ls_get_wrapper(void *wrap, int msgfd) -+#else - static int ls_get_wrapper(void *wrap) -+#endif - { - int ret = -1; - size_t len = 0; -@@ -1166,9 +1174,6 @@ static int ls_recv_str(int fd, char **buf) - if (ret != sizeof(slen)) - return -1; - -- if (slen == SIZE_MAX) -- return -1; -- - if (slen > 0) { - *buf = malloc(sizeof(char) * (slen + 1)); - if (!*buf) -@@ -1180,11 +1185,6 @@ static int ls_recv_str(int fd, char **buf) - return -1; - } - -- if (slen == SIZE_MAX) { -- free(*buf); -- return -1; -- } -- - (*buf)[slen] = '\0'; - } - -diff --git a/src/lxc/tools/lxc_start.c b/src/lxc/tools/lxc_start.c -index 459b86793..4f2c8afa7 100644 ---- a/src/lxc/tools/lxc_start.c -+++ b/src/lxc/tools/lxc_start.c -@@ -28,6 +28,11 @@ - #include "confile.h" - #include "log.h" - -+#ifdef HAVE_ISULAD -+#include -+#include "isulad_utils.h" -+#endif -+ - lxc_log_define(lxc_start, lxc); - - static int my_parser(struct lxc_arguments *args, int c, char *arg); -@@ -48,6 +53,17 @@ static const struct option my_longopts[] = { - {"share-ipc", required_argument, 0, OPT_SHARE_IPC}, - {"share-uts", required_argument, 0, OPT_SHARE_UTS}, - {"share-pid", required_argument, 0, OPT_SHARE_PID}, -+#ifdef HAVE_ISULAD -+ {"in-fifo", required_argument, 0, OPT_INPUT_FIFO}, -+ {"out-fifo", required_argument, 0, OPT_OUTPUT_FIFO}, -+ {"err-fifo", required_argument, 0, OPT_STDERR_FIFO}, -+ {"container-pidfile", required_argument, 0, OPT_CONTAINER_INFO}, -+ {"exit-fifo", required_argument, 0, OPT_EXIT_FIFO}, -+ {"start-timeout", required_argument, 0, OPT_START_TIMEOUT}, -+ {"disable-pty", no_argument, 0, OPT_DISABLE_PTY}, -+ {"open-stdin", no_argument, 
0, OPT_OPEN_STDIN}, -+ {"start-timeout", required_argument, 0, OPT_START_TIMEOUT}, -+#endif - LXC_COMMON_OPTIONS - }; - -@@ -118,6 +134,38 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) - case OPT_SHARE_PID: - args->share_ns[LXC_NS_PID] = arg; - break; -+ -+#ifdef HAVE_ISULAD -+ case OPT_CONTAINER_INFO: -+ args->container_info = arg; -+ break; -+ case OPT_INPUT_FIFO: -+ args->terminal_fifos[0] = arg; -+ break; -+ case OPT_OUTPUT_FIFO: -+ args->terminal_fifos[1] = arg; -+ break; -+ case OPT_STDERR_FIFO: -+ args->terminal_fifos[2] = arg; -+ break; -+ case OPT_EXIT_FIFO: -+ args->exit_monitor_fifo = arg; -+ break; -+ case OPT_DISABLE_PTY: -+ args->disable_pty = 1; -+ break; -+ case OPT_OPEN_STDIN: -+ args->open_stdin = 1; -+ break; -+ case OPT_START_TIMEOUT: -+ if(!is_non_negative_num(arg)) { -+ fprintf(stderr, "Error start timeout parameter:%s.\n", arg); -+ return -1; -+ } -+ args->start_timeout = (unsigned int)atoi(arg); -+ break; -+#endif -+ - } - return 0; - } -@@ -163,6 +211,9 @@ int main(int argc, char *argv[]) - "/sbin/init", - NULL, - }; -+#ifdef HAVE_ISULAD -+ char *container_info_file = NULL; -+#endif - - lxc_list_init(&defines); - -@@ -283,6 +334,42 @@ int main(int argc, char *argv[]) - goto out; - } - -+#ifdef HAVE_ISULAD -+ /* isulad: container info file used to store pid and ppid info of container*/ -+ if (my_args.container_info != NULL) { -+ if (ensure_path(&container_info_file, my_args.container_info) < 0) { -+ ERROR("Failed to ensure container's piddile '%s'", my_args.container_info); -+ goto out; -+ } -+ if (!c->set_container_info_file(c, container_info_file)) { -+ ERROR("Failed to set container's piddile '%s'", container_info_file); -+ goto out; -+ } -+ } -+ -+ if (my_args.terminal_fifos[0] || my_args.terminal_fifos[1] || my_args.terminal_fifos[2]) { -+ c->set_terminal_init_fifos(c, my_args.terminal_fifos[0], my_args.terminal_fifos[1], my_args.terminal_fifos[2]); -+ } -+ -+ /* isulad: fifo used to monitor state of monitor 
process */ -+ if (my_args.exit_monitor_fifo != NULL) { -+ c->exit_fifo = safe_strdup(my_args.exit_monitor_fifo); -+ } -+ -+ if (my_args.disable_pty) { -+ c->want_disable_pty(c, true); -+ } -+ -+ if (my_args.open_stdin) { -+ c->want_open_stdin(c, true); -+ } -+ -+ /* isulad: add start timeout */ -+ if(my_args.start_timeout) { -+ c->set_start_timeout(c, my_args.start_timeout); -+ } -+#endif -+ - if (my_args.console) - if (!c->set_config_item(c, "lxc.console.path", my_args.console)) - goto out; -@@ -305,6 +392,11 @@ int main(int argc, char *argv[]) - else - err = c->start(c, 0, args) ? EXIT_SUCCESS : EXIT_FAILURE; - if (err) { -+#ifdef HAVE_ISULAD -+ if (c->lxc_conf->errmsg) -+ fprintf(stderr, "%s:%s:%s:%d starting container process caused \"%s\"", c->name, -+ __FILE__, __func__, __LINE__, c->lxc_conf->errmsg); -+#endif - ERROR("The container failed to start"); - - if (my_args.daemonize) -@@ -320,5 +412,8 @@ int main(int argc, char *argv[]) - - out: - lxc_container_put(c); -+#ifdef HAVE_ISULAD -+ free(container_info_file); -+#endif - exit(err); - } -diff --git a/src/lxc/utils.c b/src/lxc/utils.c -index 88d0f85ee..4e418fbb9 100644 ---- a/src/lxc/utils.c -+++ b/src/lxc/utils.c -@@ -27,6 +27,8 @@ - #include - #include - #include -+#include -+#include - - #include "config.h" - #include "log.h" -@@ -35,7 +37,7 @@ - #include "memory_utils.h" - #include "namespace.h" - #include "parse.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "syscall_wrappers.h" - #include "utils.h" - -@@ -71,6 +73,9 @@ static int _recursive_rmdir(const char *dirname, dev_t pdev, - int ret; - struct dirent *direntp; - char pathname[PATH_MAX]; -+#ifdef HAVE_ISULAD -+ int saved_errno = 0; -+#endif - - dir = opendir(dirname); - if (!dir) -@@ -133,6 +138,11 @@ static int _recursive_rmdir(const char *dirname, dev_t pdev, - } else { - ret = unlink(pathname); - if (ret < 0) { -+#ifdef HAVE_ISULAD -+ if (saved_errno == 0) { -+ saved_errno = errno; -+ } -+#endif - __do_close int fd = 
-EBADF; - - fd = open(pathname, O_RDONLY | O_CLOEXEC | O_NONBLOCK); -@@ -158,10 +168,18 @@ static int _recursive_rmdir(const char *dirname, dev_t pdev, - } - - if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) { -+#ifdef HAVE_ISULAD -+ if (saved_errno == 0) { -+ saved_errno = errno; -+ } -+#endif - SYSERROR("Failed to delete \"%s\"", dirname); - failed = 1; - } - -+#ifdef HAVE_ISULAD -+ errno = saved_errno; -+#endif - return failed ? -1 : 0; - } - -@@ -1008,7 +1026,7 @@ static int open_if_safe(int dirfd, const char *nextpath) - * - * Return an open fd for the path, or <0 on error. - */ --static int open_without_symlink(const char *target, const char *prefix_skip) -+int open_without_symlink(const char *target, const char *prefix_skip) - { - int curlen = 0, dirfd, fulllen, i; - char *dup; -@@ -1425,6 +1443,11 @@ static int lxc_get_unused_loop_dev(char *name_loop) - { - int loop_nr, ret; - int fd_ctl = -1, fd_tmp = -1; -+#if HAVE_ISULAD -+ // isulad: retry and try mknod -+ int max_retry = 200; -+ bool try_mknod = true; -+#endif - - fd_ctl = open("/dev/loop-control", O_RDWR | O_CLOEXEC); - if (fd_ctl < 0) { -@@ -1442,8 +1465,37 @@ static int lxc_get_unused_loop_dev(char *name_loop) - if (ret < 0 || ret >= LO_NAME_SIZE) - goto on_error; - -+#if HAVE_ISULAD -+retry: -+#endif - fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); - if (fd_tmp < 0) { -+#if HAVE_ISULAD -+ /* Success of LOOP_CTL_GET_FREE doesn't mean /dev/loop$i is ready, -+ * we try to make node by ourself to avoid wait. */ -+ if (try_mknod) { -+ /* Do not check result of mknod because LOOP_CTL_GET_FREE -+ * alse do mknod, so this mknod may fail as node already -+ * exist. If we can open the node without error, we can -+ * say that it's be created successfully. -+ * -+ * note: 7 is the major device number of loopback devices -+ * in kernel. 
-+ */ -+ mknod(name_loop, S_IFBLK | 0640, makedev(7, loop_nr)); -+ try_mknod = false; -+ goto retry; -+ } -+ /* we need to wait some time to make sure it's ready for open if -+ * it can't open even if we have already try to make node by ourself. */ -+ if (max_retry > 0) { -+ max_retry--; -+ usleep(5000); /* 5 millisecond */ -+ goto retry; -+ } -+ SYSERROR("Failed to open loop \"%s\"", name_loop); -+ goto on_error; -+#else - /* on Android loop devices are moved under /dev/block, give it a shot */ - ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/block/loop%d", loop_nr); - if (ret < 0 || ret >= LO_NAME_SIZE) -@@ -1452,6 +1504,7 @@ static int lxc_get_unused_loop_dev(char *name_loop) - fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); - if (fd_tmp < 0) - SYSERROR("Failed to open loop \"%s\"", name_loop); -+#endif - } - - on_error: -@@ -1661,6 +1714,7 @@ uint64_t lxc_find_next_power2(uint64_t n) - return n; - } - -+#ifndef HAVE_ISULAD - static int process_dead(/* takes */ int status_fd) - { - __do_close int dupfd = -EBADF; -@@ -1698,15 +1752,19 @@ static int process_dead(/* takes */ int status_fd) - - return ret; - } -+#endif - - int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd) - { - int ret; -+#ifndef HAVE_ISULAD - pid_t ppid; -+#endif - - ret = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0), - prctl_arg(0), prctl_arg(0)); - -+#ifndef HAVE_ISULAD - /* verify that we haven't been orphaned in the meantime */ - ppid = (pid_t)syscall(SYS_getppid); - if (ppid == 0) { /* parent outside our pidns */ -@@ -1718,6 +1776,7 @@ int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd) - } else if (ppid != parent) { - return raise(SIGKILL); - } -+#endif - - if (ret < 0) - return -1; -@@ -1755,8 +1814,13 @@ int lxc_rm_rf(const char *dirname) - struct dirent *direntp; - - dir = opendir(dirname); -- if (!dir) -+ if (!dir) { -+ if (errno == ENOENT) { -+ WARN("Destroy path: \"%s\" do not exist", dirname); -+ return 0; -+ } - return 
log_error_errno(-1, errno, "Failed to open dir \"%s\"", dirname); -+ } - - while ((direntp = readdir(dir))) { - __do_free char *pathname = NULL; -@@ -1904,3 +1968,230 @@ int fix_stdio_permissions(uid_t uid) - - return fret; - } -+ -+#ifdef HAVE_ISULAD -+/* isulad: write error message */ -+void lxc_write_error_message(int errfd, const char *format, ...) -+{ -+ int ret; -+ char errbuf[BUFSIZ + 1] = {0}; -+ ssize_t sret; -+ va_list argp; -+ -+ if (errfd <= 0) -+ return; -+ -+ va_start(argp, format); -+ ret = vsnprintf(errbuf, BUFSIZ, format, argp); -+ va_end(argp); -+ if (ret < 0 || ret >= BUFSIZ) -+ SYSERROR("Failed to call vsnprintf"); -+ sret = write(errfd, errbuf, strlen(errbuf)); -+ if (sret < 0) -+ SYSERROR("Write errbuf failed"); -+} -+ -+/* isulad: read file to buffer */ -+int lxc_file2str(const char *filename, char ret[], int cap) -+{ -+ int fd, num_read; -+ -+ if ((fd = lxc_open(filename, O_RDONLY | O_CLOEXEC, 0)) == -1) -+ return -1; -+ if ((num_read = read(fd, ret, cap - 1)) <= 0) -+ num_read = -1; -+ else -+ ret[num_read] = 0; -+ close(fd); -+ -+ return num_read; -+} -+ -+/* isuald: lxc_stat2proc() makes sure it can handle arbitrary executable file basenames -+ * for `cmd', i.e. those with embedded whitespace or embedded ')'s. -+ * Such names confuse %s (see scanf(3)), so the string is split and %39c -+ * is used instead. (except for embedded ')' "(%[^)]c)" would work. -+ */ -+static proc_t *lxc_stat2proc(const char *S) -+{ -+ int num; -+ proc_t *P = NULL; -+ char *tmp = NULL; -+ -+ if (!S) -+ return NULL; -+ -+ tmp = strrchr(S, ')'); /* split into "PID (cmd" and "" */ -+ if (!tmp) -+ return NULL; -+ *tmp = '\0'; /* replace trailing ')' with NUL */ -+ -+ P = malloc(sizeof(proc_t)); -+ if (P == NULL) -+ return NULL; -+ (void)memset(P, 0x00, sizeof(proc_t)); -+ -+ /* parse these two strings separately, skipping the leading "(". 
*/ -+ num = sscanf(S, "%d (%15c", &P->pid, P->cmd); /* comm[16] in kernel */ -+ if (num != 2) { -+ ERROR("Call sscanf error: %s", errno ? strerror(errno) : ""); -+ free(P); -+ return NULL; -+ } -+ num = sscanf(tmp + 2, /* skip space after ')' too */ -+ "%c " -+ "%d %d %d %d %d " -+ "%lu %lu %lu %lu %lu " -+ "%Lu %Lu %Lu %Lu " /* utime stime cutime cstime */ -+ "%ld %ld %ld %ld " -+ "%Lu " /* start_time */ -+ "%lu " -+ "%ld " -+ "%lu %lu %lu %lu %lu %lu " -+ "%*s %*s %*s %*s " /* discard, no RT signals & Linux 2.1 used hex */ -+ "%lu %lu %lu " -+ "%d %d " -+ "%lu %lu", -+ &P->state, -+ &P->ppid, &P->pgrp, &P->session, &P->tty, &P->tpgid, -+ &P->flags, &P->min_flt, &P->cmin_flt, &P->maj_flt, &P->cmaj_flt, -+ &P->utime, &P->stime, &P->cutime, &P->cstime, -+ &P->priority, &P->nice, &P->timeout, &P->it_real_value, -+ &P->start_time, -+ &P->vsize, -+ &P->rss, -+ &P->rss_rlim, &P->start_code, &P->end_code, &P->start_stack, &P->kstk_esp, -+ &P->kstk_eip, -+ &P->wchan, &P->nswap, &P->cnswap, -+ &P->exit_signal, &P->processor, /* 2.2.1 ends with "exit_signal" */ -+ &P->rtprio, &P->sched /* both added to 2.5.18 */ -+ ); -+ if (num != 35) { -+ ERROR("Call sscanf error: %s", errno ? strerror(errno) : ""); -+ free(P); -+ return NULL; -+ } -+ if (P->tty == 0) -+ P->tty = -1; /* the old notty val, update elsewhere bef. 
moving to 0 */ -+ return P; -+} -+ -+/* isulad: get starttime of process pid */ -+unsigned long long lxc_get_process_startat(pid_t pid) -+{ -+ int sret = 0; -+ unsigned long long startat = 0; -+ proc_t *pid_info = NULL; -+ char filename[PATH_MAX] = {0}; -+ char sbuf[1024] = {0}; /* bufs for stat */ -+ -+ sret = snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); -+ if (sret < 0 || sret >= sizeof(filename)) { -+ ERROR("Failed to sprintf filename"); -+ goto out; -+ } -+ -+ if ((lxc_file2str(filename, sbuf, sizeof(sbuf))) == -1) { -+ SYSERROR("Failed to read pidfile %s", filename); -+ goto out; -+ } -+ -+ pid_info = lxc_stat2proc(sbuf); -+ if (!pid_info) { -+ ERROR("Failed to get proc stat info"); -+ goto out; -+ } -+ -+ startat = pid_info->start_time; -+out: -+ free(pid_info); -+ return startat; -+} -+ -+// isulad: set env home in container -+int lxc_setup_env_home(uid_t uid) -+{ -+#define __PASSWD_FILE__ "/etc/passwd" -+ char *homedir = "/"; // default home dir is / -+ FILE *stream = NULL; -+ struct passwd pw, *pwbufp = NULL; -+ char buf[BUFSIZ]; -+ -+ stream = fopen_cloexec(__PASSWD_FILE__, "r"); -+ if (stream == NULL) { -+ SYSWARN("Failed to open %s", __PASSWD_FILE__); -+ goto set_env; -+ } -+ -+ while (fgetpwent_r(stream, &pw, buf, sizeof(buf), &pwbufp) == 0 && pwbufp != NULL) { -+ if (pwbufp->pw_uid == uid) { -+ homedir = pwbufp->pw_dir; -+ goto set_env; -+ } -+ } -+ WARN("User invalid, can not find user '%u'", uid); -+ -+set_env: -+ if (stream) -+ fclose(stream); -+ -+ // if we didn't configure HOME, set it based on uid -+ if (setenv("HOME", homedir, 0) < 0) { -+ SYSERROR("Unable to set env 'HOME'"); -+ return -1; -+ } -+ -+ NOTICE("Setted env 'HOME' to %s", homedir); -+ return 0; -+} -+ -+bool lxc_process_alive(pid_t pid, unsigned long long start_time) -+{ -+ int sret = 0; -+ bool alive = true; -+ proc_t *pid_info = NULL; -+ char filename[PATH_MAX] = {0}; -+ char sbuf[1024] = {0}; /* bufs for stat */ -+ -+ sret = kill(pid, 0); -+ if (sret < 0 && errno 
== ESRCH) -+ return false; -+ -+ sret = snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); -+ if (sret < 0 || sret >= sizeof(filename)) { -+ ERROR("Failed to sprintf filename"); -+ goto out; -+ } -+ -+ if ((lxc_file2str(filename, sbuf, sizeof(sbuf))) == -1) { -+ ERROR("Failed to read pidfile %s", filename); -+ alive = false; -+ goto out; -+ } -+ -+ pid_info = lxc_stat2proc(sbuf); -+ if (!pid_info) { -+ ERROR("Failed to get proc stat info"); -+ alive = false; -+ goto out; -+ } -+ -+ if (start_time != pid_info->start_time) -+ alive = false; -+out: -+ free(pid_info); -+ return alive; -+} -+ -+bool is_non_negative_num(const char *s) -+{ -+ if (!s || !strcmp(s, "")) -+ return false; -+ while(*s != '\0') { -+ if(!isdigit(*s)) -+ return false; -+ ++s; -+ } -+ return true; -+} -+#endif -diff --git a/src/lxc/utils.h b/src/lxc/utils.h -index cf2c04251..39ef5792f 100644 ---- a/src/lxc/utils.h -+++ b/src/lxc/utils.h -@@ -25,9 +25,16 @@ - #include "initutils.h" - #include "macro.h" - #include "memory_utils.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "string_utils.h" - -+#ifdef HAVE_ISULAD -+#include "isulad_utils.h" -+ -+/* isulad: replace space with SPACE_MAGIC_STR */ -+#define SPACE_MAGIC_STR "[#)" -+#endif -+ - /* returns 1 on success, 0 if there were any failures */ - extern int lxc_rmdir_onedev(const char *path, const char *exclude); - extern int get_u16(unsigned short *val, const char *arg, int base); -@@ -41,6 +48,73 @@ extern char *get_rundir(void); - #endif - #endif - -+#ifdef HAVE_ISULAD -+/* isulad: -+ ld cutime, cstime, priority, nice, timeout, it_real_value, rss, -+ c state, -+ d ppid, pgrp, session, tty, tpgid, -+ s signal, blocked, sigignore, sigcatch, -+ lu flags, min_flt, cmin_flt, maj_flt, cmaj_flt, utime, stime, -+ lu rss_rlim, start_code, end_code, start_stack, kstk_esp, kstk_eip, -+ lu start_time, vsize, wchan, nswap, cnswap, -+*/ -+ -+/* Basic data structure which holds all information we can get about a process. 
-+ * (unless otherwise specified, fields are read from /proc/#/stat) -+ * -+ * Most of it comes from task_struct in linux/sched.h -+ */ -+typedef struct proc_t { -+ // 1st 16 bytes -+ int pid; /* process id */ -+ int ppid; /* pid of parent process */ -+ -+ char state; /* single-char code for process state (S=sleeping) */ -+ -+ unsigned long long -+ utime, /* user-mode CPU time accumulated by process */ -+ stime, /* kernel-mode CPU time accumulated by process */ -+ // and so on... -+ cutime, /* cumulative utime of process and reaped children */ -+ cstime, /* cumulative stime of process and reaped children */ -+ start_time; /* start time of process -- seconds since 1-1-70 */ -+ -+ long -+ priority, /* kernel scheduling priority */ -+ timeout, /* ? */ -+ nice, /* standard unix nice level of process */ -+ rss, /* resident set size from /proc/#/stat (pages) */ -+ it_real_value; /* ? */ -+ unsigned long -+ rtprio, /* real-time priority */ -+ sched, /* scheduling class */ -+ vsize, /* number of pages of virtual memory ... */ -+ rss_rlim, /* resident set size limit? */ -+ flags, /* kernel flags for the process */ -+ min_flt, /* number of minor page faults since process start */ -+ maj_flt, /* number of major page faults since process start */ -+ cmin_flt, /* cumulative min_flt of process and child processes */ -+ cmaj_flt, /* cumulative maj_flt of process and child processes */ -+ nswap, /* ? */ -+ cnswap, /* cumulative nswap ? 
*/ -+ start_code, /* address of beginning of code segment */ -+ end_code, /* address of end of code segment */ -+ start_stack, /* address of the bottom of stack for the process */ -+ kstk_esp, /* kernel stack pointer */ -+ kstk_eip, /* kernel instruction pointer */ -+ wchan; /* address of kernel wait channel proc is sleeping in */ -+ -+ char cmd[16]; /* basename of executable file in call to exec(2) */ -+ int -+ pgrp, /* process group id */ -+ session, /* session id */ -+ tty, /* full device number of controlling terminal */ -+ tpgid, /* terminal process group id */ -+ exit_signal, /* might not be SIGCHLD */ -+ processor; /* current (or most recent?) CPU */ -+} proc_t; -+#endif -+ - static inline int lxc_set_cloexec(int fd) - { - return fcntl(fd, F_SETFD, FD_CLOEXEC); -@@ -145,6 +219,7 @@ extern bool cgns_supported(void); - extern char *choose_init(const char *rootfs); - extern bool switch_to_ns(pid_t pid, const char *ns); - extern char *get_template_path(const char *t); -+extern int open_without_symlink(const char *target, const char *prefix_skip); - extern int safe_mount(const char *src, const char *dest, const char *fstype, - unsigned long flags, const void *data, - const char *rootfs); -@@ -241,14 +316,16 @@ extern bool lxc_can_use_pidfd(int pidfd); - - extern int fix_stdio_permissions(uid_t uid); - --static inline bool uid_valid(uid_t uid) --{ -- return uid != LXC_INVALID_UID; --} -+#ifdef HAVE_ISULAD -+extern void lxc_write_error_message(int errfd, const char *format, ...); -+extern int lxc_file2str(const char *filename, char ret[], int cap); -+extern int unsigned long long lxc_get_process_startat(pid_t pid); -+// set env home in container -+extern int lxc_setup_env_home(uid_t uid); - --static inline bool gid_valid(gid_t gid) --{ -- return gid != LXC_INVALID_GID; --} -+extern bool lxc_process_alive(pid_t pid, unsigned long long start_time); -+ -+extern bool is_non_negative_num(const char *s); -+#endif - - #endif /* __LXC_UTILS_H */ -diff --git 
a/src/lxc/uuid.c b/src/lxc/uuid.c -index 256225b8f..a5d24bbcb 100644 ---- a/src/lxc/uuid.c -+++ b/src/lxc/uuid.c -@@ -116,7 +116,7 @@ int lxc_id128_write_fd(int fd, lxc_id128_t id) - - int lxc_id128_write(const char *p, lxc_id128_t id) - { -- __do_close int fd = -EBADF; -+ int fd = -1; - - fd = open(p, O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY|O_TRUNC, 0444); - if (fd < 0) -diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am -index 11bba260a..59905d326 100644 ---- a/src/tests/Makefile.am -+++ b/src/tests/Makefile.am -@@ -30,7 +30,7 @@ lxc_test_parse_config_file_SOURCES = parse_config_file.c \ - lxc_test_raw_clone_SOURCES = lxc_raw_clone.c \ - lxctest.h \ - ../lxc/namespace.c ../lxc/namespace.h \ -- ../lxc/process_utils.c ../lxc/process_utils.h -+ ../lxc/raw_syscalls.c ../lxc/raw_syscalls.h - ../lxc/utils.c ../lxc/utils.h - lxc_test_reboot_SOURCES = reboot.c - lxc_test_saveconfig_SOURCES = saveconfig.c -@@ -58,6 +58,10 @@ AM_CFLAGS=-DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \ - -I $(top_srcdir)/src/lxc/tools \ - -pthread - -+if HAVE_ISULAD -+AM_CFLAGS += -I $(top_srcdir)/src/lxc/json -+endif -+ - if ENABLE_APPARMOR - AM_CFLAGS += -DHAVE_APPARMOR - endif -@@ -114,8 +118,7 @@ bin_SCRIPTS += lxc-test-automount \ - lxc-test-createconfig \ - lxc-test-exit-code \ - lxc-test-no-new-privs \ -- lxc-test-rootfs \ -- lxc-test-usernsexec -+ lxc-test-rootfs - - if DISTRO_UBUNTU - bin_SCRIPTS += lxc-test-lxc-attach \ -@@ -164,7 +167,6 @@ EXTRA_DIST = basic.c \ - lxc-test-snapdeps \ - lxc-test-symlink \ - lxc-test-unpriv \ -- lxc-test-usernsexec \ - lxc-test-utils.c \ - may_control.c \ - mount_injection.c \ -diff --git a/src/tests/attach.c b/src/tests/attach.c -index 07e641d56..acb4c89f4 100644 ---- a/src/tests/attach.c -+++ b/src/tests/attach.c -@@ -29,6 +29,7 @@ - #include "lxctest.h" - #include "utils.h" - #include "lsm/lsm.h" -+#include "config.h" - - #include - -@@ -76,7 +77,11 @@ static void test_attach_lsm_set_config(struct lxc_container *ct) - ct->save_config(ct, NULL); - } - 
-+#ifdef HAVE_ISULAD -+static int test_attach_lsm_func_func(void* payload, int fd) -+#else - static int test_attach_lsm_func_func(void* payload) -+#endif - { - TSTOUT("%s", lsm_process_label_get(syscall(SYS_getpid))); - return 0; -@@ -187,7 +192,11 @@ static int test_attach_lsm_func(struct lxc_container *ct) { return 0; } - static int test_attach_lsm_cmd(struct lxc_container *ct) { return 0; } - #endif /* HAVE_APPARMOR || HAVE_SELINUX */ - -+#ifdef HAVE_ISULAD -+static int test_attach_func_func(void* payload, int fd) -+#else - static int test_attach_func_func(void* payload) -+#endif - { - TSTOUT("%d", (int)syscall(SYS_getpid)); - return 0; -diff --git a/src/tests/console.c b/src/tests/console.c -index c88f4329b..c0ad16033 100644 ---- a/src/tests/console.c -+++ b/src/tests/console.c -@@ -37,14 +37,14 @@ - } while (0) - - static void test_console_close_all(int ttyfd[MAXCONSOLES], -- int ptmxfd[MAXCONSOLES]) -+ int masterfd[MAXCONSOLES]) - { - int i; - - for (i = 0; i < MAXCONSOLES; i++) { -- if (ptmxfd[i] != -1) { -- close(ptmxfd[i]); -- ptmxfd[i] = -1; -+ if (masterfd[i] != -1) { -+ close(masterfd[i]); -+ masterfd[i] = -1; - } - - if (ttyfd[i] != -1) { -@@ -59,14 +59,14 @@ static int test_console_running_container(struct lxc_container *c) - int nrconsoles, i, ret = -1; - int ttynum [MAXCONSOLES]; - int ttyfd [MAXCONSOLES]; -- int ptmxfd[MAXCONSOLES]; -+ int masterfd[MAXCONSOLES]; - - for (i = 0; i < MAXCONSOLES; i++) -- ttynum[i] = ttyfd[i] = ptmxfd[i] = -1; -+ ttynum[i] = ttyfd[i] = masterfd[i] = -1; - - ttynum[0] = 1; - -- ret = c->console_getfd(c, &ttynum[0], &ptmxfd[0]); -+ ret = c->console_getfd(c, &ttynum[0], &masterfd[0]); - if (ret < 0) { - TSTERR("console allocate failed"); - goto err1; -@@ -79,12 +79,12 @@ static int test_console_running_container(struct lxc_container *c) - } - - /* attempt to alloc same ttynum */ -- ret = c->console_getfd(c, &ttynum[0], &ptmxfd[1]); -+ ret = c->console_getfd(c, &ttynum[0], &masterfd[1]); - if (ret != -1) { - 
TSTERR("console allocate should fail for allocated ttynum %d", ttynum[0]); - goto err2; - } -- close(ptmxfd[0]); ptmxfd[0] = -1; -+ close(masterfd[0]); masterfd[0] = -1; - close(ttyfd[0]); ttyfd[0] = -1; - - /* ensure we can allocate all consoles, we do this a few times to -@@ -92,7 +92,7 @@ static int test_console_running_container(struct lxc_container *c) - */ - for (i = 0; i < 10; i++) { - for (nrconsoles = 0; nrconsoles < MAXCONSOLES; nrconsoles++) { -- ret = c->console_getfd(c, &ttynum[nrconsoles], &ptmxfd[nrconsoles]); -+ ret = c->console_getfd(c, &ttynum[nrconsoles], &masterfd[nrconsoles]); - if (ret < 0) - break; - ttyfd[nrconsoles] = ret; -@@ -103,13 +103,13 @@ static int test_console_running_container(struct lxc_container *c) - goto err2; - } - -- test_console_close_all(ttyfd, ptmxfd); -+ test_console_close_all(ttyfd, masterfd); - } - - ret = 0; - - err2: -- test_console_close_all(ttyfd, ptmxfd); -+ test_console_close_all(ttyfd, masterfd); - - err1: - return ret; -diff --git a/src/tests/containertests.c b/src/tests/containertests.c -index 0fb6fbdfb..b28bcd56d 100644 ---- a/src/tests/containertests.c -+++ b/src/tests/containertests.c -@@ -135,7 +135,7 @@ int main(int argc, char *argv[]) - - str = c->config_file_name(c); - #define CONFIGFNAM LXCPATH "/" MYNAME "/config" -- if (str && strcmp(str, CONFIGFNAM)) { -+ if (!str || strcmp(str, CONFIGFNAM)) { - fprintf(stderr, "%d: got wrong config file name (%s, not %s)\n", __LINE__, str, CONFIGFNAM); - goto out; - } -diff --git a/src/tests/lxc-test-no-new-privs b/src/tests/lxc-test-no-new-privs -index cfcb43bd6..8642992dd 100755 ---- a/src/tests/lxc-test-no-new-privs -+++ b/src/tests/lxc-test-no-new-privs -@@ -36,13 +36,11 @@ cleanup() { - - trap cleanup EXIT SIGHUP SIGINT SIGTERM - --if [ ! 
-d /etc/lxc ]; then -- mkdir -p /etc/lxc/ -- cat > /etc/lxc/default.conf << EOF -+mkdir -p /etc/lxc/ -+cat > /etc/lxc/default.conf << EOF - lxc.net.0.type = veth - lxc.net.0.link = lxcbr0 - EOF --fi - - ARCH=i386 - if type dpkg >/dev/null 2>&1; then -diff --git a/src/tests/lxc-test-usernsexec b/src/tests/lxc-test-usernsexec -deleted file mode 100755 -index 0ee48b353..000000000 ---- a/src/tests/lxc-test-usernsexec -+++ /dev/null -@@ -1,368 +0,0 @@ --#!/bin/bash --# --# This is a bash test case to test lxc-usernsexec. --# It basically supports usring lxc-usernsexec to execute itself --# and then create files and check that their ownership is as expected. --# --# It requires that the current user has at least 1 value in subuid and /etc/subgid --TEMP_D="" --VERBOSITY=0 --set -f -- --fail() { echo "$@" 1>&2; exit 1; } --error() { echo "$@" 1>&2; } --skip() { -- error "SKIP:" "$@" -- exit 0 --} --debug() { -- local level=${1}; shift; -- [ "${level}" -gt "${VERBOSITY}" ] && return -- error "${@}" --} -- --collect_owners() { -- # collect_owners([--dir=dir], file1, file2 ...) -- # set _RET to a space delimited array of -- # :owner:group :owner:group ... -- local out="" ret="" dir="" -- if [ "${1#--dir=}" != "$1" ]; then -- dir="${1#--dir=}" -- shift -- fi -- for arg in "$@"; do -- # drop the :* so that input can be same as touch_files. -- out=$(stat --format "%n:%u:%g" "${dir}${arg}") || { -- error "failed to stat ${arg}" -- return 1; -- } -- ret="$ret ${out##*/}" -- done -- _RET="${ret# }" --} -- --cleanup() { -- if [ -d "$TEMP_D" ]; then -- rm -Rf "$TEMP_D" -- fi --} -- --touch_files() { -- # touch_files tok [tok ...] -- # tok is filename:chown_id:chown_gid -- # if chown_id or chown_gid is empty, then chown will do the right thing -- # and only change the provided value. 
-- local args="" tok="" fname="" uidgid="" -- args=( "$@" ) -- for tok in "$@"; do -- fname=${tok%%:*} -- uidgid=${tok#$fname} -- uidgid=${uidgid#:} -- : > "$fname" || { error "failed to create $fname"; return 1; } -- [ -z "$uidgid" ] && continue -- chown $uidgid "$fname" || { error "failed to chmod '$uidgid' $fname ($?)"; return 1; } -- done --} -- --inside_cleanup() { -- local f="" -- rm -f "${FILES[@]}" -- echo "$STATUS" >&5 -- echo "$STATUS" >&6 --} -- --set_files() { -- local x="" -- FILES=( ) -- for x in "$@"; do -- FILES[${#FILES[@]}]="${x%%:*}" -- done --} -- --inside() { -- # this what gets run inside the usernsexec environment. -- # basically expects arguments of :uid:gid -- # it will create the file, and then chmod it to the provided uid:gid -- # it writes to file descriptor 5 a single line with space delimited -- # exit_value uid gid [:: ... ] -- STATUS=127 -- trap inside_cleanup EXIT -- local uid="" gid="" x="" -- -- uid=$(id -u) || fail "failed execution of id -u" -- gid=$(id -g) || fail "failed execution of id -g" -- -- set_files "$@" -- -- touch_files "$@" || fail "failed to create files" -- -- collect_owners "${FILES[@]}" || fail "failed to collect owners" -- result="$_RET" -- -- # tell caller we are done. -- echo "0" "$uid" "$gid" "$result" >&5 -- STATUS=0 -- -- # let the caller do things while the files are around. -- read -t 30 x <&6 -- -- exit --} -- --runtest() { -- # runtest(mydir, nsexec_args, [inside [...]]) -- # - use 'mydir' as a working dir. 
-- # - execute lxc-usernsexec $nsexec_args -- inside -- # -- # write to stdout -- # exit_value inside_exit_value inside_uid:inside_gid -- # -- # where results are a list of space separated -- # filename:uid:gid -- # for each file passed in inside_args -- [ $# -ge 3 ] || { error "runtest expects 2 args"; return 1; } -- local mydir="$1" nsexec_args="$2" -- shift 2 -- local ret inside_owners t="" -- KIDPID="" -- -- mkfifo "${mydir}/5" && exec 5<>"${mydir}/5" || return -- mkfifo "${mydir}/6" && exec 6<>"${mydir}/6" || return -- mkdir --mode=777 "${mydir}/work" || return -- cd "${mydir}/work" -- -- set_files "$@" -- -- local results="" oresults="" iresults="" iuid="" igid="" n=0 -- -- error "$" $USERNSEXEC ${nsexec_args} -- "$MYPATH" inside "$*" -- ${USERNSEXEC} ${nsexec_args} -- "$MYPATH" inside "$@" & -- KIDPID=$! -- -- [ -d "/proc/$KIDPID" ] || { -- wait $KIDPID -- fail "kid $KIDPID died quickly $?" -- } -- -- # if lxc-usernsexec fails to execute MYPATH inside, then -- # the read below would timeout. To avoid a long timeout, -- # we do a short timeout and check the pid is alive. -- while ! read -t 1 ret iuid igid inside_owners <&5; do -- n=$((n+1)) -- if [ ! -d "/proc/$KIDPID" ]; then -- wait $KIDPID -- fail "kid $KIDPID is gone $?" -- fi -- [ $n -ge 30 ] && fail "child never wrote to pipe" -- done -- iresults=( $inside_owners ) -- -- collect_owners "--dir=${mydir}/work/" "${FILES[@]}" || return -- oresults=( $_RET ) -- -- echo 0 >&6 -- wait -- -- ret=$? -- -- results=( ) -- for((i=0;i<${#iresults[@]};i++)); do -- results[$i]="${oresults[$i]}:${iresults[$i]#*:}" -- done -- -- echo 0 $ret "$iuid:$igid" "${results[@]}" --} -- --runcheck() { -- local name="$1" expected="$2" nsexec_args="$3" found="" -- shift 3 -- mkdir "${TEMP_D}/$name" || fail "failed mkdir /$name.d" -- local err="${TEMP_D}/$name.err" -- out=$("$MYPATH" runtest "${TEMP_D}/$name" "$nsexec_args" "$@" 2>"$err") || { -- error "$name: FAIL - runtest failed $?" 
-- [ -n "$out" ] && error " $out" -- sed 's,^, ,' "$err" 1>&2 -- ERRORS="${ERRORS} $name" -- return 1 -- } -- set -- $out -- local parentrc=$1 kidrc=$2 iuidgid="$3" found="" -- shift 3 -- found="$*" -- [ "$parentrc" = "0" -a "$kidrc" = "0" ] || { -- error "$name: FAIL - parentrc=$parentrc kidrc=$kidrc found=$found" -- ERRORS="${ERRORS} $name" -- return 1 -- } -- [ "$expected" = "$found" ] && { -- error "$name: PASS" -- PASS="${PASSES} $name" -- return 0 -- } -- echo "$name: FAIL expected '$expected' != found '$found'" -- FAILS="${FAILS} $name" -- return 1 --} -- --setup_Usage() { -- cat <> /etc/subuid || { -- error "failed to add $asuser to /etc/subuid" -- } -- fi -- -- subgid=$(awk -F: '$1 == n { print $2; exit(0); }' "n=$asuser" /etc/subgid) || { -- error "failed to read /etc/subgid for $asuser" -- return 1 -- } -- if [ -n "$subgid" ]; then -- debug 1 "$asuser already had subgid=$subgid" -- else -- debug 1 "adding $asuser:$create_subgid to /etc/subgid" -- echo "$asuser:$create_subgid" >> /etc/subgid || { -- error "failed to add $asuser to /etc/subgid" -- } -- fi -- -- debug 0 "as $asuser executing ${MYPATH} ${pt_args[*]}" -- sudo -Hu "$asuser" "${MYPATH}" "${pt_args[@]}" --} -- --USERNSEXEC=${USERNSEXEC:-lxc-usernsexec} --MYPATH=$(readlink -f "$0") || { echo "failed to get full path to self: $0"; exit 1; } --export MYPATH -- --if [ "$1" = "inside" ]; then -- shift -- inside "$@" -- exit --elif [ "$1" = "runtest" ]; then -- shift -- runtest "$@" -- exit --elif [ "$1" = "setup_and_run" ]; then -- shift -- setup_and_run "$@" -- exit --fi -- --name=$(id --user --name) || fail "failed to get username" --if [ "$name" = "root" ]; then -- setup_and_run "$@" -- exit --fi -- --subuid=$(awk -F: '$1 == n { print $2; exit(0); }' "n=$name" /etc/subuid) && -- [ -n "$subuid" ] || fail "did not find $name in /etc/subuid" -- --subgid=$(awk -F: '$1 == n { print $2; exit(0); }' "n=$name" /etc/subgid) && -- [ -n "$subgid" ] || fail "did not find $name in /etc/subgid" -- -- --uid=$(id 
--user) || fail "failed to get uid" --gid=$(id --group) || fail "failed to get gid" -- --mapuid="u:0:$uid:1" --mapgid="g:0:$gid:1" -- --ver=$(dpkg-query --show lxc-utils | awk '{print $2}') --error "uid=$uid gid=$gid name=$name subuid=$subuid subgid=$subgid ver=$ver" --error "lxc-utils=$ver kver=$(uname -r)" --error "USERNSEXEC=$USERNSEXEC" -- --TEMP_D=$(mktemp -d) --trap cleanup EXIT -- --PASSES=""; FAILS=""; ERRORS="" --runcheck nouidgid "f0:$subuid:$subgid:0:0" "" f0 -- --runcheck myuidgid "f0:$uid:$gid:0:0" \ -- "-m$mapuid -m$mapgid" f0 -- --runcheck subuidgid \ -- "f0:$subuid:$subgid:0:0" \ -- "-mu:0:$subuid:1 -mg:0:$subgid:1" f0:0:0 -- --runcheck bothsets "f0:$uid:$gid:0:0 f1:$subuid:$subgid:1:1 f2:$uid:$subgid:0:1" \ -- "-m$mapuid -m$mapgid -mu:1:$subuid:1 -mg:1:$subgid:1" \ -- f0 f1:1:1 f2::1 -- --runcheck mismatch "f0:$uid:$subgid:0:0 f1:$subuid:$gid:15:31" \ -- "-mu:0:$uid:1 -mg:0:$subgid:1 -mu:15:$subuid:1 -mg:31:$gid:1" \ -- f0 f1:15:31 -- --FAILS=${FAILS# } --ERRORS=${ERRORS# } --PASSES=${PASSES# } -- --[ -z "${FAILS}" ] || error "FAILS: ${FAILS}" --[ -z "${ERRORS}" ] || error "ERRORS: ${ERRORS}" --[ -z "${FAILS}" -a -z "${ERRORS}" ] || exit 1 --exit 0 -diff --git a/src/tests/lxc_raw_clone.c b/src/tests/lxc_raw_clone.c -index f72e20ccc..655454f39 100644 ---- a/src/tests/lxc_raw_clone.c -+++ b/src/tests/lxc_raw_clone.c -@@ -39,7 +39,7 @@ - - #include "lxctest.h" - #include "namespace.h" --#include "process_utils.h" -+#include "raw_syscalls.h" - #include "utils.h" - - int main(int argc, char *argv[]) -diff --git a/templates/lxc-oci.in b/templates/lxc-oci.in -index dab077191..8017c38c1 100644 ---- a/templates/lxc-oci.in -+++ b/templates/lxc-oci.in -@@ -348,7 +348,8 @@ fi - # shellcheck disable=SC2039 - # shellcheck disable=SC2068 - umoci --log=error unpack ${umoci_args[@]} --image "${DOWNLOAD_TEMP}:latest" "${LXC_ROOTFS}.tmp" --find "${LXC_ROOTFS}.tmp/rootfs" -mindepth 1 -maxdepth 1 -exec mv '{}' "${LXC_ROOTFS}/" \; -+rmdir "${LXC_ROOTFS}" -+mv 
"${LXC_ROOTFS}.tmp/rootfs" "${LXC_ROOTFS}" - - OCI_CONF_FILE=$(getconfigpath "${DOWNLOAD_TEMP}" latest) - LXC_CONF_FILE="${LXC_PATH}/config" --- -2.25.1 - diff --git a/0001-refactor-patch-code-of-utils-commands-and-so-on.patch b/0001-refactor-patch-code-of-utils-commands-and-so-on.patch new file mode 100644 index 0000000..ab6398c --- /dev/null +++ b/0001-refactor-patch-code-of-utils-commands-and-so-on.patch @@ -0,0 +1,2340 @@ +From eb5a5ccb84947a833e5d4881bb1e570e36f766f8 Mon Sep 17 00:00:00 2001 +From: zhangxiaoyu +Date: Fri, 15 Jul 2022 17:06:09 +0800 +Subject: [PATCH] refactor patch code of utils commands and so on + +Signed-off-by: zhangxiaoyu +--- + src/lxc/cgroups/isulad_cgroup2_devices.c | 575 +++++++++++++++++++++++ + src/lxc/commands.c | 180 +++++++ + src/lxc/commands.h | 10 + + src/lxc/conf.h | 95 ++++ + src/lxc/isulad_utils.c | 317 +++++++++++++ + src/lxc/isulad_utils.h | 30 ++ + src/lxc/lsm/lsm.c | 20 + + src/lxc/lxc.h | 19 + + src/lxc/lxccontainer.h | 194 ++++++++ + src/lxc/tools/lxc_ls.c | 8 + + src/lxc/tools/lxc_start.c | 109 ++++- + src/lxc/utils.c | 174 +++++++ + src/lxc/utils.h | 81 ++++ + 13 files changed, 1811 insertions(+), 1 deletion(-) + create mode 100644 src/lxc/cgroups/isulad_cgroup2_devices.c + create mode 100644 src/lxc/isulad_utils.c + create mode 100644 src/lxc/isulad_utils.h + +diff --git a/src/lxc/cgroups/isulad_cgroup2_devices.c b/src/lxc/cgroups/isulad_cgroup2_devices.c +new file mode 100644 +index 0000000..05613c5 +--- /dev/null ++++ b/src/lxc/cgroups/isulad_cgroup2_devices.c +@@ -0,0 +1,575 @@ ++/* SPDX-License-Identifier: LGPL-2.1+ */ ++ ++/* Parts of this taken from systemd's implementation. 
*/ ++ ++#ifndef _GNU_SOURCE ++#define _GNU_SOURCE 1 ++#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "cgroup2_devices.h" ++#include "config.h" ++#include "log.h" ++#include "macro.h" ++#include "memory_utils.h" ++ ++#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX ++#include ++#include ++ ++#define BPF_LOG_BUF_SIZE (1 << 23) /* 8MB */ ++#ifndef BPF_LOG_LEVEL1 ++#define BPF_LOG_LEVEL1 1 ++#endif ++ ++#ifndef BPF_LOG_LEVEL2 ++#define BPF_LOG_LEVEL2 2 ++#endif ++ ++#ifndef BPF_LOG_LEVEL ++#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) ++#endif ++ ++lxc_log_define(cgroup2_devices, cgroup); ++ ++static int bpf_program_add_instructions(struct bpf_program *prog, ++ const struct bpf_insn *instructions, ++ size_t count) ++{ ++ ++ struct bpf_insn *new_insn; ++ ++ if (prog->kernel_fd >= 0) ++ return log_error_errno(-1, EBUSY, "Refusing to update bpf cgroup program that's already loaded"); ++ ++ new_insn = realloc(prog->instructions, sizeof(struct bpf_insn) * (count + prog->n_instructions)); ++ if (!new_insn) ++ return log_error_errno(-1, ENOMEM, "Failed to reallocate bpf cgroup program"); ++ ++ prog->instructions = new_insn; ++ memset(prog->instructions + prog->n_instructions, 0, ++ sizeof(struct bpf_insn) * count); ++ memcpy(prog->instructions + prog->n_instructions, instructions, ++ sizeof(struct bpf_insn) * count); ++ prog->n_instructions += count; ++ ++ return 0; ++} ++ ++void bpf_program_free(struct bpf_program *prog) ++{ ++ if (!prog) ++ return; ++ ++ (void)bpf_program_cgroup_detach(prog); ++ ++ if (prog->kernel_fd >= 0) ++ close(prog->kernel_fd); ++ free(prog->instructions); ++ free(prog->attached_path); ++ free(prog); ++} ++ ++/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ ++#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ ++ ((struct bpf_insn){.code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ ++ .dst_reg = DST, \ ++ .src_reg = SRC, \ ++ .off = OFF, \ ++ .imm = 0}) ++ ++/* ALU ops on immediates, 
bpf_add|sub|...: dst_reg += imm32 */ ++#define BPF_ALU32_IMM(OP, DST, IMM) \ ++ ((struct bpf_insn){.code = BPF_ALU | BPF_OP(OP) | BPF_K, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = 0, \ ++ .imm = IMM}) ++ ++/* Short form of mov, dst_reg = src_reg */ ++#define BPF_MOV64_IMM(DST, IMM) \ ++ ((struct bpf_insn){.code = BPF_ALU64 | BPF_MOV | BPF_K, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = 0, \ ++ .imm = IMM}) ++ ++#define BPF_MOV32_REG(DST, SRC) \ ++ ((struct bpf_insn){.code = BPF_ALU | BPF_MOV | BPF_X, \ ++ .dst_reg = DST, \ ++ .src_reg = SRC, \ ++ .off = 0, \ ++ .imm = 0}) ++ ++/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ ++#define BPF_JMP_REG(OP, DST, SRC, OFF) \ ++ ((struct bpf_insn){.code = BPF_JMP | BPF_OP(OP) | BPF_X, \ ++ .dst_reg = DST, \ ++ .src_reg = SRC, \ ++ .off = OFF, \ ++ .imm = 0}) ++ ++/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ ++#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ ++ ((struct bpf_insn){.code = BPF_JMP | BPF_OP(OP) | BPF_K, \ ++ .dst_reg = DST, \ ++ .src_reg = 0, \ ++ .off = OFF, \ ++ .imm = IMM}) ++ ++/* Program exit */ ++#define BPF_EXIT_INSN() \ ++ ((struct bpf_insn){.code = BPF_JMP | BPF_EXIT, \ ++ .dst_reg = 0, \ ++ .src_reg = 0, \ ++ .off = 0, \ ++ .imm = 0}) ++ ++static int bpf_access_mask(const char *acc, __u32 *mask) ++{ ++ if (!acc) ++ return 0; ++ ++ for (; *acc; acc++) ++ switch (*acc) { ++ case 'r': ++ *mask |= BPF_DEVCG_ACC_READ; ++ break; ++ case 'w': ++ *mask |= BPF_DEVCG_ACC_WRITE; ++ break; ++ case 'm': ++ *mask |= BPF_DEVCG_ACC_MKNOD; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static int bpf_device_type(char type) ++{ ++ switch (type) { ++ case 'a': ++ return 0; ++ case 'b': ++ return BPF_DEVCG_DEV_BLOCK; ++ case 'c': ++ return BPF_DEVCG_DEV_CHAR; ++ } ++ ++ return -1; ++} ++ ++static inline bool bpf_device_all_access(__u32 access_mask) ++{ ++ return access_mask == (BPF_DEVCG_ACC_READ | 
BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD); ++} ++ ++struct bpf_program *bpf_program_new(uint32_t prog_type) ++{ ++ __do_free struct bpf_program *prog = NULL; ++ ++ prog = zalloc(sizeof(struct bpf_program)); ++ if (!prog) ++ return ret_set_errno(NULL, ENOMEM); ++ ++ prog->prog_type = prog_type; ++ prog->kernel_fd = -EBADF; ++ /* ++ * By default a whitelist is used unless the user tells us otherwise. ++ */ ++ prog->device_list_type = LXC_BPF_DEVICE_CGROUP_WHITELIST; ++ ++ return move_ptr(prog); ++} ++ ++int bpf_program_init(struct bpf_program *prog) ++{ ++ if (!prog) ++ return ret_set_errno(-1, EINVAL); ++ ++ const struct bpf_insn pre_insn[] = { ++ /* load device type to r2 */ ++ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, access_type)), ++ BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF), ++ ++ /* load access type to r3 */ ++ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, access_type)), ++ BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16), ++ ++ /* load major number to r4 */ ++ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, major)), ++ ++ /* load minor number to r5 */ ++ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, offsetof(struct bpf_cgroup_dev_ctx, minor)), ++ }; ++ ++ return bpf_program_add_instructions(prog, pre_insn, ARRAY_SIZE(pre_insn)); ++} ++ ++int bpf_program_append_device(struct bpf_program *prog, struct device_item *device) ++{ ++ int ret; ++ int jump_nr = 1; ++ __u32 access_mask = 0; ++ int device_type; ++ struct bpf_insn bpf_access_decision[2]; ++ bool add_exist = false; ++ ++ if (!prog || !device) ++ return ret_set_errno(-1, EINVAL); ++ ++ /* This is a global rule so no need to append anything. 
*/ ++ if (device->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE) { ++ prog->device_list_type = device->global_rule; ++ return 0; ++ } ++ ++ ret = bpf_access_mask(device->access, &access_mask); ++ if (ret < 0) ++ return log_error_errno(ret, -ret, "Invalid access mask specified %s", device->access); ++ ++ if (!bpf_device_all_access(access_mask)) ++ jump_nr += 3; ++ ++ device_type = bpf_device_type(device->type); ++ if (device_type < 0) ++ return log_error_errno(-1, EINVAL, "Invalid bpf cgroup device type %c", device->type); ++ ++ if (device_type > 0) ++ jump_nr++; ++ ++ if (device->major >= 0) ++ jump_nr++; ++ ++ if (device->minor >= 0) ++ jump_nr++; ++ ++ if (device_type > 0) { ++ struct bpf_insn ins[] = { ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, device_type, jump_nr--), ++ }; ++ ++ ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); ++ if (ret) ++ return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); ++ add_exist = true; ++ } ++ ++ if (!bpf_device_all_access(access_mask)) { ++ struct bpf_insn ins[] = { ++ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), ++ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access_mask), ++ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, jump_nr-2), ++ }; ++ ++ jump_nr -= 3; ++ ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); ++ if (ret) ++ return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); ++ add_exist = true; ++ } ++ ++ if (device->major >= 0) { ++ struct bpf_insn ins[] = { ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, device->major, jump_nr--), ++ }; ++ ++ ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); ++ if (ret) ++ return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); ++ add_exist = true; ++ } ++ ++ if (device->minor >= 0) { ++ struct bpf_insn ins[] = { ++ BPF_JMP_IMM(BPF_JNE, BPF_REG_5, device->minor, jump_nr--), ++ }; ++ ++ ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); ++ if (ret) ++ return log_error_errno(-1, 
errno, "Failed to add instructions to bpf cgroup program"); ++ add_exist = true; ++ } ++ ++ if (add_exist) { ++ bpf_access_decision[0] = BPF_MOV64_IMM(BPF_REG_0, device->allow); ++ bpf_access_decision[1] = BPF_EXIT_INSN(); ++ ret = bpf_program_add_instructions(prog, bpf_access_decision, ++ ARRAY_SIZE(bpf_access_decision)); ++ if (ret) ++ return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); ++ } ++ ++ return 0; ++} ++ ++int bpf_program_finalize(struct bpf_program *prog) ++{ ++ struct bpf_insn ins[] = { ++ BPF_MOV64_IMM(BPF_REG_0, prog ? prog->device_list_type : 0), /* prog is only NULL-checked below, so do not dereference it unconditionally here */ ++ BPF_EXIT_INSN(), ++ }; ++ ++ if (!prog) ++ return ret_set_errno(-1, EINVAL); ++ ++ TRACE("Implementing %s bpf device cgroup program", ++ prog->device_list_type == LXC_BPF_DEVICE_CGROUP_BLACKLIST ++ ? "blacklist" ++ : "whitelist"); ++ return bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); ++} ++ ++static int bpf_program_load_kernel(struct bpf_program *prog) ++{ ++ __do_free char *log_buf = NULL; ++ __u32 log_level = 0; ++ __u32 log_size = 0; ++ union bpf_attr attr; ++ struct rlimit limit = { ++ .rlim_cur = RLIM_INFINITY, ++ .rlim_max = RLIM_INFINITY, ++ }; ++ ++ if (prog->kernel_fd >= 0) { ++ return 0; ++ } ++ ++ if (lxc_log_get_level() <= LXC_LOG_LEVEL_DEBUG) { ++ log_buf = zalloc(BPF_LOG_BUF_SIZE); ++ if (!log_buf) { ++ WARN("Failed to allocate bpf log buffer"); ++ } else { ++ log_level = BPF_LOG_LEVEL; ++ log_size = BPF_LOG_BUF_SIZE; ++ } ++ } ++ ++ if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0) ++ return log_error_errno(-1, errno, "Failed to set rlimit memlock to unlimited"); ++ ++ attr = (union bpf_attr){ ++ .prog_type = prog->prog_type, ++ .insns = PTR_TO_UINT64(prog->instructions), ++ .insn_cnt = prog->n_instructions, ++ .license = PTR_TO_UINT64("GPL"), ++ .log_buf = PTR_TO_UINT64(log_buf), ++ .log_level = log_level, ++ .log_size = log_size, ++ }; ++ ++ prog->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); ++ if (prog->kernel_fd < 0) ++ return log_error_errno(-1, 
errno, "Failed to load bpf program: %s", log_buf ?: "(null)"); /* log_buf stays NULL unless debug logging is enabled */ ++ ++ TRACE("Loaded bpf program: %s", log_buf ?: "(null)"); ++ return 0; ++} ++ ++int bpf_program_cgroup_attach(struct bpf_program *prog, int type, ++ const char *path, uint32_t flags) ++{ ++ __do_free char *copy = NULL; ++ __do_close int fd = -EBADF; ++ union bpf_attr attr; ++ int ret; ++ ++ if (!prog || !path) /* path is passed to strdup() and open() below */ ++ return ret_set_errno(-1, EINVAL); ++ ++ if (flags & ~(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)) ++ return log_error_errno(-1, EINVAL, "Invalid flags for bpf program"); ++ ++ if (prog->attached_path) { ++ if (prog->attached_type != type) ++ return log_error_errno(-1, EBUSY, "Wrong type for bpf program"); ++ ++ if (prog->attached_flags != flags) ++ return log_error_errno(-1, EBUSY, "Wrong flags for bpf program"); ++ ++ if (flags != BPF_F_ALLOW_OVERRIDE) ++ return true; /* NOTE(review): yields 1 from an int function whose other success paths return 0 - confirm what callers expect */ ++ } ++ ++ ret = bpf_program_load_kernel(prog); ++ if (ret < 0) ++ return log_error_errno(-1, errno, "Failed to load bpf program"); /* second argument is an errno value, not the -1 return code */ ++ ++ copy = strdup(path); ++ if (!copy) ++ return log_error_errno(-1, ENOMEM, "Failed to duplicate cgroup path %s", path); ++ ++ fd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC); ++ if (fd < 0) ++ return log_error_errno(-1, errno, "Failed to open cgroup path %s", path); ++ ++ attr = (union bpf_attr){ ++ .attach_type = type, ++ .target_fd = fd, ++ .attach_bpf_fd = prog->kernel_fd, ++ .attach_flags = flags, ++ }; ++ ++ ret = bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); ++ if (ret < 0) ++ return log_error_errno(-1, errno, "Failed to attach bpf program"); ++ ++ free_move_ptr(prog->attached_path, copy); ++ prog->attached_type = type; ++ prog->attached_flags = flags; ++ ++ TRACE("Loaded and attached bpf program to cgroup %s", prog->attached_path); ++ return 0; ++} ++ ++int bpf_program_cgroup_detach(struct bpf_program *prog) ++{ ++ int ret; ++ __do_close int fd = -EBADF; ++ ++ if (!prog) ++ return 0; ++ ++ if (!prog->attached_path) ++ return 0; ++ ++ fd = open(prog->attached_path, O_DIRECTORY | O_RDONLY | O_CLOEXEC); ++ if (fd 
< 0) { ++ if (errno != ENOENT) ++ return log_error_errno(-1, errno, "Failed to open attach cgroup %s", ++ prog->attached_path); ++ } else { ++ union bpf_attr attr; ++ ++ attr = (union bpf_attr){ ++ .attach_type = prog->attached_type, ++ .target_fd = fd, ++ .attach_bpf_fd = prog->kernel_fd, ++ }; ++ ++ ret = bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); ++ if (ret < 0) ++ return log_error_errno(-1, errno, "Failed to detach bpf program from cgroup %s", ++ prog->attached_path); ++ } ++ ++ free(prog->attached_path); ++ prog->attached_path = NULL; ++ ++ return 0; ++} ++ ++void lxc_clear_cgroup2_devices(struct lxc_conf *conf) ++{ ++ if (conf->cgroup2_devices) { ++ bpf_program_free(conf->cgroup2_devices); /* bpf_program_free() detaches the program before freeing it */ ++ conf->cgroup2_devices = NULL; /* do not leave a dangling pointer for a later clear/free */ ++ } ++} ++ ++int bpf_list_add_device(struct lxc_conf *conf, struct device_item *device) ++{ ++ __do_free struct lxc_list *list_elem = NULL; ++ __do_free struct device_item *new_device = NULL; ++ struct lxc_list *it; ++ ++ lxc_list_for_each(it, &conf->devices) { ++ struct device_item *cur = it->elem; ++ ++ if (cur->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE && ++ device->global_rule > LXC_BPF_DEVICE_CGROUP_LOCAL_RULE) { ++ TRACE("Switched from %s to %s", ++ cur->global_rule == LXC_BPF_DEVICE_CGROUP_WHITELIST ++ ? "whitelist" ++ : "blacklist", ++ device->global_rule == LXC_BPF_DEVICE_CGROUP_WHITELIST ++ ? "whitelist" ++ : "blacklist"); ++ cur->global_rule = device->global_rule; ++ return 1; ++ } ++ ++ if (cur->type != device->type) ++ continue; ++ if (cur->major != device->major) ++ continue; ++ if (cur->minor != device->minor) ++ continue; ++ if (strcmp(cur->access, device->access)) ++ continue; ++ ++ /* ++ * The rule is switched from allow to deny or vice versa so ++ * don't bother allocating just flip the existing one. 
++ */ ++ if (cur->allow != device->allow) { ++ cur->allow = device->allow; ++ return log_trace(0, "Switched existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d", ++ cur->type, cur->major, cur->minor, ++ cur->access, cur->allow, ++ cur->global_rule); ++ } ++ ++ return log_trace(1, "Reusing existing rule of bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d", ++ cur->type, cur->major, cur->minor, cur->access, ++ cur->allow, cur->global_rule); ++ } ++ ++ list_elem = malloc(sizeof(*list_elem)); ++ if (!list_elem) ++ return log_error_errno(-1, ENOMEM, "Failed to allocate new device list"); ++ ++ new_device = memdup(device, sizeof(struct device_item)); ++ if (!new_device) ++ return log_error_errno(-1, ENOMEM, "Failed to allocate new device item"); ++ ++ lxc_list_add_elem(list_elem, move_ptr(new_device)); ++ lxc_list_add_tail(&conf->devices, move_ptr(list_elem)); ++ ++ return 0; ++} ++ ++bool bpf_devices_cgroup_supported(void) ++{ ++ const struct bpf_insn dummy[] = { ++ BPF_MOV64_IMM(BPF_REG_0, 1), ++ BPF_EXIT_INSN(), ++ }; ++ ++ __do_bpf_program_free struct bpf_program *prog = NULL; ++ int ret; ++ ++ if (geteuid() != 0) ++ return log_trace(false, ++ "The bpf device cgroup requires real root"); ++ ++ prog = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE); ++ if (!prog) /* bpf_program_new() returns NULL on allocation failure */ ++ return log_trace(false, "Failed to allocate new bpf device cgroup program"); ++ ++ ret = bpf_program_init(prog); ++ if (ret) ++ return log_error_errno(false, ENOMEM, "Failed to initialize bpf program"); ++ ++ ret = bpf_program_add_instructions(prog, dummy, ARRAY_SIZE(dummy)); ++ if (ret < 0) ++ return log_trace(false, "Failed to add new instructions to bpf device cgroup program"); ++ ++ ret = bpf_program_load_kernel(prog); ++ if (ret < 0) ++ return log_trace(false, "Failed to load new bpf device cgroup program"); ++ ++ return log_trace(true, "The bpf device cgroup is supported"); ++} ++#endif +diff --git 
a/src/lxc/commands.c b/src/lxc/commands.c +index b6ae101..ae50cc6 100644 +--- a/src/lxc/commands.c ++++ b/src/lxc/commands.c +@@ -86,6 +86,10 @@ static const char *lxc_cmd_str(lxc_cmd_t cmd) + [LXC_CMD_GET_INIT_PIDFD] = "get_init_pidfd", + [LXC_CMD_GET_LIMITING_CGROUP] = "get_limiting_cgroup", + [LXC_CMD_GET_LIMITING_CGROUP2_FD] = "get_limiting_cgroup2_fd", ++#ifdef HAVE_ISULAD ++ [LXC_CMD_SET_TERMINAL_FIFOS] = "set_terminal_fifos", ++ [LXC_CMD_SET_TERMINAL_WINCH] = "set_terminal_winch", ++#endif + }; + + if (cmd >= LXC_CMD_MAX) +@@ -117,7 +121,15 @@ static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) + int ret; + struct lxc_cmd_rsp *rsp = &cmd->rsp; + ++#ifdef HAVE_ISULAD ++ /*isulad: add timeout 1s to avoid long block due to [lxc monitor] error*/ ++ ret = lxc_abstract_unix_recv_fds_timeout(sock, &fd_rsp, 1, rsp, sizeof(*rsp), 1000 * 1000); ++ if (ret < 0 && (errno == ECONNRESET || errno == EAGAIN || errno == EWOULDBLOCK)) { ++ errno = ECONNRESET; /*isulad set errno ECONNRESET when timeout */ ++ } ++#else + ret = lxc_abstract_unix_recv_fds(sock, &fd_rsp, 1, rsp, sizeof(*rsp)); ++#endif + if (ret < 0) + return log_warn_errno(-1, + errno, "Failed to receive response for command \"%s\"", +@@ -1260,7 +1272,11 @@ int lxc_cmd_serve_state_clients(const char *name, const char *lxcpath, + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); + if (ret < 0) ++#ifdef HAVE_ISULAD ++ return log_warn_errno(-1, errno, "Failed to serve state clients"); ++#else + return log_error_errno(-1, errno, "Failed to serve state clients"); ++#endif + + return 0; + } +@@ -1475,6 +1491,123 @@ static int lxc_cmd_get_limiting_cgroup2_fd_callback(int fd, + return ret_errno(ENOSYS); + } + ++#ifdef HAVE_ISULAD ++/* ++ * isulad: lxc_cmd_set_terminal_fifos: Set the fifos used for the container as terminal input/output ++ * ++ * @hashed_sock_name: hashed socket name ++ * ++ * Returns 0 when success, else when fail. 
++ */ ++int lxc_cmd_set_terminal_fifos(const char *name, const char *lxcpath, const char *in_fifo, ++ const char *out_fifo, const char *err_fifo) ++{ ++ int ret = 0, stopped = 0; ++ int len = 0; ++ char *tmp = NULL; ++ const char *split = "&&&&", *none_fifo_name = "none"; ++ const char *cmd_in_fifo = in_fifo ? in_fifo : none_fifo_name; ++ const char *cmd_out_fifo = out_fifo ? out_fifo : none_fifo_name; ++ const char *cmd_err_fifo = err_fifo ? err_fifo : none_fifo_name; ++ ++ if (len + strlen(cmd_in_fifo) + strlen(split) + strlen(cmd_out_fifo) + ++ strlen(split) + strlen(cmd_err_fifo) == SIZE_MAX) ++ return -1; ++ len += strlen(cmd_in_fifo) + strlen(split) + strlen(cmd_out_fifo) + strlen(split) + strlen(cmd_err_fifo) + 1; ++ tmp = malloc(len); ++ if (tmp == NULL) ++ return -1; ++ ret = snprintf(tmp, len, "%s%s%s%s%s", cmd_in_fifo, split, cmd_out_fifo, split, cmd_err_fifo); ++ if (ret < 0 || ret >= len) { ++ ERROR("Failed to snprintf in fifo of command"); ++ free(tmp); ++ return -1; ++ } ++ ++ struct lxc_cmd_rr cmd = { ++ .req = { ++ .cmd = LXC_CMD_SET_TERMINAL_FIFOS, ++ .datalen = strlen(tmp)+1, ++ .data = tmp, ++ }, ++ }; ++ ++ ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); ++ if (ret < 0) { ++ ERROR("Failed to send command to container"); ++ free(tmp); ++ return -1; ++ } ++ ++ if (cmd.rsp.ret != 0) { ++ ERROR("Command response error:%d", cmd.rsp.ret); ++ free(tmp); ++ return -1; ++ } ++ ++ free(tmp); ++ return 0; ++} ++ ++static int lxc_cmd_set_terminal_fifos_callback(int fd, struct lxc_cmd_req *req, ++ struct lxc_handler *handler, struct lxc_epoll_descr *descr) ++{ ++ struct lxc_cmd_rsp rsp; ++ memset(&rsp, 0, sizeof(rsp)); ++ ++ rsp.ret = lxc_terminal_add_fifos(handler->conf, req->data);; ++ ++ return lxc_cmd_rsp_send(fd, &rsp); ++} ++ ++struct lxc_cmd_set_terminal_winch_request { ++ unsigned int height; ++ unsigned int width; ++}; ++ ++int lxc_cmd_set_terminal_winch(const char *name, const char *lxcpath, unsigned int height, unsigned int width) ++{ ++ int 
ret = 0, stopped = 0; ++ struct lxc_cmd_set_terminal_winch_request data = { 0 }; ++ ++ data.height = height; ++ data.width = width; ++ ++ struct lxc_cmd_rr cmd = { ++ .req = { ++ .cmd = LXC_CMD_SET_TERMINAL_WINCH, ++ .datalen = sizeof(struct lxc_cmd_set_terminal_winch_request), ++ .data = &data, ++ }, ++ }; ++ ++ ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); ++ if (ret < 0) { ++ ERROR("Failed to send command to container"); ++ return -1; ++ } ++ ++ if (cmd.rsp.ret != 0) { ++ ERROR("Command response error:%d", cmd.rsp.ret); ++ return -1; ++ } ++ return 0; ++} ++ ++static int lxc_cmd_set_terminal_winch_callback(int fd, struct lxc_cmd_req *req, ++ struct lxc_handler *handler, struct lxc_epoll_descr *descr) ++{ ++ struct lxc_cmd_rsp rsp; ++ struct lxc_cmd_set_terminal_winch_request *data = (struct lxc_cmd_set_terminal_winch_request *)(req->data); ++ memset(&rsp, 0, sizeof(rsp)); ++ ++ rsp.ret = lxc_set_terminal_winsz(&handler->conf->console, data->height, data->width);; ++ ++ return lxc_cmd_rsp_send(fd, &rsp); ++ ++} ++#endif ++ + static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler, + struct lxc_epoll_descr *descr) +@@ -1504,10 +1637,18 @@ static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, + [LXC_CMD_GET_INIT_PIDFD] = lxc_cmd_get_init_pidfd_callback, + [LXC_CMD_GET_LIMITING_CGROUP] = lxc_cmd_get_limiting_cgroup_callback, + [LXC_CMD_GET_LIMITING_CGROUP2_FD] = lxc_cmd_get_limiting_cgroup2_fd_callback, ++#ifdef HAVE_ISULAD ++ [LXC_CMD_SET_TERMINAL_FIFOS] = lxc_cmd_set_terminal_fifos_callback, ++ [LXC_CMD_SET_TERMINAL_WINCH] = lxc_cmd_set_terminal_winch_callback, ++#endif + }; + + if (req->cmd >= LXC_CMD_MAX) ++#ifdef HAVE_ISULAD ++ return log_error_errno(-1, ENOENT, "Undefined command id %d", req->cmd); ++#else + return log_trace_errno(-1, EINVAL, "Invalid command id %d", req->cmd); ++#endif + + return cb[req->cmd](fd, req, handler, descr); + } +@@ -1646,6 +1787,44 @@ static int lxc_cmd_accept(int fd, uint32_t events, 
void *data, + return ret; + } + ++#ifdef HAVE_ISULAD ++int lxc_cmd_init(const char *name, const char *lxcpath, const char *suffix) ++{ ++ __do_close int fd = -EBADF; ++ int ret; ++ char path[LXC_AUDS_ADDR_LEN] = {0}; ++ __do_free char *runtime_sock_dir = NULL; ++ ++ runtime_sock_dir = generate_named_unix_sock_dir(name); ++ if (runtime_sock_dir == NULL) ++ return -1; ++ ++ if (mkdir_p(runtime_sock_dir, 0700) < 0) /* path is still empty here, so report the directory that failed */ ++ return log_error_errno(-1, errno, "Failed to create container runtime unix sock directory %s", runtime_sock_dir); ++ ++ if (generate_named_unix_sock_path(name, suffix, path, sizeof(path)) != 0) ++ return -1; ++ ++ fd = lxc_named_unix_open(path, SOCK_STREAM, 0); ++ if (fd < 0) { ++ if (errno == EADDRINUSE) { ++ WARN("Container \"%s\" appears to be already running", name); ++ (void)unlink(path); ++ ++ fd = lxc_named_unix_open(path, SOCK_STREAM, 0); ++ if (fd < 0) ++ return log_error_errno(-1, errno, "Failed to create command socket %s", path); ++ } else ++ return log_error_errno(-1, errno, "Failed to create command socket %s", path); ++ } ++ ++ ret = fcntl(fd, F_SETFD, FD_CLOEXEC); ++ if (ret < 0) ++ return log_error_errno(-1, errno, "Failed to set FD_CLOEXEC on command socket file descriptor"); ++ ++ return log_trace(move_fd(fd), "Created unix socket \"%s\"", path); ++} ++#else + int lxc_cmd_init(const char *name, const char *lxcpath, const char *suffix) + { + __do_close int fd = -EBADF; +@@ -1670,6 +1849,7 @@ int lxc_cmd_init(const char *name, const char *lxcpath, const char *suffix) + + return log_trace(move_fd(fd), "Created abstract unix socket \"%s\"", &path[1]); + } ++#endif + + int lxc_cmd_mainloop_add(const char *name, struct lxc_epoll_descr *descr, + struct lxc_handler *handler) +diff --git a/src/lxc/commands.h b/src/lxc/commands.h +index 3624a14..f6371fd 100644 +--- a/src/lxc/commands.h ++++ b/src/lxc/commands.h +@@ -40,6 +40,10 @@ typedef enum { + LXC_CMD_GET_INIT_PIDFD, + LXC_CMD_GET_LIMITING_CGROUP, + LXC_CMD_GET_LIMITING_CGROUP2_FD, ++#ifdef HAVE_ISULAD 
++ LXC_CMD_SET_TERMINAL_FIFOS, ++ LXC_CMD_SET_TERMINAL_WINCH, ++#endif + LXC_CMD_MAX, + } lxc_cmd_t; + +@@ -136,4 +140,10 @@ extern char *lxc_cmd_get_limiting_cgroup_path(const char *name, + const char *subsystem); + extern int lxc_cmd_get_limiting_cgroup2_fd(const char *name, const char *lxcpath); + ++#ifdef HAVE_ISULAD ++extern int lxc_cmd_set_terminal_fifos(const char *name, const char *lxcpath, ++ const char *in_fifo, const char *out_fifo, const char *err_fifo); ++extern int lxc_cmd_set_terminal_winch(const char *name, const char *lxcpath, unsigned int height, unsigned int width); ++#endif ++ + #endif /* __commands_h */ +diff --git a/src/lxc/conf.h b/src/lxc/conf.h +index b72afba..0478eb1 100644 +--- a/src/lxc/conf.h ++++ b/src/lxc/conf.h +@@ -23,6 +23,10 @@ + #include "start.h" + #include "terminal.h" + ++#ifdef HAVE_ISULAD ++#include "oci_runtime_hooks.h" ++#endif ++ + #if HAVE_SYS_RESOURCE_H + #include + #endif +@@ -146,6 +150,8 @@ struct lxc_tty_info { + * @mountflags : the portion of @options that are flags + * @data : the portion of @options that are not flags + * @managed : whether it is managed by LXC ++ * @maskedpaths: A list of paths to be masked over inside the container ++ * @ropaths : A list of paths to be remounted read-only inside the container + */ + struct lxc_rootfs { + char *path; +@@ -155,6 +161,14 @@ struct lxc_rootfs { + unsigned long mountflags; + char *data; + bool managed; ++#ifdef HAVE_ISULAD ++ /* isulad: maskedpaths */ ++ struct lxc_list maskedpaths; ++ /* isulad: ropaths */ ++ struct lxc_list ropaths; ++ /* isulad: errfd */ ++ int errfd; ++#endif + }; + + /* +@@ -203,6 +217,11 @@ enum lxchooks { + LXCHOOK_CLONE, + LXCHOOK_DESTROY, + LXCHOOK_START_HOST, ++#ifdef HAVE_ISULAD ++ OCI_HOOK_PRESTART, ++ OCI_HOOK_POSTSTART, ++ OCI_HOOK_POSTSTOP, ++#endif + NUM_LXC_HOOKS + }; + +@@ -233,6 +252,27 @@ struct device_item { + int global_rule; + }; + ++#ifdef HAVE_ISULAD ++/* ++ * iSulad: Defines a structure to store the devices which will ++ 
* be attached in container ++ * @name : the target device name in container ++ * @type : the type of target device "c" or "b" ++ * @file_mode : file mode for the device ++ * @maj : major number for the device ++ * @min : minor number for the device ++ */ ++struct lxc_populate_devs { ++ char *name; ++ char *type; ++ mode_t file_mode; ++ int maj; ++ int min; ++ uid_t uid; ++ gid_t gid; ++}; ++#endif ++ + struct lxc_conf { + /* Pointer to the name of the container. Do not free! */ + const char *name; +@@ -401,6 +441,40 @@ struct lxc_conf { + /* Absolute path (in the container) to the shared mount point */ + char *path_cont; + } shmount; ++ ++#ifdef HAVE_ISULAD ++ /* support oci hook */ ++ oci_runtime_spec_hooks *ocihooks; ++ ++ /* init args used to replace init_cmd */ ++ char **init_argv; ++ size_t init_argc; ++ ++ gid_t *init_groups; ++ size_t init_groups_len; ++ ++ /* populate devices */ ++ struct lxc_list populate_devs; ++ mode_t umask; // umask value ++ ++ char *container_info_file; ++ ++ /* exit fifo fd */ ++ int exit_fd; ++ ++ /* record error messages */ ++ char *errmsg; ++ ++ /* pipe fds for getting error messages of child or grandchild process */ ++ int errpipe[2]; ++ ++ /* systemd value */ ++ char *systemd; ++ ++ /* Linux Security Modules SELinux context for device mount */ ++ char *lsm_se_mount_context; ++#endif ++ + }; + + extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, +@@ -439,7 +513,11 @@ extern int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, + const char *name, const char *lxcpath); + extern int lxc_setup(struct lxc_handler *handler); + extern int lxc_setup_parent(struct lxc_handler *handler); ++#ifdef HAVE_ISULAD ++extern int setup_resource_limits(struct lxc_list *limits, pid_t pid, int errfd); ++#else + extern int setup_resource_limits(struct lxc_list *limits, pid_t pid); ++#endif + extern int find_unmapped_nsid(const struct lxc_conf *conf, enum idtype idtype); + extern int mapped_hostid(unsigned id, const struct lxc_conf 
*conf, + enum idtype idtype); +@@ -447,8 +525,14 @@ extern int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), + void *data, const char *fn_name); + extern int userns_exec_full(struct lxc_conf *conf, int (*fn)(void *), + void *data, const char *fn_name); ++#ifdef HAVE_ISULAD ++// isulad modify ++extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, ++ unsigned long *pflags, char **mntdata); ++#else + extern int parse_mntopts(const char *mntopts, unsigned long *mntflags, + char **mntdata); ++#endif + extern int parse_propagationopts(const char *mntopts, unsigned long *pflags); + extern void tmp_proc_unmount(struct lxc_conf *lxc_conf); + extern void turn_into_dependent_mounts(void); +@@ -480,4 +564,15 @@ static inline int chown_mapped_root(const char *path, const struct lxc_conf *con + return userns_exec_mapped_root(path, -EBADF, conf); + } + ++#ifdef HAVE_ISULAD ++// isulad add ++int lxc_clear_init_args(struct lxc_conf *lxc_conf); ++int lxc_clear_init_groups(struct lxc_conf *lxc_conf); ++int lxc_clear_populate_devices(struct lxc_conf *c); ++int lxc_clear_rootfs_masked_paths(struct lxc_conf *c); ++int lxc_clear_rootfs_ro_paths(struct lxc_conf *c); ++int lxc_drop_caps(struct lxc_conf *conf); ++int run_oci_hooks(const char *name, const char *hookname, struct lxc_conf *conf, const char *lxcpath); ++void lxc_close_error_pipe(int *errpipe); ++#endif + #endif /* __LXC_CONF_H */ +diff --git a/src/lxc/isulad_utils.c b/src/lxc/isulad_utils.c +new file mode 100644 +index 0000000..47d0ff5 +--- /dev/null ++++ b/src/lxc/isulad_utils.c +@@ -0,0 +1,317 @@ ++/* SPDX-License-Identifier: LGPL-2.1+ */ ++/****************************************************************************** ++ * Copyright (c) Huawei Technologies Co., Ltd. 2020. 
Allrights reserved ++ * Description: isulad utils ++ * Author: lifeng ++ * Create: 2020-04-11 ++******************************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "isulad_utils.h" ++#include "log.h" ++#include "path.h" ++#include "file_utils.h" ++ ++lxc_log_define(isulad_utils, lxc); ++ ++void *lxc_common_calloc_s(size_t size) ++{ ++ if (size == 0 || size > SIZE_MAX) { ++ return NULL; ++ } ++ ++ return calloc((size_t)1, size); ++} ++ ++int lxc_mem_realloc(void **newptr, size_t newsize, void *oldptr, size_t oldsize) ++{ ++ void *tmp = NULL; ++ ++ if (newsize == 0) { ++ goto err_out; ++ } ++ ++ tmp = lxc_common_calloc_s(newsize); ++ if (tmp == NULL) { ++ ERROR("Failed to malloc memory"); ++ goto err_out; ++ } ++ ++ if (oldptr != NULL) { ++ memcpy(tmp, oldptr, (newsize < oldsize) ? newsize : oldsize); ++ ++ memset(oldptr, 0, oldsize); ++ ++ free(oldptr); ++ } ++ ++ *newptr = tmp; ++ return 0; ++ ++err_out: ++ return -1; ++} ++ ++char *safe_strdup(const char *src) ++{ ++ char *dst = NULL; ++ ++ if (src == NULL) { ++ return NULL; ++ } ++ ++ dst = strdup(src); ++ if (dst == NULL) { ++ abort(); ++ } ++ ++ return dst; ++} ++ ++int lxc_open(const char *filename, int flags, mode_t mode) ++{ ++ char rpath[PATH_MAX] = {0x00}; ++ ++ if (cleanpath(filename, rpath, sizeof(rpath)) == NULL) { ++ return -1; ++ } ++ if (mode) { ++ return open(rpath, (int)((unsigned int)flags | O_CLOEXEC), mode); ++ } else { ++ return open(rpath, (int)((unsigned int)flags | O_CLOEXEC)); ++ } ++} ++ ++FILE *lxc_fopen(const char *filename, const char *mode) ++{ ++ char rpath[PATH_MAX] = {0x00}; ++ ++ if (cleanpath(filename, rpath, sizeof(rpath)) == NULL) { ++ return NULL; ++ } ++ ++ return fopen_cloexec(rpath, mode); ++} ++ ++/* isulad: write error message */ ++void lxc_write_error_message(int errfd, const char *format, ...) 
++{ ++ int ret; ++ char errbuf[BUFSIZ + 1] = {0}; ++ ssize_t sret; ++ va_list argp; ++ ++ if (errfd <= 0) ++ return; ++ ++ va_start(argp, format); ++#pragma GCC diagnostic push ++#pragma GCC diagnostic ignored "-Wformat-nonliteral" ++ ret = vsnprintf(errbuf, BUFSIZ, format, argp); ++#pragma GCC diagnostic pop ++ va_end(argp); ++ if (ret < 0 || ret >= BUFSIZ) ++ SYSERROR("Failed to call vsnprintf"); ++ sret = write(errfd, errbuf, strlen(errbuf)); ++ if (sret < 0) ++ SYSERROR("Write errbuf failed"); ++} ++ ++/* isulad: read file to buffer */ ++int lxc_file2str(const char *filename, char ret[], int cap) ++{ ++ int fd, num_read; ++ ++ if ((fd = lxc_open(filename, O_RDONLY | O_CLOEXEC, 0)) == -1) ++ return -1; ++ if ((num_read = read(fd, ret, cap - 1)) <= 0) ++ num_read = -1; ++ else ++ ret[num_read] = 0; ++ close(fd); ++ ++ return num_read; ++} ++ ++/* isuald: lxc_stat2proc() makes sure it can handle arbitrary executable file basenames ++ * for `cmd', i.e. those with embedded whitespace or embedded ')'s. ++ * Such names confuse %s (see scanf(3)), so the string is split and %39c ++ * is used instead. (except for embedded ')' "(%[^)]c)" would work. ++ */ ++static proc_t *lxc_stat2proc(const char *S) ++{ ++ int num; ++ proc_t *P = NULL; ++ char *tmp = NULL; ++ ++ if (!S) ++ return NULL; ++ ++ tmp = strrchr(S, ')'); /* split into "PID (cmd" and "" */ ++ if (!tmp) ++ return NULL; ++ *tmp = '\0'; /* replace trailing ')' with NUL */ ++ ++ P = malloc(sizeof(proc_t)); ++ if (P == NULL) ++ return NULL; ++ (void)memset(P, 0x00, sizeof(proc_t)); ++ ++ /* parse these two strings separately, skipping the leading "(". */ ++ num = sscanf(S, "%d (%15c", &P->pid, P->cmd); /* comm[16] in kernel */ ++ if (num != 2) { ++ ERROR("Call sscanf error: %s", errno ? 
strerror(errno) : ""); ++ free(P); ++ return NULL; ++ } ++ num = sscanf(tmp + 2, /* skip space after ')' too */ ++ "%c " ++ "%d %d %d %d %d " ++ "%lu %lu %lu %lu %lu " ++ "%Lu %Lu %Lu %Lu " /* utime stime cutime cstime */ ++ "%ld %ld %ld %ld " ++ "%Lu " /* start_time */ ++ "%lu " ++ "%ld " ++ "%lu %lu %lu %lu %lu %lu " ++ "%*s %*s %*s %*s " /* discard, no RT signals & Linux 2.1 used hex */ ++ "%lu %lu %lu " ++ "%d %d " ++ "%lu %lu", ++ &P->state, ++ &P->ppid, &P->pgrp, &P->session, &P->tty, &P->tpgid, ++ &P->flags, &P->min_flt, &P->cmin_flt, &P->maj_flt, &P->cmaj_flt, ++ &P->utime, &P->stime, &P->cutime, &P->cstime, ++ &P->priority, &P->nice, &P->timeout, &P->it_real_value, ++ &P->start_time, ++ &P->vsize, ++ &P->rss, ++ &P->rss_rlim, &P->start_code, &P->end_code, &P->start_stack, &P->kstk_esp, ++ &P->kstk_eip, ++ &P->wchan, &P->nswap, &P->cnswap, ++ &P->exit_signal, &P->processor, /* 2.2.1 ends with "exit_signal" */ ++ &P->rtprio, &P->sched /* both added to 2.5.18 */ ++ ); ++ if (num != 35) { ++ ERROR("Call sscanf error: %s", errno ? strerror(errno) : ""); ++ free(P); ++ return NULL; ++ } ++ if (P->tty == 0) ++ P->tty = -1; /* the old notty val, update elsewhere bef. 
moving to 0 */ ++ return P; ++} ++ ++/* isulad: get starttime of process pid */ ++unsigned long long lxc_get_process_startat(pid_t pid) ++{ ++ int sret = 0; ++ unsigned long long startat = 0; ++ proc_t *pid_info = NULL; ++ char filename[PATH_MAX] = {0}; ++ char sbuf[1024] = {0}; /* bufs for stat */ ++ ++ sret = snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); ++ if (sret < 0 || sret >= sizeof(filename)) { ++ ERROR("Failed to sprintf filename"); ++ goto out; ++ } ++ ++ if ((lxc_file2str(filename, sbuf, sizeof(sbuf))) == -1) { ++ SYSERROR("Failed to read pidfile %s", filename); ++ goto out; ++ } ++ ++ pid_info = lxc_stat2proc(sbuf); ++ if (!pid_info) { ++ ERROR("Failed to get proc stat info"); ++ goto out; ++ } ++ ++ startat = pid_info->start_time; ++out: ++ free(pid_info); ++ return startat; ++} ++ ++// isulad: set env home in container ++int lxc_setup_env_home(uid_t uid) ++{ ++ char *homedir = "/"; // default home dir is / ++ struct passwd pw, *pwbufp = NULL; ++ char buf[BUFSIZ]; ++ int ret; ++ ++ ret = getpwuid_r(uid, &pw, buf, sizeof(buf), &pwbufp); ++ if ((ret == 0) && (pwbufp != NULL) && (pwbufp->pw_uid == uid)) { ++ homedir = pwbufp->pw_dir; ++ goto set_env; ++ } ++ ++ WARN("User invalid, can not find user '%u'", uid); ++ ++set_env: ++ // if we didn't configure HOME, set it based on uid ++ if (setenv("HOME", homedir, 0) < 0) { ++ SYSERROR("Unable to set env 'HOME'"); ++ return -1; ++ } ++ ++ NOTICE("Setted env 'HOME' to %s", homedir); ++ return 0; ++} ++ ++bool lxc_process_alive(pid_t pid, unsigned long long start_time) ++{ ++ int sret = 0; ++ bool alive = true; ++ proc_t *pid_info = NULL; ++ char filename[PATH_MAX] = {0}; ++ char sbuf[1024] = {0}; /* bufs for stat */ ++ ++ sret = kill(pid, 0); ++ if (sret < 0 && errno == ESRCH) ++ return false; ++ ++ sret = snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); ++ if (sret < 0 || sret >= sizeof(filename)) { ++ ERROR("Failed to sprintf filename"); ++ goto out; ++ } ++ ++ if 
((lxc_file2str(filename, sbuf, sizeof(sbuf))) == -1) { ++ ERROR("Failed to read pidfile %s", filename); ++ alive = false; ++ goto out; ++ } ++ ++ pid_info = lxc_stat2proc(sbuf); ++ if (!pid_info) { ++ ERROR("Failed to get proc stat info"); ++ alive = false; ++ goto out; ++ } ++ ++ if (start_time != pid_info->start_time) ++ alive = false; ++out: ++ free(pid_info); ++ return alive; ++} ++ ++bool is_non_negative_num(const char *s) ++{ ++ if (!s || !strcmp(s, "")) ++ return false; ++ while(*s != '\0') { ++ if(!isdigit(*s)) ++ return false; ++ ++s; ++ } ++ return true; ++} +diff --git a/src/lxc/isulad_utils.h b/src/lxc/isulad_utils.h +new file mode 100644 +index 0000000..8e7adb1 +--- /dev/null ++++ b/src/lxc/isulad_utils.h +@@ -0,0 +1,30 @@ ++/* SPDX-License-Identifier: LGPL-2.1+ */ ++/****************************************************************************** ++ * Copyright (c) Huawei Technologies Co., Ltd. 2020. Allrights reserved ++ * Description: isulad utils ++ * Author: lifeng ++ * Create: 2020-04-11 ++******************************************************************************/ ++#ifndef __iSULAD_UTILS_H ++#define __iSULAD_UTILS_H ++ ++#include ++ ++extern int lxc_mem_realloc(void **newptr, size_t newsize, void *oldptr, size_t oldsize); ++extern void *lxc_common_calloc_s(size_t size); ++extern char *safe_strdup(const char *src); ++ ++extern int lxc_open(const char *filename, int flags, mode_t mode); ++extern FILE *lxc_fopen(const char *filename, const char *mode); ++ ++extern void lxc_write_error_message(int errfd, const char *format, ...); ++extern int lxc_file2str(const char *filename, char ret[], int cap); ++extern int unsigned long long lxc_get_process_startat(pid_t pid); ++// set env home in container ++extern int lxc_setup_env_home(uid_t uid); ++ ++extern bool lxc_process_alive(pid_t pid, unsigned long long start_time); ++ ++extern bool is_non_negative_num(const char *s); ++ ++#endif +diff --git a/src/lxc/lsm/lsm.c b/src/lxc/lsm/lsm.c +index 
553e0c9..2f87dd6 100644 +--- a/src/lxc/lsm/lsm.c ++++ b/src/lxc/lsm/lsm.c +@@ -168,6 +168,26 @@ int lsm_process_label_set(const char *label, struct lxc_conf *conf, + return drv->process_label_set(label, conf, on_exec); + } + ++#ifdef HAVE_ISULAD ++int lsm_file_label_set(const char *path, const char *label) ++{ ++ if (!drv) { ++ ERROR("LSM driver not inited"); ++ return -1; ++ } ++ return drv->file_label_set(path, label); ++} ++ ++int lsm_relabel(const char *path, const char *label, bool share) ++{ ++ if (!drv) { ++ ERROR("LSM driver not inited"); ++ return -1; ++ } ++ return drv->relabel(path, label, share); ++} ++#endif ++ + int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath) + { + if (!drv) { +diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h +index 630eff0..fb57083 100644 +--- a/src/lxc/lxc.h ++++ b/src/lxc/lxc.h +@@ -32,8 +32,14 @@ struct lxc_handler; + * @daemonize : whether or not the container is daemonized + * Returns 0 on success, < 0 otherwise + */ ++#ifdef HAVE_ISULAD ++extern int lxc_start(char *const argv[], struct lxc_handler *handler, ++ const char *lxcpath, bool daemonize, int *error_num, ++ unsigned int start_timeout); ++#else + extern int lxc_start(char *const argv[], struct lxc_handler *handler, + const char *lxcpath, bool daemonize, int *error_num); ++#endif + + /* + * Start the specified command inside an application container +@@ -44,9 +50,15 @@ extern int lxc_start(char *const argv[], struct lxc_handler *handler, + * @daemonize : whether or not the container is daemonized + * Returns 0 on success, < 0 otherwise + */ ++#ifdef HAVE_ISULAD ++extern int lxc_execute(const char *name, char *const argv[], int quiet, ++ struct lxc_handler *handler, const char *lxcpath, ++ bool daemonize, int *error_num, unsigned int start_timeout); ++#else + extern int lxc_execute(const char *name, char *const argv[], int quiet, + struct lxc_handler *handler, const char *lxcpath, + bool daemonize, int *error_num); ++#endif + + /* + * Close the fd associated 
with the monitoring +@@ -83,6 +95,13 @@ extern lxc_state_t lxc_state(const char *name, const char *lxcpath); + */ + extern struct lxc_container *lxc_container_new(const char *name, const char *configpath); + ++#ifdef HAVE_ISULAD ++/* ++ * Create a new container without loading config. ++ */ ++extern struct lxc_container *lxc_container_without_config_new(const char *name, const char *configpath); ++#endif ++ + /* + * Returns 1 on success, 0 on failure. + */ +diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h +index b4ec1d6..3680ade 100644 +--- a/src/lxc/lxccontainer.h ++++ b/src/lxc/lxccontainer.h +@@ -26,6 +26,10 @@ extern "C" { + #define LXC_CREATE_MAXFLAGS (1 << 1) /*!< Number of \c LXC_CREATE* flags */ + #define LXC_MOUNT_API_V1 1 + ++#ifdef HAVE_ISULAD ++#define LXC_IMAGE_OCI_KEY "lxc.imagetype.oci" ++#endif ++ + struct bdev_specs; + + struct lxc_snapshot; +@@ -40,6 +44,41 @@ struct lxc_mount { + int version; + }; + ++#ifdef HAVE_ISULAD ++struct lxc_blkio_metrics { ++ uint64_t read; ++ uint64_t write; ++ uint64_t total; ++}; ++ ++struct lxc_container_metrics { ++ /* State of container */ ++ const char *state; ++ /* The process ID of the init container */ ++ pid_t init; ++ /* Current pids */ ++ uint64_t pids_current; ++ /* CPU usage */ ++ uint64_t cpu_use_nanos; ++ uint64_t cpu_use_user; ++ uint64_t cpu_use_sys; ++ /* BlkIO usage */ ++ struct lxc_blkio_metrics io_service_bytes; ++ struct lxc_blkio_metrics io_serviced; ++ /* Memory usage */ ++ uint64_t mem_used; ++ uint64_t mem_limit; ++ /* Kernel Memory usage */ ++ uint64_t kmem_used; ++ uint64_t kmem_limit; ++ /* Cache usage */ ++ uint64_t cache; ++ uint64_t cache_total; ++ /* total inactive file */ ++ uint64_t inactive_file_total; ++}; ++#endif ++ + /*! + * An LXC container. + * +@@ -107,6 +146,38 @@ struct lxc_container { + /*! Full path to configuration file */ + char *config_path; + ++#ifdef HAVE_ISULAD ++ /*! 
isulad: ++ * \private ++ * exit FIFO File to open used monitor the state of lxc monitor process. ++ */ ++ char *exit_fifo; ++ /*! Whether container wishes to create pty or pipes for console log */ ++ bool disable_pty; ++ ++ /*! Whether container wishes to keep stdin active */ ++ bool open_stdin; ++ ++ /*! ++ * \private ++ * isulad: support oci hook from json file ++ * full path of json file ++ * */ ++ char *ocihookfile; ++ ++ /*! isulad: ++ * \private ++ * start_timeout. ++ */ ++ unsigned int start_timeout; ++ ++ /*! isulad: ++ * \private ++ * image_type_oci ++ */ ++ bool image_type_oci; ++#endif ++ + /*! + * \brief Determine if \c /var/lib/lxc/$name/config exists. + * +@@ -865,6 +936,115 @@ struct lxc_container { + * \return pidfd of init process of the container. + */ + int (*init_pidfd)(struct lxc_container *c); ++ ++#ifdef HAVE_ISULAD ++ /*! isulad add ++ * \brief An API call to set the path of info file ++ * ++ * \param c Container. ++ * \param info_file Value of the path of info file. ++ * ++ * \return \c true on success, else \c false. ++ */ ++ bool (*set_container_info_file) (struct lxc_container *c, const char *info_file); ++ ++ /*! isulad add ++ * \brief An API call to change the path of the console default fifos ++ * ++ * \param c Container. ++ * \param path Value of the console path. ++ * ++ * \return \c true on success, else \c false. ++ */ ++ bool (*set_terminal_init_fifos)(struct lxc_container *c, const char *in, const char *out, const char *err); ++ ++ /*! isulad add ++ * \brief An API call to add the path of terminal fifos ++ * ++ * \param c Container. ++ * \param path Value of the console path.. ++ * ++ * \return \c true on success, else \c false. 
++ */ ++ bool (*add_terminal_fifos)(struct lxc_container *c, const char *in, const char *out, const char *err); ++ ++ bool (*set_terminal_winch)(struct lxc_container *c, unsigned int height, unsigned int width); ++ ++ bool (*set_exec_terminal_winch)(struct lxc_container *c, const char *suffix, unsigned int height, unsigned int width); ++ ++ /*! ++ * \brief Change whether the container wants to create pty or pipes ++ * from the console log. ++ * ++ * \param c Container. ++ * \param state Value for the disable pty bit (0 or 1). ++ * ++ * \return \c true on success, else \c false. ++ */ ++ bool (*want_disable_pty)(struct lxc_container *c, bool state); ++ ++ /*! ++ * \brief Change whether the container wants to keep stdin active ++ * for parent process of container ++ * ++ * \param c Container. ++ * \param state Value for the open_stdin bit (0 or 1). ++ * ++ * \return \c true on success, else \c false. ++ */ ++ bool (*want_open_stdin)(struct lxc_container *c, bool state); ++ ++ /*! isulad add ++ * \brief An API call to clean resources of container ++ * ++ * \param c Container. ++ * \param pid Value of container process. ++ * ++ * \return \c true on success, else \c false. ++ */ ++ bool (*clean_container_resource) (struct lxc_container *c, pid_t pid); ++ ++ /*! isulad add ++ * \brief An API call to get container pids ++ * ++ * \param c Container. ++ * \param pids Value of container pids. ++ * \param pids_len Value of container pids len. ++ * \param pid Value of container pid. ++ * \return \c true on success, else \c false. ++ */ ++ bool (*get_container_pids)(struct lxc_container *c,pid_t **pids,size_t *pids_len); ++ ++ /*! isulad add ++ * \brief An API call to set start timeout ++ * ++ * \param c Container. ++ * \param start_timeout Value of start timeout. ++ * ++ * \return \c true on success, else \c false. ++ */ ++ bool (*set_start_timeout)(struct lxc_container *c, unsigned int start_timeout); ++ ++ /*! 
isulad add ++ * \brief An API call to set oci type ++ * ++ * \param c Container. ++ * \param image_type_oci image oci type. ++ * ++ * \return \c true on success, else \c false. ++ */ ++ bool (*set_oci_type)(struct lxc_container *c, bool image_type_oci); ++ ++ /*! isulad add ++ * \brief An API call to set start timeout ++ * ++ * \param c Container. ++ * \param start_timeout Value of start timeout. ++ * ++ * \return \c true on success, else \c false. ++ */ ++ bool (*get_container_metrics)(struct lxc_container *c, struct lxc_container_metrics *metrics); ++#endif + }; + + /*! +@@ -998,6 +1178,20 @@ struct lxc_console_log { + */ + struct lxc_container *lxc_container_new(const char *name, const char *configpath); + ++#ifdef HAVE_ISULAD ++/*! ++ * \brief Create a new container without loading config. ++ * ++ * \param name Name to use for container. ++ * \param configpath Full path to configuration file to use. ++ * ++ * \return Newly-allocated container, or \c NULL on error. ++ * ++ * \note This function can only used for listing container. ++ */ ++struct lxc_container *lxc_container_without_config_new(const char *name, const char *configpath); ++#endif ++ + /*! + * \brief Add a reference to the specified container. + * +diff --git a/src/lxc/tools/lxc_ls.c b/src/lxc/tools/lxc_ls.c +index 0abcd7a..7c0b69c 100644 +--- a/src/lxc/tools/lxc_ls.c ++++ b/src/lxc/tools/lxc_ls.c +@@ -106,7 +106,11 @@ struct wrapargs { + /* + * Takes struct wrapargs as argument. 
+ */ ++#ifdef HAVE_ISULAD ++static int ls_get_wrapper(void *wrap, int msgfd); ++#else + static int ls_get_wrapper(void *wrap); ++#endif + + /* + * To calculate swap usage we should not simply check memory.usage_in_bytes and +@@ -1005,7 +1009,11 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) + return 0; + } + ++#ifdef HAVE_ISULAD ++static int ls_get_wrapper(void *wrap, int msgfd) ++#else + static int ls_get_wrapper(void *wrap) ++#endif + { + int ret = -1; + size_t len = 0; +diff --git a/src/lxc/tools/lxc_start.c b/src/lxc/tools/lxc_start.c +index 459b867..3ef5961 100644 +--- a/src/lxc/tools/lxc_start.c ++++ b/src/lxc/tools/lxc_start.c +@@ -28,6 +28,11 @@ + #include "confile.h" + #include "log.h" + ++#ifdef HAVE_ISULAD ++#include ++#include "isulad_utils.h" ++#endif ++ + lxc_log_define(lxc_start, lxc); + + static int my_parser(struct lxc_arguments *args, int c, char *arg); +@@ -48,6 +53,16 @@ static const struct option my_longopts[] = { + {"share-ipc", required_argument, 0, OPT_SHARE_IPC}, + {"share-uts", required_argument, 0, OPT_SHARE_UTS}, + {"share-pid", required_argument, 0, OPT_SHARE_PID}, ++#ifdef HAVE_ISULAD ++ {"in-fifo", required_argument, 0, OPT_INPUT_FIFO}, ++ {"out-fifo", required_argument, 0, OPT_OUTPUT_FIFO}, ++ {"err-fifo", required_argument, 0, OPT_STDERR_FIFO}, ++ {"container-pidfile", required_argument, 0, OPT_CONTAINER_INFO}, ++ {"exit-fifo", required_argument, 0, OPT_EXIT_FIFO}, ++ {"start-timeout", required_argument, 0, OPT_START_TIMEOUT}, ++ {"disable-pty", no_argument, 0, OPT_DISABLE_PTY}, ++ {"open-stdin", no_argument, 0, OPT_OPEN_STDIN}, ++#endif + LXC_COMMON_OPTIONS + }; + +@@ -70,7 +85,20 @@ Options :\n\ + Note: --daemon implies --close-all-fds\n\ + -s, --define KEY=VAL Assign VAL to configuration variable KEY\n\ + --share-[net|ipc|uts|pid]=NAME Share a namespace with another container or pid\n\ +-", ++" ++#ifdef HAVE_ISULAD ++"\ ++ --in-fifo Stdin fifo path\n\ ++ --out-fifo Stdout fifo path\n\ ++ --err-fifo Stderr 
fifo path\n\ ++ --container-pidfile File path for container pid\n\ ++ --exit-fifo Fifo path to save exit code\n\ ++ --start-timeout Timeout for start container\n\ ++ --disable-pty Disable pty for attach\n\ ++ --open-stdin Open stdin for attach\n\ ++" ++#endif ++, + .options = my_longopts, + .parser = my_parser, + .checker = NULL, +@@ -118,6 +146,38 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) + case OPT_SHARE_PID: + args->share_ns[LXC_NS_PID] = arg; + break; ++ ++#ifdef HAVE_ISULAD ++ case OPT_CONTAINER_INFO: ++ args->container_info = arg; ++ break; ++ case OPT_INPUT_FIFO: ++ args->terminal_fifos[0] = arg; ++ break; ++ case OPT_OUTPUT_FIFO: ++ args->terminal_fifos[1] = arg; ++ break; ++ case OPT_STDERR_FIFO: ++ args->terminal_fifos[2] = arg; ++ break; ++ case OPT_EXIT_FIFO: ++ args->exit_monitor_fifo = arg; ++ break; ++ case OPT_DISABLE_PTY: ++ args->disable_pty = 1; ++ break; ++ case OPT_OPEN_STDIN: ++ args->open_stdin = 1; ++ break; ++ case OPT_START_TIMEOUT: ++ if(!is_non_negative_num(arg)) { ++ fprintf(stderr, "Error start timeout parameter:%s.\n", arg); ++ return -1; ++ } ++ args->start_timeout = (unsigned int)atoi(arg); ++ break; ++#endif ++ + } + return 0; + } +@@ -163,6 +223,9 @@ int main(int argc, char *argv[]) + "/sbin/init", + NULL, + }; ++#ifdef HAVE_ISULAD ++ char *container_info_file = NULL; ++#endif + + lxc_list_init(&defines); + +@@ -283,6 +346,42 @@ int main(int argc, char *argv[]) + goto out; + } + ++#ifdef HAVE_ISULAD ++ /* isulad: container info file used to store pid and ppid info of container*/ ++ if (my_args.container_info != NULL) { ++ if (ensure_path(&container_info_file, my_args.container_info) < 0) { ++ ERROR("Failed to ensure container's piddile '%s'", my_args.container_info); ++ goto out; ++ } ++ if (!c->set_container_info_file(c, container_info_file)) { ++ ERROR("Failed to set container's piddile '%s'", container_info_file); ++ goto out; ++ } ++ } ++ ++ if (my_args.terminal_fifos[0] || my_args.terminal_fifos[1] 
|| my_args.terminal_fifos[2]) { ++ c->set_terminal_init_fifos(c, my_args.terminal_fifos[0], my_args.terminal_fifos[1], my_args.terminal_fifos[2]); ++ } ++ ++ /* isulad: fifo used to monitor state of monitor process */ ++ if (my_args.exit_monitor_fifo != NULL) { ++ c->exit_fifo = safe_strdup(my_args.exit_monitor_fifo); ++ } ++ ++ if (my_args.disable_pty) { ++ c->want_disable_pty(c, true); ++ } ++ ++ if (my_args.open_stdin) { ++ c->want_open_stdin(c, true); ++ } ++ ++ /* isulad: add start timeout */ ++ if(my_args.start_timeout) { ++ c->set_start_timeout(c, my_args.start_timeout); ++ } ++#endif ++ + if (my_args.console) + if (!c->set_config_item(c, "lxc.console.path", my_args.console)) + goto out; +@@ -305,6 +404,11 @@ int main(int argc, char *argv[]) + else + err = c->start(c, 0, args) ? EXIT_SUCCESS : EXIT_FAILURE; + if (err) { ++#ifdef HAVE_ISULAD ++ if (c->lxc_conf->errmsg) ++ fprintf(stderr, "%s:%s:%s:%d starting container process caused \"%s\"", c->name, ++ __FILE__, __func__, __LINE__, c->lxc_conf->errmsg); ++#endif + ERROR("The container failed to start"); + + if (my_args.daemonize) +@@ -320,5 +424,8 @@ int main(int argc, char *argv[]) + + out: + lxc_container_put(c); ++#ifdef HAVE_ISULAD ++ free(container_info_file); ++#endif + exit(err); + } +diff --git a/src/lxc/utils.c b/src/lxc/utils.c +index 88d0f85..f038dc5 100644 +--- a/src/lxc/utils.c ++++ b/src/lxc/utils.c +@@ -27,6 +27,10 @@ + #include + #include + #include ++#ifdef HAVE_ISULAD ++#include ++#include ++#endif + + #include "config.h" + #include "log.h" +@@ -71,6 +75,9 @@ static int _recursive_rmdir(const char *dirname, dev_t pdev, + int ret; + struct dirent *direntp; + char pathname[PATH_MAX]; ++#ifdef HAVE_ISULAD ++ int saved_errno = 0; ++#endif + + dir = opendir(dirname); + if (!dir) +@@ -133,6 +140,11 @@ static int _recursive_rmdir(const char *dirname, dev_t pdev, + } else { + ret = unlink(pathname); + if (ret < 0) { ++#ifdef HAVE_ISULAD ++ if (saved_errno == 0) { ++ saved_errno = errno; ++ } 
++#endif + __do_close int fd = -EBADF; + + fd = open(pathname, O_RDONLY | O_CLOEXEC | O_NONBLOCK); +@@ -158,10 +170,18 @@ static int _recursive_rmdir(const char *dirname, dev_t pdev, + } + + if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) { ++#ifdef HAVE_ISULAD ++ if (saved_errno == 0) { ++ saved_errno = errno; ++ } ++#endif + SYSERROR("Failed to delete \"%s\"", dirname); + failed = 1; + } + ++#ifdef HAVE_ISULAD ++ errno = saved_errno; ++#endif + return failed ? -1 : 0; + } + +@@ -1008,7 +1028,11 @@ static int open_if_safe(int dirfd, const char *nextpath) + * + * Return an open fd for the path, or <0 on error. + */ ++#ifdef HAVE_ISULAD ++int open_without_symlink(const char *target, const char *prefix_skip) ++#else + static int open_without_symlink(const char *target, const char *prefix_skip) ++#endif + { + int curlen = 0, dirfd, fulllen, i; + char *dup; +@@ -1079,6 +1103,65 @@ out: + return dirfd; + } + ++#ifdef HAVE_ISULAD ++static int format_mount_label(const char *data, const char *mount_label, char **mnt_opts) ++{ ++ int ret = 0; ++ ++ if (mount_label != NULL) { ++ if (data != NULL) { ++ ret = asprintf(mnt_opts, "%s,context=\"%s\"", data, mount_label); ++ } else { ++ ret = asprintf(mnt_opts, "context=\"%s\"", mount_label); ++ } ++ ++ return ret < 0 ? -1 : 0; ++ } ++ ++ *mnt_opts = data != NULL ? 
strdup(data) : NULL; ++ return 0; ++} ++ ++static int receive_mount_options(const char *data, const char *mount_label, ++ const char *fstype, char **mnt_opts) ++{ ++ // SELinux kernels don't support labeling of /proc or /sys ++ if (fstype != NULL && (strcmp(fstype, "proc") == 0 || strcmp(fstype, "sysfs") == 0)) { ++ return format_mount_label(data, NULL, mnt_opts); ++ } ++ ++ return format_mount_label(data, mount_label, mnt_opts); ++} ++ ++static int relabel_bind_mount_source(const char *src, const char *fstype, const char *data, const char *mount_label) ++{ ++ __do_free_string_list char **parts = NULL; ++ ssize_t parts_len; ++ ssize_t i; ++ ++ if (data == NULL) { ++ return lsm_relabel(src, mount_label, false); ++ } ++ ++ parts = lxc_string_split(data, ','); ++ if (parts == NULL) { ++ return -1; ++ } ++ ++ parts_len = lxc_array_len((void **)parts); ++ for (i = 0; i < parts_len; i++) { ++ if (strcmp(parts[i], "z") == 0) { ++ return lsm_relabel(src, mount_label, true); ++ } else if (strcmp(parts[i], "Z") == 0) { ++ return lsm_relabel(src, mount_label, false); ++ } ++ } ++ ++ return lsm_relabel(src, mount_label, false); ++} ++ ++#endif ++ + /* + * Safely mount a path into a container, ensuring that the mount target + * is under the container's @rootfs. (If @rootfs is NULL, then the container +@@ -1087,14 +1170,22 @@ out: + * CAVEAT: This function must not be used for other purposes than container + * setup before executing the container's init + */ ++#ifdef HAVE_ISULAD ++int safe_mount(const char *src, const char *dest, const char *fstype, ++ unsigned long flags, const void *data, const char *rootfs, const char *mount_label) ++#else + int safe_mount(const char *src, const char *dest, const char *fstype, + unsigned long flags, const void *data, const char *rootfs) ++#endif + { + int destfd, ret, saved_errno; + /* Only needs enough for /proc/self/fd/. 
*/ + char srcbuf[50], destbuf[50]; + int srcfd = -1; + const char *mntsrc = src; ++#ifdef HAVE_ISULAD ++ __do_free char *mnt_opts = NULL; ++#endif + + if (!rootfs) + rootfs = ""; +@@ -1137,8 +1228,23 @@ int safe_mount(const char *src, const char *dest, const char *fstype, + return -EINVAL; + } + ++#ifdef HAVE_ISULAD ++ if (receive_mount_options(data, mount_label, fstype, &mnt_opts) != 0) { ++ ERROR("Failed to receive mount options"); ++ return -EINVAL; ++ } ++ ++ ret = mount(mntsrc, destbuf, fstype, flags, mnt_opts); ++ saved_errno = errno; ++ if (ret < 0 && fstype != NULL && strcmp(fstype, "mqueue") == 0) { ++ INFO("older kernels don't support labeling of /dev/mqueue, retry without selinux context"); ++ ret = mount(mntsrc, destbuf, fstype, flags, data); ++ saved_errno = errno; ++ } ++#else + ret = mount(mntsrc, destbuf, fstype, flags, data); + saved_errno = errno; ++#endif + if (srcfd != -1) + close(srcfd); + +@@ -1149,6 +1255,19 @@ int safe_mount(const char *src, const char *dest, const char *fstype, + return ret; + } + ++#ifdef HAVE_ISULAD ++ if (fstype != NULL && strcmp(fstype, "mqueue") == 0 && lsm_file_label_set(dest, mount_label) != 0) { ++ ERROR("Failed to set file label on %s", dest); ++ return -EINVAL; ++ } ++ ++ if (fstype != NULL && strcmp(fstype, "bind") == 0 && ++ relabel_bind_mount_source(src, fstype, (const char *)data, mount_label) != 0) { ++ ERROR("Failed to reabel %s with %s", src, mount_label); ++ return -EINVAL; ++ } ++#endif ++ + return 0; + } + +@@ -1215,7 +1334,11 @@ domount: + if (!strcmp(rootfs, "")) + ret = mount("proc", path, "proc", 0, NULL); + else ++#ifdef HAVE_ISULAD ++ ret = safe_mount("proc", path, "proc", 0, NULL, rootfs, NULL); ++#else + ret = safe_mount("proc", path, "proc", 0, NULL, rootfs); ++#endif + if (ret < 0) + return -1; + +@@ -1425,6 +1548,11 @@ static int lxc_get_unused_loop_dev(char *name_loop) + { + int loop_nr, ret; + int fd_ctl = -1, fd_tmp = -1; ++#ifdef HAVE_ISULAD ++ // isulad: retry and try mknod ++ int 
max_retry = 200; ++ bool try_mknod = true; ++#endif + + fd_ctl = open("/dev/loop-control", O_RDWR | O_CLOEXEC); + if (fd_ctl < 0) { +@@ -1442,8 +1570,37 @@ static int lxc_get_unused_loop_dev(char *name_loop) + if (ret < 0 || ret >= LO_NAME_SIZE) + goto on_error; + ++#ifdef HAVE_ISULAD ++retry: ++#endif + fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); + if (fd_tmp < 0) { ++#ifdef HAVE_ISULAD ++ /* Success of LOOP_CTL_GET_FREE doesn't mean /dev/loop$i is ready, ++ * we try to make node by ourself to avoid wait. */ ++ if (try_mknod) { ++ /* Do not check result of mknod because LOOP_CTL_GET_FREE ++ * alse do mknod, so this mknod may fail as node already ++ * exist. If we can open the node without error, we can ++ * say that it's be created successfully. ++ * ++ * note: 7 is the major device number of loopback devices ++ * in kernel. ++ */ ++ mknod(name_loop, S_IFBLK | 0640, makedev(7, loop_nr)); ++ try_mknod = false; ++ goto retry; ++ } ++ /* we need to wait some time to make sure it's ready for open if ++ * it can't open even if we have already try to make node by ourself. 
*/ ++ if (max_retry > 0) { ++ max_retry--; ++ usleep(5000); /* 5 millisecond */ ++ goto retry; ++ } ++ SYSERROR("Failed to open loop \"%s\"", name_loop); ++ goto on_error; ++#else + /* on Android loop devices are moved under /dev/block, give it a shot */ + ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/block/loop%d", loop_nr); + if (ret < 0 || ret >= LO_NAME_SIZE) +@@ -1452,6 +1609,7 @@ static int lxc_get_unused_loop_dev(char *name_loop) + fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); + if (fd_tmp < 0) + SYSERROR("Failed to open loop \"%s\"", name_loop); ++#endif + } + + on_error: +@@ -1661,6 +1819,7 @@ uint64_t lxc_find_next_power2(uint64_t n) + return n; + } + ++#ifndef HAVE_ISULAD + static int process_dead(/* takes */ int status_fd) + { + __do_close int dupfd = -EBADF; +@@ -1698,15 +1857,19 @@ static int process_dead(/* takes */ int status_fd) + + return ret; + } ++#endif + + int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd) + { + int ret; ++#ifndef HAVE_ISULAD + pid_t ppid; ++#endif + + ret = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0), + prctl_arg(0), prctl_arg(0)); + ++#ifndef HAVE_ISULAD + /* verify that we haven't been orphaned in the meantime */ + ppid = (pid_t)syscall(SYS_getppid); + if (ppid == 0) { /* parent outside our pidns */ +@@ -1718,6 +1881,7 @@ int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd) + } else if (ppid != parent) { + return raise(SIGKILL); + } ++#endif + + if (ret < 0) + return -1; +@@ -1755,8 +1919,18 @@ int lxc_rm_rf(const char *dirname) + struct dirent *direntp; + + dir = opendir(dirname); ++#ifdef HAVE_ISULAD ++ if (!dir) { ++ if (errno == ENOENT) { ++ WARN("Destroy path: \"%s\" do not exist", dirname); ++ return 0; ++ } ++ return log_error_errno(-1, errno, "Failed to open dir \"%s\"", dirname); ++ } ++#else + if (!dir) + return log_error_errno(-1, errno, "Failed to open dir \"%s\"", dirname); ++#endif + + while ((direntp = readdir(dir))) { + __do_free char *pathname = NULL; 
+diff --git a/src/lxc/utils.h b/src/lxc/utils.h +index cf2c042..dea78c2 100644 +--- a/src/lxc/utils.h ++++ b/src/lxc/utils.h +@@ -28,6 +28,13 @@ + #include "process_utils.h" + #include "string_utils.h" + ++#ifdef HAVE_ISULAD ++#include "isulad_utils.h" ++ ++/* isulad: replace space with SPACE_MAGIC_STR */ ++#define SPACE_MAGIC_STR "[#)" ++#endif ++ + /* returns 1 on success, 0 if there were any failures */ + extern int lxc_rmdir_onedev(const char *path, const char *exclude); + extern int get_u16(unsigned short *val, const char *arg, int base); +@@ -41,6 +48,73 @@ extern char *get_rundir(void); + #endif + #endif + ++#ifdef HAVE_ISULAD ++/* isulad: ++ ld cutime, cstime, priority, nice, timeout, it_real_value, rss, ++ c state, ++ d ppid, pgrp, session, tty, tpgid, ++ s signal, blocked, sigignore, sigcatch, ++ lu flags, min_flt, cmin_flt, maj_flt, cmaj_flt, utime, stime, ++ lu rss_rlim, start_code, end_code, start_stack, kstk_esp, kstk_eip, ++ lu start_time, vsize, wchan, nswap, cnswap, ++*/ ++ ++/* Basic data structure which holds all information we can get about a process. ++ * (unless otherwise specified, fields are read from /proc/#/stat) ++ * ++ * Most of it comes from task_struct in linux/sched.h ++ */ ++typedef struct proc_t { ++ // 1st 16 bytes ++ int pid; /* process id */ ++ int ppid; /* pid of parent process */ ++ ++ char state; /* single-char code for process state (S=sleeping) */ ++ ++ unsigned long long ++ utime, /* user-mode CPU time accumulated by process */ ++ stime, /* kernel-mode CPU time accumulated by process */ ++ // and so on... ++ cutime, /* cumulative utime of process and reaped children */ ++ cstime, /* cumulative stime of process and reaped children */ ++ start_time; /* start time of process -- seconds since 1-1-70 */ ++ ++ long ++ priority, /* kernel scheduling priority */ ++ timeout, /* ? */ ++ nice, /* standard unix nice level of process */ ++ rss, /* resident set size from /proc/#/stat (pages) */ ++ it_real_value; /* ? 
*/ ++ unsigned long ++ rtprio, /* real-time priority */ ++ sched, /* scheduling class */ ++ vsize, /* number of pages of virtual memory ... */ ++ rss_rlim, /* resident set size limit? */ ++ flags, /* kernel flags for the process */ ++ min_flt, /* number of minor page faults since process start */ ++ maj_flt, /* number of major page faults since process start */ ++ cmin_flt, /* cumulative min_flt of process and child processes */ ++ cmaj_flt, /* cumulative maj_flt of process and child processes */ ++ nswap, /* ? */ ++ cnswap, /* cumulative nswap ? */ ++ start_code, /* address of beginning of code segment */ ++ end_code, /* address of end of code segment */ ++ start_stack, /* address of the bottom of stack for the process */ ++ kstk_esp, /* kernel stack pointer */ ++ kstk_eip, /* kernel instruction pointer */ ++ wchan; /* address of kernel wait channel proc is sleeping in */ ++ ++ char cmd[16]; /* basename of executable file in call to exec(2) */ ++ int ++ pgrp, /* process group id */ ++ session, /* session id */ ++ tty, /* full device number of controlling terminal */ ++ tpgid, /* terminal process group id */ ++ exit_signal, /* might not be SIGCHLD */ ++ processor; /* current (or most recent?) 
CPU */ ++} proc_t; ++#endif ++ + static inline int lxc_set_cloexec(int fd) + { + return fcntl(fd, F_SETFD, FD_CLOEXEC); +@@ -145,9 +219,16 @@ extern bool cgns_supported(void); + extern char *choose_init(const char *rootfs); + extern bool switch_to_ns(pid_t pid, const char *ns); + extern char *get_template_path(const char *t); ++#ifdef HAVE_ISULAD ++extern int open_without_symlink(const char *target, const char *prefix_skip); ++extern int safe_mount(const char *src, const char *dest, const char *fstype, ++ unsigned long flags, const void *data, ++ const char *rootfs, const char *mount_label); ++#else + extern int safe_mount(const char *src, const char *dest, const char *fstype, + unsigned long flags, const void *data, + const char *rootfs); ++#endif + extern int lxc_mount_proc_if_needed(const char *rootfs); + extern int open_devnull(void); + extern int set_stdfds(int fd); +-- +2.25.1 + diff --git a/0002-add-mount-label-for-rootfs.patch b/0002-add-mount-label-for-rootfs.patch deleted file mode 100644 index 2617125..0000000 --- a/0002-add-mount-label-for-rootfs.patch +++ /dev/null @@ -1,1214 +0,0 @@ -From 0b8bc902c0c7acb54efb1fd4be5121dbf9a08598 Mon Sep 17 00:00:00 2001 -From: wujing -Date: Wed, 15 Jul 2020 16:09:35 +0800 -Subject: [PATCH 02/10] add mount label for rootfs - -Signed-off-by: wujing ---- - src/lxc/cgroups/cgfsng.c | 59 +++++----- - src/lxc/conf.c | 209 +++++++++++++++++++++++++++++++---- - src/lxc/conf.h | 25 +++-- - src/lxc/confile.c | 27 ++++- - src/lxc/lsm/lsm.c | 20 ++++ - src/lxc/lsm/lsm.h | 8 ++ - src/lxc/lsm/selinux.c | 227 +++++++++++++++++++++++++++++++++++++++ - src/lxc/utils.c | 70 ++++++++++++ - src/lxc/utils.h | 6 ++ - 9 files changed, 591 insertions(+), 60 deletions(-) - -diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c -index 4a0961f13..1ff3d9812 100644 ---- a/src/lxc/cgroups/cgfsng.c -+++ b/src/lxc/cgroups/cgfsng.c -@@ -2133,7 +2133,7 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, - } - ret = 
safe_mount(NULL, tmpfspath, "tmpfs", - MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, -- "size=10240k,mode=755", root); -+ "size=10240k,mode=755", root, handler->conf->lsm_se_mount_context); - if (ret < 0) - goto on_error; - -@@ -2244,37 +2244,42 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, - } - } - -- // isulad: remount /sys/fs/cgroup/systemd to readwrite for system container -- if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0) { -- // isulad: don't use the unified hierarchy for the systemd cgroup -- unifiedpath = must_make_path(root, "/sys/fs/cgroup/unified", NULL); -- if (dir_exists(unifiedpath)) { -- ret = umount2(unifiedpath, MNT_DETACH); -- if (ret < 0) { -- SYSERROR("Failed to umount /sys/fs/cgroup/unified."); -- goto on_error; -- } -- } -- -- systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL); -- ret = mount(systemdpath, systemdpath, "bind", -- MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_BIND|MS_REMOUNT, NULL); -- if (ret < 0) { -- SYSERROR("Failed to remount /sys/fs/cgroup/systemd."); -- goto on_error; -- } -- } -+ // isulad: remount /sys/fs/cgroup/systemd to readwrite for system container -+ if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0) -+ { -+ unifiedpath = must_make_path(root, "/sys/fs/cgroup/unified", NULL); -+ if (dir_exists(unifiedpath)) -+ { -+ ret = umount2(unifiedpath, MNT_DETACH); -+ if (ret < 0) -+ { -+ SYSERROR("Failed to umount /sys/fs/cgroup/unified."); -+ goto on_error; -+ } -+ } -+ -+ systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL); -+ ret = mount(systemdpath, systemdpath, "bind", -+ MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME | MS_BIND | MS_REMOUNT, NULL); -+ if (ret < 0) -+ { -+ SYSERROR("Failed to remount /sys/fs/cgroup/systemd."); -+ goto on_error; -+ } -+ } - - retval = true; - - on_error: - free(tmpfspath); -- if (systemdpath != NULL) { -- free(systemdpath); -- } -- if (unifiedpath != NULL) { -- 
free(unifiedpath); -- } -+ if (systemdpath != NULL) -+ { -+ free(systemdpath); -+ } -+ if (unifiedpath != NULL) -+ { -+ free(unifiedpath); -+ } - lxc_free_array((void **)merged, free); - return retval; - } -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 0744c19b3..7e4af0a95 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -699,9 +699,15 @@ static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_ha - - mflags = add_required_remount_flags(source, destination, - default_mounts[i].flags); -+#ifdef HAVE_ISULAD -+ r = safe_mount(source, destination, default_mounts[i].fstype, -+ mflags, default_mounts[i].options, -+ conf->rootfs.path ? conf->rootfs.mount : NULL, NULL); -+#else - r = safe_mount(source, destination, default_mounts[i].fstype, - mflags, default_mounts[i].options, - conf->rootfs.path ? conf->rootfs.mount : NULL); -+#endif - saved_errno = errno; - if (r < 0 && errno == ENOENT) { - INFO("Mount source or target for \"%s\" on \"%s\" does not exist. Skipping", source, destination); -@@ -1076,7 +1082,7 @@ on_error: - */ - #ifdef HAVE_ISULAD - static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, -- int autodevtmpfssize, const char *lxcpath, char *systemd) -+ int autodevtmpfssize, const char *lxcpath, char *systemd, const char *mount_label) - #else - static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, - int autodevtmpfssize, const char *lxcpath) -@@ -1118,7 +1124,7 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, - } - } else { - ret = safe_mount("none", path, "tmpfs", 0, mount_options, -- rootfs->path ? rootfs->mount : NULL); -+ rootfs->path ? rootfs->mount : NULL, mount_label); - if (ret < 0) { - SYSERROR("Failed to mount tmpfs on \"%s\"", path); - goto reset_umask; -@@ -1127,7 +1133,7 @@ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, - } - #else - ret = safe_mount("none", path, "tmpfs", 0, mount_options, -- rootfs->path ? 
rootfs->mount : NULL ); -+ rootfs->path ? rootfs->mount : NULL); - if (ret < 0) { - SYSERROR("Failed to mount tmpfs on \"%s\"", path); - goto reset_umask; -@@ -1183,8 +1189,11 @@ enum { - LXC_DEVNODE_PARTIAL, - LXC_DEVNODE_OPEN, - }; -- -+#ifdef HAVE_ISULAD -+static int lxc_fill_autodev(const struct lxc_rootfs *rootfs, const char *mount_label) -+#else - static int lxc_fill_autodev(const struct lxc_rootfs *rootfs) -+#endif - { - int i, ret; - char path[PATH_MAX]; -@@ -1260,9 +1269,13 @@ static int lxc_fill_autodev(const struct lxc_rootfs *rootfs) - ret = snprintf(hostpath, PATH_MAX, "/dev/%s", device->name); - if (ret < 0 || ret >= PATH_MAX) - return -1; -- -+#ifdef HAVE_ISULAD -+ ret = safe_mount(hostpath, path, 0, MS_BIND, NULL, -+ rootfs->path ? rootfs->mount : NULL, mount_label); -+#else - ret = safe_mount(hostpath, path, 0, MS_BIND, NULL, - rootfs->path ? rootfs->mount : NULL); -+#endif - if (ret < 0) - return log_error_errno(-1, errno, "Failed to bind mount host device node \"%s\" onto \"%s\"", - hostpath, path); -@@ -1818,17 +1831,34 @@ static int lxc_setup_devpts(struct lxc_conf *conf) - { - int ret; - char **opts; -+#ifdef HAVE_ISULAD -+ __do_free char *devpts_mntopts = NULL; -+#else - char devpts_mntopts[256]; -+#endif - char *mntopt_sets[5]; - char default_devpts_mntopts[256] = "gid=5,newinstance,ptmxmode=0666,mode=0620"; - - if (conf->pty_max <= 0) - return log_debug(0, "No new devpts instance will be mounted since no pts devices are requested"); - -+#ifdef HAVE_ISULAD -+ if (conf->lsm_se_mount_context != NULL) { -+ if (asprintf(&devpts_mntopts, "%s,max=%zu,context=\"%s\"", -+ default_devpts_mntopts, conf->pty_max, conf->lsm_se_mount_context) < 0) { -+ return -1; -+ } -+ } else { -+ if (asprintf(&devpts_mntopts, "%s,max=%zu", default_devpts_mntopts, conf->pty_max) < 0) { -+ return -1; -+ } -+ } -+#else - ret = snprintf(devpts_mntopts, sizeof(devpts_mntopts), "%s,max=%zu", - default_devpts_mntopts, conf->pty_max); - if (ret < 0 || (size_t)ret >= 
sizeof(devpts_mntopts)) - return -1; -+#endif - - (void)umount2("/dev/pts", MNT_DETACH); - -@@ -1917,9 +1947,13 @@ static int setup_personality(int persona) - - return 0; - } -- -+#ifdef HAVE_ISULAD -+static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs, -+ const struct lxc_terminal *console, const char *mount_label) -+#else - static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs, - const struct lxc_terminal *console) -+#endif - { - int ret; - char path[PATH_MAX]; -@@ -1956,8 +1990,12 @@ static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs, - ret = fchmod(console->slave, S_IXUSR | S_IXGRP); - if (ret < 0) - return log_error_errno(-errno, errno, "Failed to set mode \"0%o\" to \"%s\"", S_IXUSR | S_IXGRP, console->name); -- -+#ifdef HAVE_ISULAD -+ // ret = safe_mount(console->name, path, "none", MS_BIND, 0, rootfs_path, mount_label); -+ ret = safe_mount(console->name, path, "bind", MS_BIND, 0, rootfs_path, mount_label); -+#else - ret = safe_mount(console->name, path, "none", MS_BIND, 0, rootfs_path); -+#endif - if (ret < 0) - return log_error_errno(-1, errno, "Failed to mount \"%s\" on \"%s\"", console->name, path); - #ifdef HAVE_ISULAD -@@ -1967,9 +2005,15 @@ static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs, - return 0; - } - -+#ifdef HAVE_ISULAD -+static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs, -+ const struct lxc_terminal *console, -+ char *ttydir, const char *mount_label) -+#else - static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs, - const struct lxc_terminal *console, - char *ttydir) -+#endif - { - int ret; - char path[PATH_MAX], lxcpath[PATH_MAX]; -@@ -2020,7 +2064,11 @@ static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs, - return log_error_errno(-errno, errno, "Failed to set mode \"0%o\" to \"%s\"", S_IXUSR | S_IXGRP, console->name); - - /* bind mount console->name to '/dev//console' */ -+#ifdef HAVE_ISULAD -+ ret = safe_mount(console->name, lxcpath, 
"none", MS_BIND, 0, rootfs_path, mount_label); -+#else - ret = safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs_path); -+#endif - if (ret < 0) - return log_error_errno(-1, errno, "Failed to mount \"%s\" on \"%s\"", console->name, lxcpath); - DEBUG("Mounted \"%s\" onto \"%s\"", console->name, lxcpath); -@@ -2029,7 +2077,11 @@ static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs, - #endif - - /* bind mount '/dev//console' to '/dev/console' */ -+#ifdef HAVE_ISULAD -+ ret = safe_mount(lxcpath, path, "none", MS_BIND, 0, rootfs_path, mount_label); -+#else - ret = safe_mount(lxcpath, path, "none", MS_BIND, 0, rootfs_path); -+#endif - if (ret < 0) - return log_error_errno(-1, errno, "Failed to mount \"%s\" on \"%s\"", console->name, lxcpath); - DEBUG("Mounted \"%s\" onto \"%s\"", console->name, lxcpath); -@@ -2038,15 +2090,26 @@ static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs, - return 0; - } - -+#ifdef HAVE_ISULAD - static int lxc_setup_console(const struct lxc_rootfs *rootfs, -- const struct lxc_terminal *console, char *ttydir) -+ const struct lxc_terminal *console, char *ttydir, const char *mount_label) - { -+ if (!ttydir) -+ return lxc_setup_dev_console(rootfs, console, mount_label); - -+ return lxc_setup_ttydir_console(rootfs, console, ttydir, mount_label); -+} -+#else -+static int lxc_setup_console(const struct lxc_rootfs *rootfs, -+ const struct lxc_terminal *console, char *ttydir) -+{ - if (!ttydir) - return lxc_setup_dev_console(rootfs, console); - - return lxc_setup_ttydir_console(rootfs, console, ttydir); - } -+#endif -+ - #ifdef HAVE_ISULAD - static void parse_mntopt(char *opt, unsigned long *mflags, unsigned long *pflags, char **data, size_t size) - { -@@ -2226,10 +2289,17 @@ int parse_propagationopts(const char *mntopts, unsigned long *pflags) - return 0; - } - -+#ifdef HAVE_ISULAD -+static int mount_entry(const char *fsname, const char *target, -+ const char *fstype, unsigned long mountflags, -+ unsigned long 
pflags, const char *data, bool optional, -+ bool dev, bool relative, const char *rootfs, const char *mount_label) -+#else - static int mount_entry(const char *fsname, const char *target, - const char *fstype, unsigned long mountflags, - unsigned long pflags, const char *data, bool optional, - bool dev, bool relative, const char *rootfs) -+#endif - { - int ret; - char srcbuf[PATH_MAX]; -@@ -2245,8 +2315,13 @@ static int mount_entry(const char *fsname, const char *target, - srcpath = srcbuf; - } - -+#ifdef HAVE_ISULAD -+ ret = safe_mount(srcpath, target, fstype, mountflags & ~MS_REMOUNT, data, -+ rootfs, mount_label); -+#else - ret = safe_mount(srcpath, target, fstype, mountflags & ~MS_REMOUNT, data, - rootfs); -+#endif - if (ret < 0) { - if (optional) - return log_info_errno(0, errno, "Failed to mount \"%s\" on \"%s\" (optional)", -@@ -2579,11 +2654,20 @@ static int check_mount_destination(const char *rootfs, const char *dest) - - /* rootfs, lxc_name, and lxc_path can be NULL when the container is created - * without a rootfs. 
*/ -+#ifdef HAVE_ISULAD -+static inline int mount_entry_on_generic(struct mntent *mntent, -+ const char *path, -+ const struct lxc_rootfs *rootfs, -+ const char *lxc_name, -+ const char *lxc_path, -+ const char *mount_label) -+#else - static inline int mount_entry_on_generic(struct mntent *mntent, - const char *path, - const struct lxc_rootfs *rootfs, - const char *lxc_name, - const char *lxc_path) -+#endif - { - __do_free char *mntdata = NULL; - unsigned long mntflags = 0, pflags = 0; -@@ -2655,7 +2739,7 @@ static inline int mount_entry_on_generic(struct mntent *mntent, - mntent->mnt_opts, rootfs_path); - } else { - ret = mount_entry(mntent->mnt_fsname, dest, mntent->mnt_type, mntflags, -- pflags, mntdata, optional, dev, relative, rootfs_path); -+ pflags, mntdata, optional, dev, relative, rootfs_path, mount_label); - } - - if (ret < 0) { -@@ -2695,13 +2779,25 @@ static inline int mount_entry_on_systemfs(struct mntent *mntent) - if (ret < 0 || ret >= sizeof(path)) - return -1; - -+#ifdef HAVE_ISULAD -+ return mount_entry_on_generic(mntent, path, NULL, NULL, NULL, NULL); -+#else - return mount_entry_on_generic(mntent, path, NULL, NULL, NULL); -+#endif - } - -+#ifdef HAVE_ISULAD -+static int mount_entry_on_absolute_rootfs(struct mntent *mntent, -+ const struct lxc_rootfs *rootfs, -+ const char *lxc_name, -+ const char *lxc_path, -+ const char *mount_label) -+#else - static int mount_entry_on_absolute_rootfs(struct mntent *mntent, - const struct lxc_rootfs *rootfs, - const char *lxc_name, - const char *lxc_path) -+#endif - { - int offset; - char *aux; -@@ -2736,14 +2832,25 @@ skipabs: - ret = snprintf(path, PATH_MAX, "%s/%s", rootfs->mount, aux + offset); - if (ret < 0 || ret >= PATH_MAX) - return -1; -- -+#ifdef HAVE_ISULAD -+ return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path, mount_label); -+#else - return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path); -+#endif - } - -+#ifdef HAVE_ISULAD -+static int 
mount_entry_on_relative_rootfs(struct mntent *mntent, -+ const struct lxc_rootfs *rootfs, -+ const char *lxc_name, -+ const char *lxc_path, -+ const char *mount_label) -+#else - static int mount_entry_on_relative_rootfs(struct mntent *mntent, - const struct lxc_rootfs *rootfs, - const char *lxc_name, - const char *lxc_path) -+#endif - { - int ret; - char path[PATH_MAX]; -@@ -2753,12 +2860,22 @@ static int mount_entry_on_relative_rootfs(struct mntent *mntent, - if (ret < 0 || (size_t)ret >= sizeof(path)) - return -1; - -+#ifdef HAVE_ISULAD -+ return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path, mount_label); -+#else - return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path); -+#endif - } - -+#ifdef HAVE_ISULAD -+static int mount_file_entries(const struct lxc_conf *conf, -+ const struct lxc_rootfs *rootfs, FILE *file, -+ const char *lxc_name, const char *lxc_path, const char *mount_label) -+#else - static int mount_file_entries(const struct lxc_conf *conf, - const struct lxc_rootfs *rootfs, FILE *file, - const char *lxc_name, const char *lxc_path) -+#endif - { - char buf[PATH_MAX]; - struct mntent mntent; -@@ -2786,22 +2903,30 @@ static int mount_file_entries(const struct lxc_conf *conf, - free(mntent.mnt_fsname); - return -1; - } --#endif - - if (!rootfs->path) - ret = mount_entry_on_systemfs(&mntent); - else if (mntent.mnt_dir[0] != '/') - ret = mount_entry_on_relative_rootfs(&mntent, rootfs, -- lxc_name, lxc_path); -+ lxc_name, lxc_path, mount_label); - else - ret = mount_entry_on_absolute_rootfs(&mntent, rootfs, -- lxc_name, lxc_path); -+ lxc_name, lxc_path, mount_label); - --#ifdef HAVE_ISULAD - free(mntent.mnt_fsname); - mntent.mnt_fsname = NULL; - free(mntent.mnt_dir); - mntent.mnt_dir = NULL; -+#else -+ -+ if (!rootfs->path) -+ ret = mount_entry_on_systemfs(&mntent); -+ else if (mntent.mnt_dir[0] != '/') -+ ret = mount_entry_on_relative_rootfs(&mntent, rootfs, -+ lxc_name, lxc_path); -+ else -+ ret = 
mount_entry_on_absolute_rootfs(&mntent, rootfs, -+ lxc_name, lxc_path); - #endif - - if (ret < 0) -@@ -2822,9 +2947,15 @@ static inline void __auto_endmntent__(FILE **f) - - #define __do_endmntent __attribute__((__cleanup__(__auto_endmntent__))) - -+#ifdef HAVE_ISULAD -+static int setup_mount(const struct lxc_conf *conf, -+ const struct lxc_rootfs *rootfs, const char *fstab, -+ const char *lxc_name, const char *lxc_path, const char *mount_label) -+#else - static int setup_mount(const struct lxc_conf *conf, - const struct lxc_rootfs *rootfs, const char *fstab, - const char *lxc_name, const char *lxc_path) -+#endif - { - __do_endmntent FILE *f = NULL; - int ret; -@@ -2836,7 +2967,11 @@ static int setup_mount(const struct lxc_conf *conf, - if (!f) - return log_error_errno(-1, errno, "Failed to open \"%s\"", fstab); - -+#ifdef HAVE_ISULAD -+ ret = mount_file_entries(conf, rootfs, f, lxc_name, lxc_path, mount_label); -+#else - ret = mount_file_entries(conf, rootfs, f, lxc_name, lxc_path); -+#endif - if (ret < 0) - ERROR("Failed to set up mount entries"); - -@@ -2912,10 +3047,17 @@ FILE *make_anonymous_mount_file(struct lxc_list *mount, - return f; - } - -+#ifdef HAVE_ISULAD -+static int setup_mount_entries(const struct lxc_conf *conf, -+ const struct lxc_rootfs *rootfs, -+ struct lxc_list *mount, const char *lxc_name, -+ const char *lxc_path, const char *mount_label) -+#else - static int setup_mount_entries(const struct lxc_conf *conf, - const struct lxc_rootfs *rootfs, - struct lxc_list *mount, const char *lxc_name, - const char *lxc_path) -+#endif - { - __do_fclose FILE *f = NULL; - -@@ -2923,7 +3065,11 @@ static int setup_mount_entries(const struct lxc_conf *conf, - if (!f) - return -1; - -+#ifdef HAVE_ISULAD -+ return mount_file_entries(conf, rootfs, f, lxc_name, lxc_path, mount_label); -+#else - return mount_file_entries(conf, rootfs, f, lxc_name, lxc_path); -+#endif - } - - #ifdef HAVE_ISULAD -@@ -3892,8 +4038,11 @@ static int lxc_execute_bind_init(struct 
lxc_handler *handler) - if (ret < 0 && errno != EEXIST) - return log_error_errno(-1, errno, "Failed to create dummy \"%s\" file as bind mount target", destpath); - } -- -+#ifdef HAVE_ISULAD -+ ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount, conf->lsm_se_mount_context); -+#else - ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount); -+#endif - if (ret < 0) - return log_error_errno(-1, errno, "Failed to bind mount lxc.init.static into container"); - -@@ -4035,7 +4184,7 @@ static int lxc_setup_boot_id(void) - - #ifdef HAVE_ISULAD - /* isulad: setup devices which will be populated in the container.*/ --static int setup_populate_devs(const struct lxc_rootfs *rootfs, struct lxc_list *devs) -+static int setup_populate_devs(const struct lxc_rootfs *rootfs, struct lxc_list *devs, const char *mount_label) - { - int ret = 0; - char *pathdirname = NULL; -@@ -4104,7 +4253,7 @@ static int setup_populate_devs(const struct lxc_rootfs *rootfs, struct lxc_list - } - fclose(pathfile); - if (safe_mount(hostpath, path, 0, MS_BIND, NULL, -- rootfs->path ? rootfs->mount : NULL) != 0) { -+ rootfs->path ? 
rootfs->mount : NULL, mount_label) != 0) { - SYSERROR("Failed bind mounting device %s from host into container", - dev_elem->name); - ret = -1; -@@ -4761,7 +4910,8 @@ int lxc_setup(struct lxc_handler *handler) - - if (lxc_conf->autodev > 0) { - #ifdef HAVE_ISULAD -- ret = mount_autodev(name, &lxc_conf->rootfs, lxc_conf->autodevtmpfssize, lxcpath, lxc_conf->systemd); -+ ret = mount_autodev(name, &lxc_conf->rootfs, lxc_conf->autodevtmpfssize, lxcpath, -+ lxc_conf->systemd, lxc_conf->lsm_se_mount_context); - #else - ret = mount_autodev(name, &lxc_conf->rootfs, lxc_conf->autodevtmpfssize, lxcpath); - #endif -@@ -4775,14 +4925,22 @@ int lxc_setup(struct lxc_handler *handler) - ret = lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler); - if (ret < 0) - return log_error(-1, "Failed to setup first automatic mounts"); -- -+#ifdef HAVE_ISULAD -+ ret = setup_mount(lxc_conf, &lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath, lxc_conf->lsm_se_mount_context); -+#else - ret = setup_mount(lxc_conf, &lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath); -+#endif - if (ret < 0) - return log_error(-1, "Failed to setup mounts"); - - if (!lxc_list_empty(&lxc_conf->mount_list)) { -+#ifdef HAVE_ISULAD -+ ret = setup_mount_entries(lxc_conf, &lxc_conf->rootfs, -+ &lxc_conf->mount_list, name, lxcpath, lxc_conf->lsm_se_mount_context); -+#else - ret = setup_mount_entries(lxc_conf, &lxc_conf->rootfs, - &lxc_conf->mount_list, name, lxcpath); -+#endif - if (ret < 0) - return log_error(-1, "Failed to setup mount entries"); - #ifdef HAVE_ISULAD -@@ -4828,8 +4986,11 @@ int lxc_setup(struct lxc_handler *handler) - ret = run_lxc_hooks(name, "autodev", lxc_conf, NULL); - if (ret < 0) - return log_error(-1, "Failed to run autodev hooks"); -- -+#ifdef HAVE_ISULAD -+ ret = lxc_fill_autodev(&lxc_conf->rootfs, lxc_conf->lsm_se_mount_context); -+#else - ret = lxc_fill_autodev(&lxc_conf->rootfs); -+#endif - if (ret < 0) - return log_error(-1, "Failed to populate \"/dev\""); - 
} -@@ -4837,7 +4998,7 @@ int lxc_setup(struct lxc_handler *handler) - #ifdef HAVE_ISULAD - /* isulad: setup devices which will be populated in the container. */ - if (!lxc_list_empty(&lxc_conf->populate_devs) && setup_dev) { -- if (setup_populate_devs(&lxc_conf->rootfs, &lxc_conf->populate_devs) != 0) { -+ if (setup_populate_devs(&lxc_conf->rootfs, &lxc_conf->populate_devs, lxc_conf->lsm_se_mount_context) != 0) { - return log_error(-1, "Failed to setup devices in the container"); - } - } -@@ -4846,9 +5007,13 @@ int lxc_setup(struct lxc_handler *handler) - /* Make sure any start hooks are in the container */ - if (!verify_start_hooks(lxc_conf)) - return log_error(-1, "Failed to verify start hooks"); -- -+#ifdef HAVE_ISULAD -+ ret = lxc_setup_console(&lxc_conf->rootfs, &lxc_conf->console, -+ lxc_conf->ttys.dir, lxc_conf->lsm_se_mount_context); -+#else - ret = lxc_setup_console(&lxc_conf->rootfs, &lxc_conf->console, - lxc_conf->ttys.dir); -+#endif - if (ret < 0) - return log_error(-1, "Failed to setup console"); - -diff --git a/src/lxc/conf.h b/src/lxc/conf.h -index 4b6409e3e..c9265b65e 100644 ---- a/src/lxc/conf.h -+++ b/src/lxc/conf.h -@@ -442,31 +442,36 @@ struct lxc_conf { - } shmount; - - #ifdef HAVE_ISULAD -- /* -- * isulad: support oci hook -- * */ -+ /* support oci hook */ - oci_runtime_spec_hooks *ocihooks; - -- /* isulad add: init args used to repalce init_cmd*/ -+ /* init args used to repalce init_cmd */ - char **init_argv; - size_t init_argc; - - gid_t *init_groups; - size_t init_groups_len; - -- /* populate devices*/ -+ /* populate devices */ - struct lxc_list populate_devs; -- mode_t umask; //umask value -+ mode_t umask; // umask value - - char *container_info_file; - -- int exit_fd; /* exit fifo fd*/ -+ /* exit fifo fd*/ -+ int exit_fd; -+ -+ /* record error messages */ -+ char *errmsg; - -- char *errmsg; /* record error messages */ -+ /* pipdfd for get error message of child or grandchild process */ -+ int errpipe[2]; - -- int errpipe[2];//pipdfd for 
get error message of child or grandchild process. -+ /* systemd value */ -+ char *systemd; - -- char *systemd; //systemd value -+ /* Linux Security Modules SELinux context for device mount */ -+ char *lsm_se_mount_context; - #endif - - }; -diff --git a/src/lxc/confile.c b/src/lxc/confile.c -index b1d101a9d..f108b37b4 100644 ---- a/src/lxc/confile.c -+++ b/src/lxc/confile.c -@@ -158,6 +158,7 @@ lxc_config_define(systemd); - lxc_config_define(console_log_driver); - lxc_config_define(console_syslog_tag); - lxc_config_define(console_syslog_facility); -+lxc_config_define(selinux_mount_context); - #endif - - /* -@@ -247,7 +248,7 @@ static struct lxc_config_t config_jump_table[] = { - { "lxc.net.veth.ipv6.route", set_config_net_veth_ipv6_route, get_config_net_veth_ipv6_route, clr_config_net_veth_ipv6_route, }, - { "lxc.net.", set_config_net_nic, get_config_net_nic, clr_config_net_nic, }, - { "lxc.net", set_config_net, get_config_net, clr_config_net, }, -- { "lxc.no_new_privs", set_config_no_new_privs, get_config_no_new_privs, clr_config_no_new_privs, }, -+ { "lxc.no_new_privs", set_config_no_new_privs, get_config_no_new_privs, clr_config_no_new_privs, }, - { "lxc.prlimit", set_config_prlimit, get_config_prlimit, clr_config_prlimit, }, - { "lxc.pty.max", set_config_pty_max, get_config_pty_max, clr_config_pty_max, }, - { "lxc.rootfs.managed", set_config_rootfs_managed, get_config_rootfs_managed, clr_config_rootfs_managed, }, -@@ -282,6 +283,7 @@ static struct lxc_config_t config_jump_table[] = { - { "lxc.console.logdriver", set_config_console_log_driver, get_config_console_log_driver, clr_config_console_log_driver, }, - { "lxc.console.syslog_tag", set_config_console_syslog_tag, get_config_console_syslog_tag, clr_config_console_syslog_tag, }, - { "lxc.console.syslog_facility", set_config_console_syslog_facility, get_config_console_syslog_facility, clr_config_console_syslog_facility, }, -+ { "lxc.selinux.mount_context", set_config_selinux_mount_context, 
get_config_selinux_mount_context, clr_config_selinux_mount_context, }, - #endif - }; - -@@ -6685,6 +6687,16 @@ static int set_config_console_syslog_facility(const char *key, const char *value - return 0; - } - -+static int set_config_selinux_mount_context(const char *key, const char *value, -+ struct lxc_conf *lxc_conf, void *data) -+{ -+ if (value != NULL && strcmp(value, "unconfined_t") == 0) { -+ return set_config_string_item(&lxc_conf->lsm_se_mount_context, NULL); -+ } -+ -+ return set_config_string_item(&lxc_conf->lsm_se_mount_context, value); -+} -+ - static int get_config_console_log_driver(const char *key, char *retv, int inlen, - struct lxc_conf *c, void *data) - { -@@ -6703,6 +6715,12 @@ static int get_config_console_syslog_facility(const char *key, char *retv, int i - return lxc_get_conf_int(c, retv, inlen, c->console.log_syslog_facility); - } - -+static int get_config_selinux_mount_context(const char *key, char *retv, int inlen, -+ struct lxc_conf *c, void *data) -+{ -+ return lxc_get_conf_str(retv, inlen, c->lsm_se_mount_context); -+} -+ - static inline int clr_config_console_log_driver(const char *key, - struct lxc_conf *c, void *data) - { -@@ -6726,4 +6744,11 @@ static inline int clr_config_console_syslog_facility(const char *key, - return 0; - } - -+static inline int clr_config_selinux_mount_context(const char *key, -+ struct lxc_conf *c, void *data) -+{ -+ free(c->lsm_se_mount_context); -+ c->lsm_se_mount_context = NULL; -+ return 0; -+} - #endif -diff --git a/src/lxc/lsm/lsm.c b/src/lxc/lsm/lsm.c -index 553e0c99a..2f87dd68d 100644 ---- a/src/lxc/lsm/lsm.c -+++ b/src/lxc/lsm/lsm.c -@@ -168,6 +168,26 @@ int lsm_process_label_set(const char *label, struct lxc_conf *conf, - return drv->process_label_set(label, conf, on_exec); - } - -+#ifdef HAVE_ISULAD -+int lsm_file_label_set(const char *path, const char *label) -+{ -+ if (!drv) { -+ ERROR("LSM driver not inited"); -+ return -1; -+ } -+ return drv->file_label_set(path, label); -+} -+ -+int 
lsm_relabel(const char *path, const char *label, bool share) -+{ -+ if (!drv) { -+ ERROR("LSM driver not inited"); -+ return -1; -+ } -+ return drv->relabel(path, label, share); -+} -+#endif -+ - int lsm_process_prepare(struct lxc_conf *conf, const char *lxcpath) - { - if (!drv) { -diff --git a/src/lxc/lsm/lsm.h b/src/lxc/lsm/lsm.h -index ee578bb03..4872f5598 100644 ---- a/src/lxc/lsm/lsm.h -+++ b/src/lxc/lsm/lsm.h -@@ -17,6 +17,10 @@ struct lsm_drv { - char *(*process_label_get)(pid_t pid); - int (*process_label_set)(const char *label, struct lxc_conf *conf, - bool on_exec); -+#ifdef HAVE_ISULAD -+ int (*file_label_set)(const char *path, const char *label); -+ int (*relabel)(const char *path, const char *label, bool share); -+#endif - int (*keyring_label_set)(char* label); - int (*prepare)(struct lxc_conf *conf, const char *lxcpath); - void (*cleanup)(struct lxc_conf *conf, const char *lxcpath); -@@ -32,6 +36,10 @@ extern int lsm_process_label_set(const char *label, struct lxc_conf *conf, - extern int lsm_process_label_fd_get(pid_t pid, bool on_exec); - extern int lsm_process_label_set_at(int label_fd, const char *label, - bool on_exec); -+#ifdef HAVE_ISULAD -+extern int lsm_file_label_set(const char *path, const char *label); -+extern int lsm_relabel(const char *path, const char *label, bool share); -+#endif - extern void lsm_process_cleanup(struct lxc_conf *conf, const char *lxcpath); - extern int lsm_keyring_label_set(char *label); - -diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c -index dba0ab584..5bc9843e4 100644 ---- a/src/lxc/lsm/selinux.c -+++ b/src/lxc/lsm/selinux.c -@@ -16,6 +16,10 @@ - #include "log.h" - #include "lsm.h" - -+#ifdef HAVE_ISULAD -+#include -+#endif -+ - #define DEFAULT_LABEL "unconfined_t" - - lxc_log_define(selinux, lsm); -@@ -85,6 +89,225 @@ static int selinux_process_label_set(const char *inlabel, struct lxc_conf *conf, - return 0; - } - -+#ifdef HAVE_ISULAD -+/* -+ * selinux_file_label_set: Set SELinux context of a file -+ 
* -+ * @path : a file -+ * @label : label string -+ * -+ * Returns 0 on success, < 0 on failure -+ */ -+static int selinux_file_label_set(const char *path, const char *label) -+{ -+ int ret; -+ -+ if (path == NULL || label == NULL || strcmp(label, "unconfined_t") == 0) { -+ return 0; -+ } -+ -+ ret = lsetfilecon(path, label); -+ if (ret != 0) { -+ SYSERROR("Failed to setSELinux context to \"%s\": %s", label, path); -+ return -1; -+ } -+ -+ INFO("Changed SELinux context to \"%s\": %s", label, path); -+ return 0; -+} -+ -+/* -+ * is_exclude_relabel_path: Determine whether it is a excluded path to label -+ * -+ * @path : a file or directory -+ * -+ * Returns 0 on success, < 0 on failure -+ */ -+static bool is_exclude_relabel_path(const char *path) -+{ -+ const char *exclude_path[] = { "/", "/usr", "/etc", "/tmp", "/home", "/run", "/var", "/root" }; -+ size_t i; -+ -+ for (i = 0; i < sizeof(exclude_path) / sizeof(char *); i++) { -+ if (strcmp(path, exclude_path[i]) == 0) { -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+/* -+ * bad_prefix: Prevent users from relabing system files -+ * -+ * @path : a file or directory -+ * -+ * Returns 0 on success, < 0 on failure -+ */ -+static int bad_prefix(const char *fpath) -+{ -+ const char *bad_prefixes = "/usr"; -+ -+ if (fpath == NULL) { -+ ERROR("Empty file path"); -+ return -1; -+ } -+ -+ if (strncmp(fpath, bad_prefixes, strlen(bad_prefixes)) == 0) { -+ ERROR("relabeling content in %s is not allowed", bad_prefixes); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+/* -+ * recurse_set_file_label: Recursively label files or folders -+ * -+ * @path : a file or directory -+ * @label : label string -+ * -+ * Returns 0 on success, < 0 on failure -+ */ -+static int recurse_set_file_label(const char *basePath, const char *label) -+{ -+ int ret = 0; -+ DIR *dir = NULL; -+ struct dirent *ptr = NULL; -+ char base[PATH_MAX] = { 0 }; -+ -+ if ((dir = opendir(basePath)) == NULL) { -+ ERROR("Failed to Open dir: %s", basePath); -+ return 
-1; -+ } -+ -+ ret = lsetfilecon(basePath, label); -+ if (ret != 0) { -+ ERROR("Failed to set file label"); -+ goto out; -+ } -+ -+ while ((ptr = readdir(dir)) != NULL) { -+ if (strcmp(ptr->d_name, ".") == 0 || strcmp(ptr->d_name, "..") == 0) { -+ continue; -+ } else { -+ int nret = snprintf(base, sizeof(base), "%s/%s", basePath, ptr->d_name); -+ if (nret < 0 || nret >= sizeof(base)) { -+ ERROR("Failed to get path"); -+ ret = -1; -+ goto out; -+ } -+ if (ptr->d_type == DT_DIR) { -+ ret = recurse_set_file_label(base, label); -+ if (ret != 0) { -+ ERROR("Failed to set dir label"); -+ goto out; -+ } -+ } else { -+ ret = lsetfilecon(base, label); -+ if (ret != 0) { -+ ERROR("Failed to set file label"); -+ goto out; -+ } -+ } -+ } -+ } -+ -+out: -+ closedir(dir); -+ return ret; -+} -+ -+/* -+ * selinux_chcon: Chcon changes the `fpath` file object to the SELinux label `label`. -+ * If `fpath` is a directory and `recurse`` is true, Chcon will walk the -+ * directory tree setting the label. -+ * -+ * @fpath : a file or directory -+ * @label : label string -+ * @recurse : whether to recurse -+ * -+ * Returns 0 on success, < 0 on failure -+ */ -+static int selinux_chcon(const char *fpath, const char *label, bool recurse) -+{ -+ struct stat s_buf; -+ -+ if (fpath == NULL) { -+ ERROR("Empty file path"); -+ return -1; -+ } -+ -+ if (label == NULL) { -+ return 0; -+ } -+ -+ if (bad_prefix(fpath) != 0) { -+ return -1; -+ } -+ if (stat(fpath, &s_buf) != 0) { -+ return -1; -+ } -+ if (recurse && S_ISDIR(s_buf.st_mode)) { -+ return recurse_set_file_label(fpath, label); -+ } -+ -+ if (lsetfilecon(fpath, label) != 0) { -+ ERROR("Failed to set file label"); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+/* -+ * selinux_relabel: Relabel changes the label of path to the filelabel string. -+ * It changes the MCS label to s0 if shared is true. -+ * This will allow all containers to share the content. 
-+ * -+ * @path : a file or directory -+ * @label : label string -+ * @shared : whether to use share mode -+ * -+ * Returns 0 on success, < 0 on failure -+ */ -+static int selinux_relabel(const char *path, const char *label, bool shared) -+{ -+ int ret = 0; -+ char *tmp_file_label = NULL; -+ -+ if (label == NULL) { -+ return 0; -+ } -+ -+ tmp_file_label = strdup(label); -+ if (is_exclude_relabel_path(path)) { -+ ERROR("SELinux relabeling of %s is not allowed", path); -+ ret = -1; -+ goto out; -+ } -+ -+ if (shared) { -+ context_t c = context_new(label); -+ context_range_set(c, "s0"); -+ free(tmp_file_label); -+ tmp_file_label = strdup(context_str(c)); -+ context_free(c); -+ } -+ -+ if (selinux_chcon(path, tmp_file_label, true) != 0) { -+ ERROR("Failed to modify %s's selinux context: %s", path, tmp_file_label); -+ ret = -1; -+ goto out; -+ } -+ -+out: -+ free(tmp_file_label); -+ return ret; -+} -+ -+#endif -+ - /* - * selinux_keyring_label_set: Set SELinux context that will be assigned to the keyring - * -@@ -103,6 +326,10 @@ static struct lsm_drv selinux_drv = { - .process_label_get = selinux_process_label_get, - .process_label_set = selinux_process_label_set, - .keyring_label_set = selinux_keyring_label_set, -+#ifdef HAVE_ISULAD -+ .file_label_set = selinux_file_label_set, -+ .relabel = selinux_relabel, -+#endif - }; - - struct lsm_drv *lsm_selinux_drv_init(void) -diff --git a/src/lxc/utils.c b/src/lxc/utils.c -index 4e418fbb9..032176b1b 100644 ---- a/src/lxc/utils.c -+++ b/src/lxc/utils.c -@@ -1097,6 +1097,37 @@ out: - return dirfd; - } - -+#ifdef HAVE_ISULAD -+static int format_mount_label(const char *data, const char *mount_label, char **mnt_opts) -+{ -+ int ret = 0; -+ -+ if (mount_label != NULL) { -+ if (data != NULL) { -+ ret = asprintf(mnt_opts, "%s,context=\"%s\"", data, mount_label); -+ } else { -+ ret = asprintf(mnt_opts, "context=\"%s\"", mount_label); -+ } -+ -+ return ret < 0 ? -1 : 0; -+ } -+ -+ *mnt_opts = data != NULL ? 
strdup(data) : NULL; -+ return 0; -+} -+ -+static int receive_mount_options(const char *data, const char *mount_label, -+ const char *fstype, char **mnt_opts) -+{ -+ // SELinux kernels don't support labeling of /proc or /sys -+ if (fstype != NULL && (strcmp(fstype, "proc") == 0 || strcmp(fstype, "sysfs") == 0)) { -+ return format_mount_label(data, NULL, mnt_opts); -+ } -+ -+ return format_mount_label(data, mount_label, mnt_opts); -+} -+#endif -+ - /* - * Safely mount a path into a container, ensuring that the mount target - * is under the container's @rootfs. (If @rootfs is NULL, then the container -@@ -1105,14 +1136,22 @@ out: - * CAVEAT: This function must not be used for other purposes than container - * setup before executing the container's init - */ -+#ifdef HAVE_ISULAD -+int safe_mount(const char *src, const char *dest, const char *fstype, -+ unsigned long flags, const void *data, const char *rootfs, const char *mount_label) -+#else - int safe_mount(const char *src, const char *dest, const char *fstype, - unsigned long flags, const void *data, const char *rootfs) -+#endif - { - int destfd, ret, saved_errno; - /* Only needs enough for /proc/self/fd/. 
*/ - char srcbuf[50], destbuf[50]; - int srcfd = -1; - const char *mntsrc = src; -+#ifdef HAVE_ISULAD -+ __do_free char *mnt_opts = NULL; -+#endif - - if (!rootfs) - rootfs = ""; -@@ -1155,8 +1194,23 @@ int safe_mount(const char *src, const char *dest, const char *fstype, - return -EINVAL; - } - -+#ifdef HAVE_ISULAD -+ if (receive_mount_options(data, mount_label, fstype, &mnt_opts) != 0) { -+ ERROR("Failed to receive mount options"); -+ return -EINVAL; -+ } -+ -+ ret = mount(mntsrc, destbuf, fstype, flags, mnt_opts); -+ saved_errno = errno; -+ if (ret < 0 && strcmp(fstype, "mqueue") == 0) { -+ INFO("older kernels don't support labeling of /dev/mqueue, retry without selinux context"); -+ ret = mount(mntsrc, destbuf, fstype, flags, data); -+ saved_errno = errno; -+ } -+#else - ret = mount(mntsrc, destbuf, fstype, flags, data); - saved_errno = errno; -+#endif - if (srcfd != -1) - close(srcfd); - -@@ -1167,6 +1221,18 @@ int safe_mount(const char *src, const char *dest, const char *fstype, - return ret; - } - -+#ifdef HAVE_ISULAD -+ if (strcmp(fstype, "mqueue") == 0 && lsm_file_label_set(dest, mount_label) != 0) { -+ ERROR("Failed to set file label on %s", dest); -+ return -EINVAL; -+ } -+ -+ if (strcmp(fstype, "bind") == 0 && lsm_relabel(src, mount_label, false) != 0) { -+ ERROR("Failed to reabel %s with %s", src, mount_label); -+ return -EINVAL; -+ } -+#endif -+ - return 0; - } - -@@ -1233,7 +1299,11 @@ domount: - if (!strcmp(rootfs, "")) - ret = mount("proc", path, "proc", 0, NULL); - else -+#ifdef HAVE_ISULAD -+ ret = safe_mount("proc", path, "proc", 0, NULL, rootfs, NULL); -+#else - ret = safe_mount("proc", path, "proc", 0, NULL, rootfs); -+#endif - if (ret < 0) - return -1; - -diff --git a/src/lxc/utils.h b/src/lxc/utils.h -index 39ef5792f..4d1c49bab 100644 ---- a/src/lxc/utils.h -+++ b/src/lxc/utils.h -@@ -220,9 +220,15 @@ extern char *choose_init(const char *rootfs); - extern bool switch_to_ns(pid_t pid, const char *ns); - extern char *get_template_path(const 
char *t); - extern int open_without_symlink(const char *target, const char *prefix_skip); -+#ifdef HAVE_ISULAD -+extern int safe_mount(const char *src, const char *dest, const char *fstype, -+ unsigned long flags, const void *data, -+ const char *rootfs, const char *mount_label); -+#else - extern int safe_mount(const char *src, const char *dest, const char *fstype, - unsigned long flags, const void *data, - const char *rootfs); -+#endif - extern int lxc_mount_proc_if_needed(const char *rootfs); - extern int open_devnull(void); - extern int set_stdfds(int fd); --- -2.25.1 - diff --git a/0003-format-code-and-verify-mount-mode.patch b/0003-format-code-and-verify-mount-mode.patch deleted file mode 100644 index 463fa48..0000000 --- a/0003-format-code-and-verify-mount-mode.patch +++ /dev/null @@ -1,423 +0,0 @@ -From c0f37e083c49cfcb9441743a409fdee44d32d7c5 Mon Sep 17 00:00:00 2001 -From: wujing -Date: Thu, 16 Jul 2020 16:39:35 +0800 -Subject: [PATCH 03/10] format code and verify mount mode - -Signed-off-by: wujing ---- - src/lxc/lsm/apparmor.c | 14 +++ - src/lxc/lsm/nop.c | 14 +++ - src/lxc/lsm/selinux.c | 242 +++++++++++++++++++++-------------------- - src/lxc/utils.c | 30 ++++- - 4 files changed, 182 insertions(+), 118 deletions(-) - -diff --git a/src/lxc/lsm/apparmor.c b/src/lxc/lsm/apparmor.c -index f251e5e7e..591d37c27 100644 ---- a/src/lxc/lsm/apparmor.c -+++ b/src/lxc/lsm/apparmor.c -@@ -1186,6 +1186,16 @@ static int apparmor_process_label_set(const char *inlabel, struct lxc_conf *conf - return 0; - } - -+#ifdef HAVE_ISULAD -+static int apparmor_file_label_set(const char *path, const char *label) { -+ return 0; -+} -+ -+static int apparmor_relabel(const char *path, const char *label, bool shared) { -+ return 0; -+} -+#endif -+ - static struct lsm_drv apparmor_drv = { - .name = "AppArmor", - .enabled = apparmor_enabled, -@@ -1193,6 +1203,10 @@ static struct lsm_drv apparmor_drv = { - .process_label_set = apparmor_process_label_set, - .prepare = apparmor_prepare, - 
.cleanup = apparmor_cleanup, -+#ifdef HAVE_ISULAD -+ .file_label_set = apparmor_file_label_set, -+ .relabel = apparmor_relabel, -+#endif - }; - - struct lsm_drv *lsm_apparmor_drv_init(void) -diff --git a/src/lxc/lsm/nop.c b/src/lxc/lsm/nop.c -index 5b345b9a2..188945d51 100644 ---- a/src/lxc/lsm/nop.c -+++ b/src/lxc/lsm/nop.c -@@ -24,11 +24,25 @@ static int nop_enabled(void) - return 0; - } - -+#ifdef HAVE_ISULAD -+static int nop_file_label_set(const char *path, const char *label) { -+ return 0; -+} -+ -+static int nop_relabel(const char *path, const char *label, bool shared) { -+ return 0; -+} -+#endif -+ - static struct lsm_drv nop_drv = { - .name = "nop", - .enabled = nop_enabled, - .process_label_get = nop_process_label_get, - .process_label_set = nop_process_label_set, -+#ifdef HAVE_ISULAD -+ .file_label_set = nop_file_label_set, -+ .relabel = nop_relabel, -+#endif - }; - - struct lsm_drv *lsm_nop_drv_init(void) -diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c -index 5bc9843e4..864b16be7 100644 ---- a/src/lxc/lsm/selinux.c -+++ b/src/lxc/lsm/selinux.c -@@ -106,6 +106,10 @@ static int selinux_file_label_set(const char *path, const char *label) - return 0; - } - -+ if (!is_selinux_enabled()) { -+ return 0; -+ } -+ - ret = lsetfilecon(path, label); - if (ret != 0) { - SYSERROR("Failed to setSELinux context to \"%s\": %s", label, path); -@@ -125,16 +129,16 @@ static int selinux_file_label_set(const char *path, const char *label) - */ - static bool is_exclude_relabel_path(const char *path) - { -- const char *exclude_path[] = { "/", "/usr", "/etc", "/tmp", "/home", "/run", "/var", "/root" }; -- size_t i; -+ const char *exclude_path[] = { "/", "/usr", "/etc", "/tmp", "/home", "/run", "/var", "/root" }; -+ size_t i; - -- for (i = 0; i < sizeof(exclude_path) / sizeof(char *); i++) { -- if (strcmp(path, exclude_path[i]) == 0) { -- return true; -- } -- } -+ for (i = 0; i < sizeof(exclude_path) / sizeof(char *); i++) { -+ if (strcmp(path, exclude_path[i]) == 0) 
{ -+ return true; -+ } -+ } - -- return false; -+ return false; - } - - /* -@@ -146,19 +150,19 @@ static bool is_exclude_relabel_path(const char *path) - */ - static int bad_prefix(const char *fpath) - { -- const char *bad_prefixes = "/usr"; -+ const char *bad_prefixes = "/usr"; - -- if (fpath == NULL) { -- ERROR("Empty file path"); -- return -1; -- } -+ if (fpath == NULL) { -+ ERROR("Empty file path"); -+ return -1; -+ } - -- if (strncmp(fpath, bad_prefixes, strlen(bad_prefixes)) == 0) { -- ERROR("relabeling content in %s is not allowed", bad_prefixes); -- return -1; -- } -+ if (strncmp(fpath, bad_prefixes, strlen(bad_prefixes)) == 0) { -+ ERROR("relabeling content in %s is not allowed", bad_prefixes); -+ return -1; -+ } - -- return 0; -+ return 0; - } - - /* -@@ -171,51 +175,51 @@ static int bad_prefix(const char *fpath) - */ - static int recurse_set_file_label(const char *basePath, const char *label) - { -- int ret = 0; -- DIR *dir = NULL; -- struct dirent *ptr = NULL; -- char base[PATH_MAX] = { 0 }; -- -- if ((dir = opendir(basePath)) == NULL) { -- ERROR("Failed to Open dir: %s", basePath); -- return -1; -- } -- -- ret = lsetfilecon(basePath, label); -- if (ret != 0) { -- ERROR("Failed to set file label"); -- goto out; -- } -- -- while ((ptr = readdir(dir)) != NULL) { -- if (strcmp(ptr->d_name, ".") == 0 || strcmp(ptr->d_name, "..") == 0) { -- continue; -- } else { -- int nret = snprintf(base, sizeof(base), "%s/%s", basePath, ptr->d_name); -- if (nret < 0 || nret >= sizeof(base)) { -- ERROR("Failed to get path"); -- ret = -1; -- goto out; -- } -- if (ptr->d_type == DT_DIR) { -- ret = recurse_set_file_label(base, label); -- if (ret != 0) { -- ERROR("Failed to set dir label"); -- goto out; -- } -- } else { -- ret = lsetfilecon(base, label); -- if (ret != 0) { -- ERROR("Failed to set file label"); -- goto out; -- } -- } -- } -- } -+ int ret = 0; -+ DIR *dir = NULL; -+ struct dirent *ptr = NULL; -+ char base[PATH_MAX] = { 0 }; -+ -+ if ((dir = opendir(basePath)) == 
NULL) { -+ ERROR("Failed to Open dir: %s", basePath); -+ return -1; -+ } -+ -+ ret = lsetfilecon(basePath, label); -+ if (ret != 0) { -+ ERROR("Failed to set file label"); -+ goto out; -+ } -+ -+ while ((ptr = readdir(dir)) != NULL) { -+ if (strcmp(ptr->d_name, ".") == 0 || strcmp(ptr->d_name, "..") == 0) { -+ continue; -+ } else { -+ int nret = snprintf(base, sizeof(base), "%s/%s", basePath, ptr->d_name); -+ if (nret < 0 || nret >= sizeof(base)) { -+ ERROR("Failed to get path"); -+ ret = -1; -+ goto out; -+ } -+ if (ptr->d_type == DT_DIR) { -+ ret = recurse_set_file_label(base, label); -+ if (ret != 0) { -+ ERROR("Failed to set dir label"); -+ goto out; -+ } -+ } else { -+ ret = lsetfilecon(base, label); -+ if (ret != 0) { -+ ERROR("Failed to set file label"); -+ goto out; -+ } -+ } -+ } -+ } - - out: -- closedir(dir); -- return ret; -+ closedir(dir); -+ return ret; - } - - /* -@@ -231,33 +235,33 @@ out: - */ - static int selinux_chcon(const char *fpath, const char *label, bool recurse) - { -- struct stat s_buf; -- -- if (fpath == NULL) { -- ERROR("Empty file path"); -- return -1; -- } -- -- if (label == NULL) { -- return 0; -- } -- -- if (bad_prefix(fpath) != 0) { -- return -1; -- } -- if (stat(fpath, &s_buf) != 0) { -- return -1; -- } -- if (recurse && S_ISDIR(s_buf.st_mode)) { -- return recurse_set_file_label(fpath, label); -- } -- -- if (lsetfilecon(fpath, label) != 0) { -- ERROR("Failed to set file label"); -- return -1; -- } -- -- return 0; -+ struct stat s_buf; -+ -+ if (fpath == NULL) { -+ ERROR("Empty file path"); -+ return -1; -+ } -+ -+ if (label == NULL) { -+ return 0; -+ } -+ -+ if (bad_prefix(fpath) != 0) { -+ return -1; -+ } -+ if (stat(fpath, &s_buf) != 0) { -+ return -1; -+ } -+ if (recurse && S_ISDIR(s_buf.st_mode)) { -+ return recurse_set_file_label(fpath, label); -+ } -+ -+ if (lsetfilecon(fpath, label) != 0) { -+ ERROR("Failed to set file label"); -+ return -1; -+ } -+ -+ return 0; - } - - /* -@@ -273,37 +277,41 @@ static int 
selinux_chcon(const char *fpath, const char *label, bool recurse) - */ - static int selinux_relabel(const char *path, const char *label, bool shared) - { -- int ret = 0; -- char *tmp_file_label = NULL; -- -- if (label == NULL) { -- return 0; -- } -- -- tmp_file_label = strdup(label); -- if (is_exclude_relabel_path(path)) { -- ERROR("SELinux relabeling of %s is not allowed", path); -- ret = -1; -- goto out; -- } -- -- if (shared) { -- context_t c = context_new(label); -- context_range_set(c, "s0"); -- free(tmp_file_label); -- tmp_file_label = strdup(context_str(c)); -- context_free(c); -- } -- -- if (selinux_chcon(path, tmp_file_label, true) != 0) { -- ERROR("Failed to modify %s's selinux context: %s", path, tmp_file_label); -- ret = -1; -- goto out; -- } -+ int ret = 0; -+ char *tmp_file_label = NULL; -+ -+ if (label == NULL) { -+ return 0; -+ } -+ -+ if (!is_selinux_enabled()) { -+ return 0; -+ } -+ -+ tmp_file_label = strdup(label); -+ if (is_exclude_relabel_path(path)) { -+ ERROR("SELinux relabeling of %s is not allowed", path); -+ ret = -1; -+ goto out; -+ } -+ -+ if (shared) { -+ context_t c = context_new(label); -+ context_range_set(c, "s0"); -+ free(tmp_file_label); -+ tmp_file_label = strdup(context_str(c)); -+ context_free(c); -+ } -+ -+ if (selinux_chcon(path, tmp_file_label, true) != 0) { -+ ERROR("Failed to modify %s's selinux context: %s", path, tmp_file_label); -+ ret = -1; -+ goto out; -+ } - - out: -- free(tmp_file_label); -- return ret; -+ free(tmp_file_label); -+ return ret; - } - - #endif -diff --git a/src/lxc/utils.c b/src/lxc/utils.c -index 032176b1b..5ec6117f7 100644 ---- a/src/lxc/utils.c -+++ b/src/lxc/utils.c -@@ -1126,6 +1126,34 @@ static int receive_mount_options(const char *data, const char *mount_label, - - return format_mount_label(data, mount_label, mnt_opts); - } -+ -+static int relabel_bind_mount_source(const char *src, const char *fstype, const char *data, const char *mount_label) -+{ -+ __do_free_string_list char **parts = NULL; 
-+ ssize_t parts_len; -+ ssize_t i; -+ -+ if (data == NULL) { -+ return lsm_relabel(src, mount_label, false); -+ } -+ -+ parts = lxc_string_split(data, ','); -+ if (parts == NULL) { -+ return -1; -+ } -+ -+ parts_len = lxc_array_len((void **)parts); -+ for (i = 0; i < parts_len; i++) { -+ if (strcmp(parts[i], "z") == 0) { -+ return lsm_relabel(src, mount_label, true); -+ } else if (strcmp(parts[i], "Z") == 0) { -+ return lsm_relabel(src, mount_label, false); -+ } -+ } -+ -+ return lsm_relabel(src, mount_label, false); -+} -+ - #endif - - /* -@@ -1227,7 +1255,7 @@ int safe_mount(const char *src, const char *dest, const char *fstype, - return -EINVAL; - } - -- if (strcmp(fstype, "bind") == 0 && lsm_relabel(src, mount_label, false) != 0) { -+ if (strcmp(fstype, "bind") == 0 && relabel_bind_mount_source(src, fstype, (const char *)data, mount_label) != 0) { - ERROR("Failed to reabel %s with %s", src, mount_label); - return -EINVAL; - } --- -2.25.1 - diff --git a/0004-Removes-the-definition-of-the-thread-attributes-obje.patch b/0004-Removes-the-definition-of-the-thread-attributes-obje.patch deleted file mode 100644 index da3c7b9..0000000 --- a/0004-Removes-the-definition-of-the-thread-attributes-obje.patch +++ /dev/null @@ -1,162 +0,0 @@ -From b1ef723b4f437aad3c0c0497174bc7d3444426cd Mon Sep 17 00:00:00 2001 -From: wujing -Date: Mon, 20 Jul 2020 15:30:42 +0800 -Subject: [PATCH 04/10] Removes the definition of the thread attributes object - -Signed-off-by: wujing ---- - src/lxc/attach.c | 1 + - src/lxc/conf.c | 1 + - src/lxc/lsm/selinux.c | 33 +++++++++++---------------------- - src/lxc/start.c | 1 + - 4 files changed, 14 insertions(+), 22 deletions(-) - -diff --git a/src/lxc/attach.c b/src/lxc/attach.c -index 068cc5f8e..b33ff6325 100644 ---- a/src/lxc/attach.c -+++ b/src/lxc/attach.c -@@ -1188,6 +1188,7 @@ static int create_attach_timeout_thread(int64_t attach_timeout, pid_t pid) - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - 
ret = pthread_create(&ptid, &attr, wait_attach_timeout, timeout_conf); -+ pthread_attr_destroy(&attr); - if (ret != 0) { - ERROR("Create attach wait timeout thread failed"); - free(timeout_conf); -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 7e4af0a95..6a25b96ac 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -4660,6 +4660,7 @@ static int run_ocihook_buffer(struct oci_hook_conf *oconf, const char *inmsg) - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - err = pthread_create(&ptid, &attr, wait_ocihook_timeout, conf); -+ pthread_attr_destroy(&attr); - if (err != 0) { - ERROR("Create wait timeout thread failed"); - free(conf); -diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c -index 864b16be7..ceac08891 100644 ---- a/src/lxc/lsm/selinux.c -+++ b/src/lxc/lsm/selinux.c -@@ -100,8 +100,6 @@ static int selinux_process_label_set(const char *inlabel, struct lxc_conf *conf, - */ - static int selinux_file_label_set(const char *path, const char *label) - { -- int ret; -- - if (path == NULL || label == NULL || strcmp(label, "unconfined_t") == 0) { - return 0; - } -@@ -110,8 +108,7 @@ static int selinux_file_label_set(const char *path, const char *label) - return 0; - } - -- ret = lsetfilecon(path, label); -- if (ret != 0) { -+ if (lsetfilecon(path, label) != 0) { - SYSERROR("Failed to setSELinux context to \"%s\": %s", label, path); - return -1; - } -@@ -176,7 +173,7 @@ static int bad_prefix(const char *fpath) - static int recurse_set_file_label(const char *basePath, const char *label) - { - int ret = 0; -- DIR *dir = NULL; -+ __do_closedir DIR *dir = NULL; - struct dirent *ptr = NULL; - char base[PATH_MAX] = { 0 }; - -@@ -188,7 +185,7 @@ static int recurse_set_file_label(const char *basePath, const char *label) - ret = lsetfilecon(basePath, label); - if (ret != 0) { - ERROR("Failed to set file label"); -- goto out; -+ return ret; - } - - while ((ptr = readdir(dir)) != NULL) { -@@ -198,28 +195,25 @@ static int 
recurse_set_file_label(const char *basePath, const char *label) - int nret = snprintf(base, sizeof(base), "%s/%s", basePath, ptr->d_name); - if (nret < 0 || nret >= sizeof(base)) { - ERROR("Failed to get path"); -- ret = -1; -- goto out; -+ return -1; - } - if (ptr->d_type == DT_DIR) { - ret = recurse_set_file_label(base, label); - if (ret != 0) { - ERROR("Failed to set dir label"); -- goto out; -+ return ret; - } - } else { - ret = lsetfilecon(base, label); - if (ret != 0) { - ERROR("Failed to set file label"); -- goto out; -+ return ret; - } - } - } - } - --out: -- closedir(dir); -- return ret; -+ return 0; - } - - /* -@@ -277,8 +271,7 @@ static int selinux_chcon(const char *fpath, const char *label, bool recurse) - */ - static int selinux_relabel(const char *path, const char *label, bool shared) - { -- int ret = 0; -- char *tmp_file_label = NULL; -+ __do_free char *tmp_file_label = NULL; - - if (label == NULL) { - return 0; -@@ -291,8 +284,7 @@ static int selinux_relabel(const char *path, const char *label, bool shared) - tmp_file_label = strdup(label); - if (is_exclude_relabel_path(path)) { - ERROR("SELinux relabeling of %s is not allowed", path); -- ret = -1; -- goto out; -+ return -1; - } - - if (shared) { -@@ -305,13 +297,10 @@ static int selinux_relabel(const char *path, const char *label, bool shared) - - if (selinux_chcon(path, tmp_file_label, true) != 0) { - ERROR("Failed to modify %s's selinux context: %s", path, tmp_file_label); -- ret = -1; -- goto out; -+ return -1; - } - --out: -- free(tmp_file_label); -- return ret; -+ return 0; - } - - #endif -diff --git a/src/lxc/start.c b/src/lxc/start.c -index 51d13254b..ab47420f1 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -2484,6 +2484,7 @@ static int create_start_timeout_thread(struct lxc_conf *conf, unsigned int start - pthread_attr_init(&attr); - pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); - ret = pthread_create(&ptid, &attr, wait_start_timeout, timeout_conf); -+ 
pthread_attr_destroy(&attr); - if (ret != 0) { - ERROR("Create start wait timeout thread failed"); - free(timeout_conf); --- -2.25.1 - diff --git a/0005-solve-coredump-bug-caused-by-fstype-being-NULL-durin.patch b/0005-solve-coredump-bug-caused-by-fstype-being-NULL-durin.patch deleted file mode 100644 index 0a6740f..0000000 --- a/0005-solve-coredump-bug-caused-by-fstype-being-NULL-durin.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 405b048dc82a8695b8a400524787243f3898cbd6 Mon Sep 17 00:00:00 2001 -From: wujing -Date: Tue, 21 Jul 2020 17:30:17 +0800 -Subject: [PATCH 05/10] solve coredump bug caused by fstype being NULL during - mount - -Signed-off-by: wujing ---- - src/lxc/lsm/selinux.c | 3 +-- - src/lxc/utils.c | 7 ++++--- - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c -index ceac08891..837a3da3d 100644 ---- a/src/lxc/lsm/selinux.c -+++ b/src/lxc/lsm/selinux.c -@@ -68,7 +68,6 @@ static int selinux_process_label_set(const char *inlabel, struct lxc_conf *conf, - - label = inlabel ? 
inlabel : conf->lsm_se_context; - if (!label) { -- - label = DEFAULT_LABEL; - } - -@@ -273,7 +272,7 @@ static int selinux_relabel(const char *path, const char *label, bool shared) - { - __do_free char *tmp_file_label = NULL; - -- if (label == NULL) { -+ if (path == NULL || label == NULL) { - return 0; - } - -diff --git a/src/lxc/utils.c b/src/lxc/utils.c -index 5ec6117f7..95c00cfed 100644 ---- a/src/lxc/utils.c -+++ b/src/lxc/utils.c -@@ -1230,7 +1230,7 @@ int safe_mount(const char *src, const char *dest, const char *fstype, - - ret = mount(mntsrc, destbuf, fstype, flags, mnt_opts); - saved_errno = errno; -- if (ret < 0 && strcmp(fstype, "mqueue") == 0) { -+ if (ret < 0 && fstype != NULL && strcmp(fstype, "mqueue") == 0) { - INFO("older kernels don't support labeling of /dev/mqueue, retry without selinux context"); - ret = mount(mntsrc, destbuf, fstype, flags, data); - saved_errno = errno; -@@ -1250,12 +1250,13 @@ int safe_mount(const char *src, const char *dest, const char *fstype, - } - - #ifdef HAVE_ISULAD -- if (strcmp(fstype, "mqueue") == 0 && lsm_file_label_set(dest, mount_label) != 0) { -+ if (fstype != NULL && strcmp(fstype, "mqueue") == 0 && lsm_file_label_set(dest, mount_label) != 0) { - ERROR("Failed to set file label on %s", dest); - return -EINVAL; - } - -- if (strcmp(fstype, "bind") == 0 && relabel_bind_mount_source(src, fstype, (const char *)data, mount_label) != 0) { -+ if (fstype != NULL && strcmp(fstype, "bind") == 0 && -+ relabel_bind_mount_source(src, fstype, (const char *)data, mount_label) != 0) { - ERROR("Failed to reabel %s with %s", src, mount_label); - return -EINVAL; - } --- -2.25.1 - diff --git a/0006-SIGTERM-do-not-catch-signal-SIGTERM-in-lxc-monitor.patch b/0006-SIGTERM-do-not-catch-signal-SIGTERM-in-lxc-monitor.patch deleted file mode 100644 index 28eb62f..0000000 --- a/0006-SIGTERM-do-not-catch-signal-SIGTERM-in-lxc-monitor.patch +++ /dev/null @@ -1,40 +0,0 @@ -From e21c6474901e3d12560eb389597e88b47fd46be5 Mon Sep 17 00:00:00 2001 
-From: lifeng68 -Date: Fri, 11 Sep 2020 10:05:04 +0800 -Subject: [PATCH 06/10] SIGTERM: do not catch signal SIGTERM in [lxc monitor] - -Signed-off-by: lifeng68 ---- - src/lxc/attach.c | 2 +- - src/lxc/start.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/lxc/attach.c b/src/lxc/attach.c -index b33ff6325..72b3055c7 100644 ---- a/src/lxc/attach.c -+++ b/src/lxc/attach.c -@@ -1228,7 +1228,7 @@ static int isulad_setup_signal_fd(sigset_t *oldmask) - { - int ret; - sigset_t mask; -- const int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH}; -+ const int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH, SIGTERM}; - - /* Block everything except serious error signals. */ - ret = sigfillset(&mask); -diff --git a/src/lxc/start.c b/src/lxc/start.c -index ab47420f1..50a1a8203 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -294,7 +294,7 @@ static int setup_signal_fd(sigset_t *oldmask) - { - int ret; - sigset_t mask; -- const int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH}; -+ const int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH, SIGTERM}; - - /* Block everything except serious error signals. 
*/ - ret = sigfillset(&mask); --- -2.25.1 - diff --git a/0007-Using-string-type-instead-of-security_context_t-beca.patch b/0007-Using-string-type-instead-of-security_context_t-beca.patch deleted file mode 100644 index e8aed02..0000000 --- a/0007-Using-string-type-instead-of-security_context_t-beca.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 5a8c9b52ad3291feb87c2281e074b2c85c766245 Mon Sep 17 00:00:00 2001 -From: wujing -Date: Fri, 25 Sep 2020 10:21:37 +0800 -Subject: [PATCH 07/10] Using string type instead of security_context_t because - it is deprecated - -Signed-off-by: wujing ---- - src/lxc/lsm/selinux.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c -index 837a3da3d..79697c583 100644 ---- a/src/lxc/lsm/selinux.c -+++ b/src/lxc/lsm/selinux.c -@@ -36,7 +36,7 @@ lxc_log_define(selinux, lsm); - */ - static char *selinux_process_label_get(pid_t pid) - { -- security_context_t ctx; -+ char *ctx; - char *label; - - if (getpidcon_raw(pid, &ctx) < 0) { --- -2.25.1 - diff --git a/0008-hook-pass-correct-mount-dir-as-root-to-hook.patch b/0008-hook-pass-correct-mount-dir-as-root-to-hook.patch deleted file mode 100644 index a7e2efe..0000000 --- a/0008-hook-pass-correct-mount-dir-as-root-to-hook.patch +++ /dev/null @@ -1,26 +0,0 @@ -From e8d9c6475eb42fdb1775a465353758f2c5418938 Mon Sep 17 00:00:00 2001 -From: lifeng68 -Date: Sat, 31 Oct 2020 17:38:04 +0800 -Subject: [PATCH 08/10] hook: pass correct mount dir as root to hook - -Signed-off-by: lifeng68 ---- - src/lxc/conf.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 6a25b96ac..3d8713954 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -4785,7 +4785,7 @@ static int do_run_oci_hooks(const char *name, const char *lxcpath, struct lxc_co - return 0; - } - -- rootpath = get_root_path(lc->rootfs.path, lc->rootfs.bdev_type); -+ rootpath = get_root_path(lc->rootfs.path ? 
lc->rootfs.mount : NULL, lc->rootfs.bdev_type); - if (!rootpath) { - ERROR("Get container %s rootpath failed.", name); - return -1; --- -2.25.1 - diff --git a/0009-cgroup-refact-cgroup-manager-to-single-file.patch b/0009-cgroup-refact-cgroup-manager-to-single-file.patch deleted file mode 100644 index 3ead31c..0000000 --- a/0009-cgroup-refact-cgroup-manager-to-single-file.patch +++ /dev/null @@ -1,4416 +0,0 @@ -From 4592fbcbd0be862cf37a3090f58a4491c430e71a Mon Sep 17 00:00:00 2001 -From: lifeng68 -Date: Mon, 2 Nov 2020 16:53:19 +0800 -Subject: [PATCH 09/10] cgroup: refact cgroup manager to single file - -Signed-off-by: lifeng68 ---- - src/lxc/Makefile.am | 5 +- - src/lxc/cgroups/cgfsng.c | 1030 +--------- - src/lxc/cgroups/isulad_cgfsng.c | 3115 +++++++++++++++++++++++++++++++ - 3 files changed, 3147 insertions(+), 1003 deletions(-) - create mode 100644 src/lxc/cgroups/isulad_cgfsng.c - -diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am -index 0e1ba8da9..dc49c7e22 100644 ---- a/src/lxc/Makefile.am -+++ b/src/lxc/Makefile.am -@@ -107,7 +107,6 @@ liblxc_la_SOURCES = af_unix.c af_unix.h \ - api_extensions.h \ - attach.c attach.h \ - caps.c caps.h \ -- cgroups/cgfsng.c \ - cgroups/cgroup.c cgroups/cgroup.h \ - cgroups/cgroup2_devices.c cgroups/cgroup2_devices.h \ - cgroups/cgroup_utils.c cgroups/cgroup_utils.h \ -@@ -174,7 +173,11 @@ liblxc_la_SOURCES += isulad_utils.c isulad_utils.h \ - json/logger_json_file.c json/logger_json_file.h \ - json/oci_runtime_spec.c json/oci_runtime_spec.h \ - json/read-file.c json/read-file.h \ -+ cgroups/isulad_cgfsng.c \ - exec_commands.c exec_commands.h -+ -+else -+liblxc_la_SOURCES += cgroups/cgfsng.c - endif - - if IS_BIONIC -diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c -index 1ff3d9812..9b9aaf6c3 100644 ---- a/src/lxc/cgroups/cgfsng.c -+++ b/src/lxc/cgroups/cgfsng.c -@@ -214,7 +214,6 @@ static char *read_file(const char *fnam) - return move_ptr(buf); - } - --#ifndef HAVE_ISULAD - /* Taken over modified from 
the kernel sources. */ - #define NBITS 32 /* bits in uint32_t */ - #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) -@@ -477,14 +476,13 @@ static bool copy_parent_file(const char *parent_cgroup, - value, child_cgroup, file); - return true; - } --#endif -+ - - static inline bool is_unified_hierarchy(const struct hierarchy *h) - { - return h->version == CGROUP2_SUPER_MAGIC; - } - --#ifndef HAVE_ISULAD - /* - * Initialize the cpuset hierarchy in first directory of @cgroup_leaf and set - * cgroup.clone_children so that children inherit settings. Since the -@@ -564,7 +562,6 @@ static int cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, - - return fret; - } --#endif - - /* Given two null-terminated lists of strings, return true if any string is in - * both. -@@ -958,107 +955,6 @@ struct generic_userns_exec_data { - char *path; - }; - --#ifdef HAVE_ISULAD -- --static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies, -- const char *container_cgroup) --{ -- if (!container_cgroup || !hierarchies) -- return 0; -- -- for (int i = 0; hierarchies[i]; i++) { -- struct hierarchy *h = hierarchies[i]; -- int ret; -- -- if (!h->container_full_path) { -- h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, container_cgroup, NULL); -- } -- -- ret = lxc_rm_rf(h->container_full_path); -- if (ret < 0) { -- SYSERROR("Failed to destroy \"%s\"", h->container_full_path); -- return -1; -- } -- -- free_disarm(h->container_full_path); -- } -- -- return 0; --} -- --static int isulad_cgroup_tree_remove_wrapper(void *data) --{ -- struct generic_userns_exec_data *arg = data; -- uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid; -- gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 
0 : arg->conf->init_gid; -- int ret; -- -- if (!lxc_setgroups(0, NULL) && errno != EPERM) -- return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)"); -- -- ret = setresgid(nsgid, nsgid, nsgid); -- if (ret < 0) -- return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)", -- (int)nsgid, (int)nsgid, (int)nsgid); -- -- ret = setresuid(nsuid, nsuid, nsuid); -- if (ret < 0) -- return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)", -- (int)nsuid, (int)nsuid, (int)nsuid); -- -- return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup); --} -- --__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops, -- struct lxc_handler *handler) --{ -- int ret; -- -- if (!ops) { -- ERROR("Called with uninitialized cgroup operations"); -- return false; -- } -- -- if (!ops->hierarchies) { -- return false; -- } -- -- if (!handler) { -- ERROR("Called with uninitialized handler"); -- return false; -- } -- -- if (!handler->conf) { -- ERROR("Called with uninitialized conf"); -- return false; -- } -- --#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX -- ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices); -- if (ret < 0) -- WARN("Failed to detach bpf program from cgroup"); --#endif -- -- if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) { -- struct generic_userns_exec_data wrap = { -- .conf = handler->conf, -- .container_cgroup = ops->container_cgroup, -- .hierarchies = ops->hierarchies, -- .origuid = 0, -- }; -- ret = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper, -- &wrap, "cgroup_tree_remove_wrapper"); -- } else { -- ret = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup); -- } -- if (ret < 0) { -- SYSWARN("Failed to destroy cgroups"); -- return false; -- } -- -- return true; --} --#else - static int cgroup_tree_remove(struct hierarchy **hierarchies, - const char *container_cgroup) - { -@@ -1149,15 +1045,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops 
*ops, - if (ret < 0) - SYSWARN("Failed to destroy cgroups"); - } --#endif - --#ifdef HAVE_ISULAD --__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, -- struct lxc_handler *handler) --{ -- return; --} --#else - __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops, - struct lxc_handler *handler) - { -@@ -1230,15 +1118,6 @@ try_lxc_rm_rf: - WARN("Failed to destroy \"%s\"", h->monitor_full_path); - } - } --#endif -- --#ifdef HAVE_ISULAD --__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, -- struct lxc_handler *handler) --{ -- return true; --} --#else - - static int mkdir_eexist_on_last(const char *dir, mode_t mode) - { -@@ -1398,227 +1277,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops, - ops->monitor_cgroup = move_ptr(monitor_cgroup); - return log_info(true, "The monitor process uses \"%s\" as cgroup", ops->monitor_cgroup); - } --#endif -- --#ifdef HAVE_ISULAD -- --static bool isulad_copy_parent_file(char *path, char *file) --{ -- int ret; -- int len = 0; -- char *value = NULL; -- char *current = NULL; -- char *fpath = NULL; -- char *lastslash = NULL; -- char oldv; -- -- fpath = must_make_path(path, file, NULL); -- current = read_file(fpath); -- -- if (current == NULL) { -- SYSERROR("Failed to read file \"%s\"", fpath); -- free(fpath); -- return false; -- } -- -- if (strcmp(current, "\n") != 0) { -- free(fpath); -- free(current); -- return true; -- } -- -- free(fpath); -- free(current); -- -- lastslash = strrchr(path, '/'); -- if (lastslash == NULL) { -- ERROR("Failed to detect \"/\" in \"%s\"", path); -- return false; -- } -- oldv = *lastslash; -- *lastslash = '\0'; -- fpath = must_make_path(path, file, NULL); -- *lastslash = oldv; -- len = lxc_read_from_file(fpath, NULL, 0); -- if (len <= 0) -- goto on_error; -- -- value = must_realloc(NULL, len + 1); -- ret = lxc_read_from_file(fpath, value, len); -- if (ret != len) -- goto on_error; -- free(fpath); -- -- fpath = 
must_make_path(path, file, NULL); -- ret = lxc_write_to_file(fpath, value, len, false, 0666); -- if (ret < 0) -- SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath); -- free(fpath); -- free(value); -- return ret >= 0; -- --on_error: -- SYSERROR("Failed to read file \"%s\"", fpath); -- free(fpath); -- free(value); -- return false; --} -- --static bool build_sub_cpuset_cgroup_dir(char *cgpath) --{ -- int ret; -- -- ret = mkdir_p(cgpath, 0755); -- if (ret < 0) { -- if (errno != EEXIST) { -- SYSERROR("Failed to create directory \"%s\"", cgpath); -- return false; -- } -- } -- -- /* copy parent's settings */ -- if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) { -- SYSERROR("Failed to copy \"cpuset.cpus\" settings"); -- return false; -- } -- -- /* copy parent's settings */ -- if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) { -- SYSERROR("Failed to copy \"cpuset.mems\" settings"); -- return false; -- } -- -- return true; --} -- --static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname) --{ -- char *cgpath, *slash; -- bool sub_mk_success = false; -- -- if (!string_in_list(h->controllers, "cpuset")) -- return true; -- -- cgname += strspn(cgname, "/"); -- -- slash = strchr(cgname, '/'); -- -- if (slash != NULL) { -- while (slash) { -- *slash = '\0'; -- cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); -- sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath); -- free(cgpath); -- *slash = '/'; -- if (!sub_mk_success) { -- return false; -- } -- slash = strchr(slash + 1, '/'); -- } -- } -- -- cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); -- sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath); -- free(cgpath); -- if (!sub_mk_success) { -- return false; -- } -- -- return true; --} -- --static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode) --{ -- const char *tmp = dir; -- const char *orig = dir; -- -- do { -- int ret; -- size_t cur_len; -- char *makeme; -- 
-- dir = tmp + strspn(tmp, "/"); -- tmp = dir + strcspn(dir, "/"); -- -- errno = ENOMEM; -- cur_len = dir - orig; -- makeme = strndup(orig, cur_len); -- if (!makeme) -- return -1; -- -- ret = mkdir(makeme, mode); -- if (ret < 0) { -- if (errno != EEXIST) { -- SYSERROR("Failed to create directory \"%s\"", makeme); -- free(makeme); -- return -1; -- } -- } -- free(makeme); -- -- } while (tmp != dir); - -- return 0; --} -- --static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd) --{ -- int ret; -- __do_free char *path = NULL; -- -- path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); -- -- if (file_exists(path)) { // it must not already exist -- ERROR("Cgroup path \"%s\" already exist.", path); -- lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.", -- __FILE__, __LINE__, path); -- return false; -- } -- -- if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) { -- ERROR("Failed to handle legacy cpuset controller"); -- return false; -- } -- -- ret = isulad_mkdir_eexist_on_last(path, 0755); -- if (ret < 0) { -- ERROR("Failed to create cgroup \"%s\"", path); -- return false; -- } -- -- h->cgfd_con = lxc_open_dirfd(path); -- if (h->cgfd_con < 0) -- return log_error_errno(false, errno, "Failed to open %s", path); -- -- if (h->container_full_path == NULL) { -- h->container_full_path = move_ptr(path); -- } -- -- return true; --} -- --/* isulad: create hierarchies path, if fail, return the error */ --__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, -- struct lxc_handler *handler) --{ -- int i; -- char *container_cgroup = ops->container_cgroup; -- -- if (!container_cgroup) { -- ERROR("cgfsng_create container_cgroup is invalid"); -- return false; -- } -- -- for (i = 0; ops->hierarchies[i]; i++) { -- if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, ops->errfd)) { -- SYSERROR("Failed to create %s", ops->hierarchies[i]->container_full_path); -- return 
false; -- } -- } -- -- return true; --} --#else - /* - * Try to create the same cgroup in all hierarchies. Start with cgroup_pattern; - * next cgroup_pattern-1, -2, ..., -999. -@@ -1698,15 +1357,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops, - INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup); - return true; - } --#endif - --#ifdef HAVE_ISULAD --__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, -- struct lxc_handler *handler) --{ -- return true; --} --#else - __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, - struct lxc_handler *handler) - { -@@ -1758,58 +1409,7 @@ __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops, - - return true; - } --#endif - --#ifdef HAVE_ISULAD --__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, -- struct lxc_handler *handler) --{ -- int len; -- char pidstr[INTTYPE_TO_STRLEN(pid_t)]; -- -- if (!ops) -- return ret_set_errno(false, ENOENT); -- -- if (!ops->hierarchies) -- return true; -- -- if (!ops->container_cgroup) -- return ret_set_errno(false, ENOENT); -- -- if (!handler || !handler->conf) -- return ret_set_errno(false, EINVAL); -- -- len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid); -- -- for (int i = 0; ops->hierarchies[i]; i++) { -- int ret; -- char *fullpath; -- int retry_count = 0; -- int max_retry = 10; -- -- fullpath = must_make_path(ops->hierarchies[i]->container_full_path, -- "cgroup.procs", NULL); --retry: -- ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666); -- if (ret != 0) { -- if (retry_count < max_retry) { -- SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", fullpath, retry_count); -- (void)isulad_cg_legacy_handle_cpuset_hierarchy(ops->hierarchies[i], ops->container_cgroup); -- (void)isulad_mkdir_eexist_on_last(ops->hierarchies[i]->container_full_path, 0755); -- usleep(100 * 1000); /* 100 millisecond */ -- retry_count++; -- goto retry; -- } -- 
SYSERROR("Failed to enter cgroup \"%s\"", fullpath); -- free(fullpath); -- return false; -- } -- free(fullpath); -- } -- -- return true; --} --#else - __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, - struct lxc_handler *handler) - { -@@ -1841,7 +1441,6 @@ __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops, - - return true; - } --#endif - - static int fchowmodat(int dirfd, const char *path, uid_t chown_uid, - gid_t chown_gid, mode_t chmod_mode) -@@ -2056,234 +1655,39 @@ static int __cg_mount_direct(int type, struct hierarchy *h, - flags |= MS_RELATIME; - - if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO) -- flags |= MS_RDONLY; -- -- if (h->version != CGROUP2_SUPER_MAGIC) { -- controllers = lxc_string_join(",", (const char **)h->controllers, false); -- if (!controllers) -- return -ENOMEM; -- fstype = "cgroup"; -- } -- -- ret = mount("cgroup", controllerpath, fstype, flags, controllers); -- if (ret < 0) -- return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s", -- controllerpath, fstype); -- -- DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype); -- return 0; --} -- --static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h, -- const char *controllerpath) --{ -- return __cg_mount_direct(type, h, controllerpath); --} -- --static inline int cg_mount_cgroup_full(int type, struct hierarchy *h, -- const char *controllerpath) --{ -- if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) -- return 0; -- -- return __cg_mount_direct(type, h, controllerpath); --} -- --#ifdef HAVE_ISULAD --__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, -- struct lxc_handler *handler, -- const char *root, int type) --{ -- int i, ret; -- char *tmpfspath = NULL; -- char *systemdpath = NULL; -- char *unifiedpath = NULL; -- bool has_cgns = false, retval = false, wants_force_mount = false; -- char **merged = NULL; -- -- if ((type 
& LXC_AUTO_CGROUP_MASK) == 0) -- return true; -- -- if (type & LXC_AUTO_CGROUP_FORCE) { -- type &= ~LXC_AUTO_CGROUP_FORCE; -- wants_force_mount = true; -- } -- -- if (!wants_force_mount) { -- if (!lxc_list_empty(&handler->conf->keepcaps)) -- wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps); -- else -- wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps); -- } -- -- has_cgns = cgns_supported(); -- if (has_cgns && !wants_force_mount) -- return true; -- -- if (type == LXC_AUTO_CGROUP_NOSPEC) -- type = LXC_AUTO_CGROUP_MIXED; -- else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC) -- type = LXC_AUTO_CGROUP_FULL_MIXED; -- -- /* Mount tmpfs */ -- tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL); -- if (mkdir_p(tmpfspath, 0755) < 0) { -- ERROR("Failed to create directory: %s", tmpfspath); -- goto on_error; -- } -- ret = safe_mount(NULL, tmpfspath, "tmpfs", -- MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, -- "size=10240k,mode=755", root, handler->conf->lsm_se_mount_context); -- if (ret < 0) -- goto on_error; -- -- for (i = 0; ops->hierarchies[i]; i++) { -- char *controllerpath = NULL; -- char *path2 = NULL; -- struct hierarchy *h = ops->hierarchies[i]; -- char *controller = strrchr(h->mountpoint, '/'); -- -- if (!controller) -- continue; -- controller++; -- -- // isulad: symlink subcgroup -- if (strchr(controller, ',') != NULL) { -- int pret; -- pret = lxc_append_string(&merged, controller); -- if (pret < 0) -- goto on_error; -- } -- -- controllerpath = must_make_path(tmpfspath, controller, NULL); -- if (dir_exists(controllerpath)) { -- free(controllerpath); -- continue; -- } -- -- ret = mkdir(controllerpath, 0755); -- if (ret < 0) { -- SYSERROR("Error creating cgroup path: %s", controllerpath); -- free(controllerpath); -- goto on_error; -- } -- -- if (has_cgns && wants_force_mount) { -- /* If cgroup namespaces are supported but the container -- * will not have CAP_SYS_ADMIN after it has started we -- * need to mount the cgroups 
manually. -- */ -- ret = cg_mount_in_cgroup_namespace(type, h, controllerpath); -- free(controllerpath); -- if (ret < 0) -- goto on_error; -- -- continue; -- } -- -- ret = cg_mount_cgroup_full(type, h, controllerpath); -- if (ret < 0) { -- free(controllerpath); -- goto on_error; -- } -- -- if (!cg_mount_needs_subdirs(type)) { -- free(controllerpath); -- continue; -- } -- -- // isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container, -- // isulad: ignore h->container_base_path so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container -- path2 = must_make_path(controllerpath, NULL); -- ret = mkdir_p(path2, 0755); -- if (ret < 0) { -- free(controllerpath); -- free(path2); -- goto on_error; -- } -- -- ret = cg_legacy_mount_controllers(type, h, controllerpath, -- path2, ops->container_cgroup); -- free(controllerpath); -- free(path2); -- if (ret < 0) -- goto on_error; -- } -- -- // isulad: symlink subcgroup -- if (merged) { -- char **mc = NULL; -- for (mc = merged; *mc; mc++) { -- char *token = NULL; -- char *copy = must_copy_string(*mc); -- lxc_iterate_parts(token, copy, ",") { -- int mret; -- char *link; -- link = must_make_path(tmpfspath, token, NULL); -- mret = symlink(*mc, link); -- if (mret < 0 && errno != EEXIST) { -- SYSERROR("Failed to create link %s for target %s", link, *mc); -- free(copy); -- free(link); -- goto on_error; -- } -- free(link); -- } -- free(copy); -- } -- } -- -+ flags |= MS_RDONLY; - -- // isulad: remount /sys/fs/cgroup to readonly -- if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) { -- ret = mount(tmpfspath, tmpfspath, "bind", -- MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL); -- if (ret < 0) { -- SYSERROR("Failed to remount /sys/fs/cgroup."); -- goto on_error; -- } -+ if (h->version != CGROUP2_SUPER_MAGIC) { -+ controllers = lxc_string_join(",", (const char **)h->controllers, false); -+ if (!controllers) -+ return -ENOMEM; -+ 
fstype = "cgroup"; - } - -- // isulad: remount /sys/fs/cgroup/systemd to readwrite for system container -- if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0) -- { -- unifiedpath = must_make_path(root, "/sys/fs/cgroup/unified", NULL); -- if (dir_exists(unifiedpath)) -- { -- ret = umount2(unifiedpath, MNT_DETACH); -- if (ret < 0) -- { -- SYSERROR("Failed to umount /sys/fs/cgroup/unified."); -- goto on_error; -- } -- } -+ ret = mount("cgroup", controllerpath, fstype, flags, controllers); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s", -+ controllerpath, fstype); - -- systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL); -- ret = mount(systemdpath, systemdpath, "bind", -- MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME | MS_BIND | MS_REMOUNT, NULL); -- if (ret < 0) -- { -- SYSERROR("Failed to remount /sys/fs/cgroup/systemd."); -- goto on_error; -- } -- } -+ DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype); -+ return 0; -+} - -- retval = true; -+static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h, -+ const char *controllerpath) -+{ -+ return __cg_mount_direct(type, h, controllerpath); -+} - --on_error: -- free(tmpfspath); -- if (systemdpath != NULL) -- { -- free(systemdpath); -- } -- if (unifiedpath != NULL) -- { -- free(unifiedpath); -- } -- lxc_free_array((void **)merged, free); -- return retval; -+static inline int cg_mount_cgroup_full(int type, struct hierarchy *h, -+ const char *controllerpath) -+{ -+ if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) -+ return 0; -+ -+ return __cg_mount_direct(type, h, controllerpath); - } --#else -+ - __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, - struct lxc_handler *handler, - const char *root, int type) -@@ -2396,7 +1800,6 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops, - - return true; - } --#endif - - /* Only 
root needs to escape to the cgroup of its init. */ - __cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops, -@@ -2647,34 +2050,11 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops, - return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"", - controller ? controller : "(null)"); - --#ifdef HAVE_ISULAD -- if (!h->container_full_path) -- h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL); --#endif -- - return h->container_full_path - ? h->container_full_path + strlen(h->mountpoint) - : NULL; - } - --#ifdef HAVE_ISULAD --__cgfsng_ops static const char *cgfsng_get_cgroup_full_path(struct cgroup_ops *ops, -- const char *controller) --{ -- struct hierarchy *h; -- -- h = get_hierarchy(ops, controller); -- if (!h) -- return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"", -- controller ? controller : "(null)"); -- -- if (!h->container_full_path) -- h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL); -- -- return h->container_full_path; --} --#endif -- - /* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path, - * which must be freed by the caller. 
- */ -@@ -2981,44 +2361,6 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops, - return true; - } - --#ifdef HAVE_ISULAD --__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, -- char *value, size_t len, const char *name, -- const char *lxcpath) --{ -- int ret = -1; -- size_t controller_len; -- char *controller, *p, *path; -- struct hierarchy *h; -- -- controller_len = strlen(filename); -- controller = alloca(controller_len + 1); -- (void)strlcpy(controller, filename, controller_len + 1); -- -- p = strchr(controller, '.'); -- if (p) -- *p = '\0'; -- -- const char *ori_path = ops->get_cgroup(ops, controller); -- if (ori_path == NULL) { -- ERROR("Failed to get cgroup path:%s", controller); -- return -1; -- } -- path = safe_strdup(ori_path); -- -- h = get_hierarchy(ops, controller); -- if (h) { -- char *fullpath; -- -- fullpath = build_full_cgpath_from_monitorpath(h, path, filename); -- ret = lxc_read_from_file(fullpath, value, len); -- free(fullpath); -- } -- free(path); -- -- return ret; --} --#else - /* Called externally (i.e. from 'lxc-cgroup') to query cgroup limits. Here we - * don't have a cgroup_data set up, so we ask the running container through the - * commands API for the cgroup path. 
-@@ -3056,7 +2398,6 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename, - - return ret; - } --#endif - - static int device_cgroup_parse_access(struct device_item *device, const char *val) - { -@@ -3170,44 +2511,6 @@ int device_cgroup_rule_parse(struct device_item *device, const char *key, - return device_cgroup_parse_access(device, ++val); - } - --#ifdef HAVE_ISULAD --__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, -- const char *filename, const char *value, -- const char *name, const char *lxcpath) --{ -- int ret = -1; -- size_t controller_len; -- char *controller, *p, *path; -- struct hierarchy *h; -- -- controller_len = strlen(filename); -- controller = alloca(controller_len + 1); -- (void)strlcpy(controller, filename, controller_len + 1); -- -- p = strchr(controller, '.'); -- if (p) -- *p = '\0'; -- -- const char *ori_path = ops->get_cgroup(ops, controller); -- if (ori_path == NULL) { -- ERROR("Failed to get cgroup path:%s", controller); -- return -1; -- } -- path = safe_strdup(ori_path); -- -- h = get_hierarchy(ops, controller); -- if (h) { -- char *fullpath; -- -- fullpath = build_full_cgpath_from_monitorpath(h, path, filename); -- ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); -- free(fullpath); -- } -- free(path); -- -- return ret; --} --#else - /* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits. Here we - * don't have a cgroup_data set up, so we ask the running container through the - * commands API for the cgroup path. -@@ -3260,7 +2563,6 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops, - - return ret; - } --#endif - - /* take devices cgroup line - * /dev/foo rwx -@@ -3352,7 +2654,6 @@ static int convert_devpath(const char *invalue, char *dest) - return 0; - } - --#ifndef HAVE_ISULAD - /* Called from setup_limits - here we have the container's cgroup_data because - * we created the cgroups. 
- */ -@@ -3385,212 +2686,7 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, - - return lxc_write_openat(h->container_full_path, filename, value, strlen(value)); - } --#endif -- --#ifdef HAVE_ISULAD --/* Called from setup_limits - here we have the container's cgroup_data because -- * we created the cgroups. -- */ --static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename, -- char *value, size_t len) --{ -- char *fullpath = NULL; -- char *p = NULL; -- struct hierarchy *h = NULL; -- int ret = 0; -- char *controller = NULL; -- -- len = strlen(filename); -- if (SIZE_MAX - 1 < len) { -- errno = EINVAL; -- return -1; -- } -- controller = calloc(1, len + 1); -- if (controller == NULL) { -- errno = ENOMEM; -- return -1; -- } -- (void)strlcpy(controller, filename, len + 1); -- -- p = strchr(controller, '.'); -- if (p) -- *p = '\0'; -- -- -- h = get_hierarchy(ops, controller); -- if (!h) { -- ERROR("Failed to setup limits for the \"%s\" controller. 
" -- "The controller seems to be unused by \"cgfsng\" cgroup " -- "driver or not enabled on the cgroup hierarchy", -- controller); -- errno = ENOENT; -- free(controller); -- return -ENOENT; -- } -- -- fullpath = must_make_path(h->container_full_path, filename, NULL); -- ret = lxc_read_from_file(fullpath, value, len); -- free(fullpath); -- free(controller); -- return ret; --} -- --static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, -- const char *value) --{ -- size_t len; -- char *fullpath, *p; -- /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */ -- char converted_value[50]; -- struct hierarchy *h; -- int ret = 0; -- char *controller = NULL; -- int retry_count = 0; -- int max_retry = 10; -- char *container_cgroup = ops->container_cgroup; -- -- len = strlen(filename); -- controller = alloca(len + 1); -- (void)strlcpy(controller, filename, len + 1); -- -- p = strchr(controller, '.'); -- if (p) -- *p = '\0'; -- -- if (strcmp("devices.allow", filename) == 0 && value[0] == '/') { -- ret = convert_devpath(value, converted_value); -- if (ret < 0) -- return ret; -- value = converted_value; -- } -- -- h = get_hierarchy(ops, controller); -- if (!h) { -- ERROR("Failed to setup limits for the \"%s\" controller. 
" -- "The controller seems to be unused by \"cgfsng\" cgroup " -- "driver or not enabled on the cgroup hierarchy", -- controller); -- errno = ENOENT; -- return -ENOENT; -- } -- -- fullpath = must_make_path(h->container_full_path, filename, NULL); -- --retry: -- ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); -- if (ret != 0) { -- if (retry_count < max_retry) { -- SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath); -- (void)isulad_cg_legacy_handle_cpuset_hierarchy(h, container_cgroup); -- (void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755); -- usleep(100 * 1000); /* 100 millisecond */ -- retry_count++; -- goto retry; -- } -- lxc_write_error_message(ops->errfd, -- "%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".", -- __FILE__, __LINE__, value, fullpath, strerror(errno)); -- } -- free(fullpath); -- return ret; --} -- --__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, -- struct lxc_conf *conf, -- bool do_devices) --{ -- __do_free struct lxc_list *sorted_cgroup_settings = NULL; -- struct lxc_list *cgroup_settings = &conf->cgroup; -- struct lxc_list *iterator, *next; -- struct lxc_cgroup *cg; -- bool ret = false; -- char value[21 + 1] = { 0 }; -- long long int readvalue, setvalue; -- -- if (!ops) -- return ret_set_errno(false, ENOENT); -- -- if (!conf) -- return ret_set_errno(false, EINVAL); -- -- cgroup_settings = &conf->cgroup; -- if (lxc_list_empty(cgroup_settings)) -- return true; -- -- if (!ops->hierarchies) -- return ret_set_errno(false, EINVAL); -- -- sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings); -- if (!sorted_cgroup_settings) -- return false; -- -- lxc_list_for_each(iterator, sorted_cgroup_settings) { -- cg = iterator->elem; -- -- if (do_devices == !strncmp("devices", cg->subsystem, 7)) { -- const char *cgvalue = cg->value; -- if (strcmp(cg->subsystem, "files.limit") == 0) { -- if 
(lxc_safe_long_long(cgvalue, &setvalue) != 0) { -- SYSERROR("Invalid integer value %s", cgvalue); -- goto out; -- } -- if (setvalue <= 0) { -- cgvalue = "max"; -- } -- } -- if (isulad_cg_legacy_set_data(ops, cg->subsystem, cgvalue)) { -- if (do_devices && (errno == EACCES || errno == EPERM)) { -- SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue); -- continue; -- } -- SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue); -- goto out; -- } -- DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cgvalue); -- } -- -- // isulad: check cpu shares -- if (strcmp(cg->subsystem, "cpu.shares") == 0) { -- if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) { -- SYSERROR("Error get %s", cg->subsystem); -- goto out; -- } -- trim(value); -- if (lxc_safe_long_long(cg->value, &setvalue) != 0) { -- SYSERROR("Invalid value %s", cg->value); -- goto out; -- } -- if (lxc_safe_long_long(value, &readvalue) != 0) { -- SYSERROR("Invalid value %s", value); -- goto out; -- } -- if (setvalue > readvalue) { -- ERROR("The maximum allowed cpu-shares is %s", value); -- lxc_write_error_message(ops->errfd, -- "%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".", -- __FILE__, __LINE__, value); -- goto out; -- } else if (setvalue < readvalue) { -- ERROR("The minimum allowed cpu-shares is %s", value); -- lxc_write_error_message(ops->errfd, -- "%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".", -- __FILE__, __LINE__, value); -- goto out; -- } -- } -- } -- -- ret = true; -- INFO("Limits for the legacy cgroup hierarchies have been setup"); --out: -- lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) { -- lxc_list_del(iterator); -- free(iterator); -- } - -- return ret; --} --#else - __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops, - struct lxc_conf *conf, - bool do_devices) -@@ -3644,7 +2740,6 @@ out: - - return 
ret; - } --#endif - - /* - * Some of the parsing logic comes from the original cgroup device v1 -@@ -3856,12 +2951,6 @@ bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup) - return true; - } - --#ifdef HAVE_ISULAD --__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) --{ -- return true; --} --#else - __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) - { - if (!ops) -@@ -3869,7 +2958,6 @@ __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) - - return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup); - } --#endif - - __cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops) - { -@@ -4019,22 +3107,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg - - trim(base_cgroup); - prune_init_scope(base_cgroup); --#ifdef HAVE_ISULAD -- /* isulad: do not test writeable, if we run isulad in docker without cgroup namespace. -- * the base_cgroup will be docker/XXX.., mountpoint+base_cgroup may be not exist */ -- -- /* -- * reason:base cgroup may be started with /system.slice when cg_hybrid_init -- * read /proc/1/cgroup on host, and cgroup init will set all containers -- * cgroup path under /sys/fs/cgroup//system.slice/xxx/lxc -- * directory, this is not consistent with docker. The default cgroup path -- * should be under /sys/fs/cgroup//lxc directory. 
-- */ - -- if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') { -- base_cgroup[1] = '\0'; -- } --#else - bool writeable; - if (type == CGROUP2_SUPER_MAGIC) - writeable = test_writeable_v2(mountpoint, base_cgroup); -@@ -4044,7 +3117,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg - TRACE("The %s group is not writeable", base_cgroup); - continue; - } --#endif -+ - if (type == CGROUP2_SUPER_MAGIC) { - char *cgv2_ctrl_path; - -@@ -4197,44 +3270,6 @@ static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf) - return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map)); - } - --#ifdef HAVE_ISULAD --__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf) --{ -- const char *cgroup_pattern; -- const char *cgroup_tree; -- __do_free char *container_cgroup = NULL, *__cgroup_tree = NULL; -- size_t len; -- -- if (!ops) -- return ret_set_errno(-1, ENOENT); -- -- /* copy system-wide cgroup information */ -- cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern"); -- if (cgroup_pattern && strcmp(cgroup_pattern, "") != 0) -- ops->cgroup_pattern = must_copy_string(cgroup_pattern); -- -- if (conf->cgroup_meta.dir) { -- cgroup_tree = conf->cgroup_meta.dir; -- container_cgroup = must_concat(&len, cgroup_tree, "/", conf->name, NULL); -- } else if (ops->cgroup_pattern) { -- __cgroup_tree = lxc_string_replace("%n", conf->name, ops->cgroup_pattern); -- if (!__cgroup_tree) -- return ret_set_errno(-1, ENOMEM); -- -- cgroup_tree = __cgroup_tree; -- container_cgroup = must_concat(&len, cgroup_tree, NULL); -- } else { -- cgroup_tree = NULL; -- container_cgroup = must_concat(&len, conf->name, NULL); -- } -- if (!container_cgroup) -- return ret_set_errno(-1, ENOMEM); -- -- ops->container_cgroup = move_ptr(container_cgroup); -- -- return 0; --} --#else - __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf) - { - const char *cgroup_pattern; -@@ -4249,7 +3284,6 @@ 
__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf - - return 0; - } --#endif - - struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) - { -@@ -4266,12 +3300,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) - return NULL; - - cgfsng_ops->data_init = cgfsng_data_init; --#ifdef HAVE_ISULAD -- cgfsng_ops->errfd = conf ? conf->errpipe[1] : -1; -- cgfsng_ops->payload_destroy = isulad_cgfsng_payload_destroy; --#else - cgfsng_ops->payload_destroy = cgfsng_payload_destroy; --#endif - cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy; - cgfsng_ops->monitor_create = cgfsng_monitor_create; - cgfsng_ops->monitor_enter = cgfsng_monitor_enter; -@@ -4284,9 +3313,6 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) - cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies; - cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies; - cgfsng_ops->get_cgroup = cgfsng_get_cgroup; --#ifdef HAVE_ISULAD -- cgfsng_ops->get_cgroup_full_path = cgfsng_get_cgroup_full_path; --#endif - cgfsng_ops->get = cgfsng_get; - cgfsng_ops->set = cgfsng_set; - cgfsng_ops->freeze = cgfsng_freeze; -diff --git a/src/lxc/cgroups/isulad_cgfsng.c b/src/lxc/cgroups/isulad_cgfsng.c -new file mode 100644 -index 000000000..82a4333f3 ---- /dev/null -+++ b/src/lxc/cgroups/isulad_cgfsng.c -@@ -0,0 +1,3115 @@ -+/****************************************************************************** -+ * Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved. -+ * Author: lifeng -+ * Create: 2020-11-02 -+ * Description: provide container definition -+ * lxc: linux Container library -+ * This library is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. 
-+ * -+ * This library is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with this library; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ ******************************************************************************/ -+ -+#ifndef _GNU_SOURCE -+#define _GNU_SOURCE 1 -+#endif -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "af_unix.h" -+#include "caps.h" -+#include "cgroup.h" -+#include "cgroup2_devices.h" -+#include "cgroup_utils.h" -+#include "commands.h" -+#include "conf.h" -+#include "config.h" -+#include "log.h" -+#include "macro.h" -+#include "mainloop.h" -+#include "memory_utils.h" -+#include "storage/storage.h" -+#include "utils.h" -+ -+#ifndef HAVE_STRLCPY -+#include "include/strlcpy.h" -+#endif -+ -+#ifndef HAVE_STRLCAT -+#include "include/strlcat.h" -+#endif -+ -+lxc_log_define(isulad_cgfsng, cgroup); -+ -+/* Given a pointer to a null-terminated array of pointers, realloc to add one -+ * entry, and point the new entry to NULL. Do not fail. Return the index to the -+ * second-to-last entry - that is, the one which is now available for use -+ * (keeping the list null-terminated). -+ */ -+static int append_null_to_list(void ***list) -+{ -+ int newentry = 0; -+ -+ if (*list) -+ for (; (*list)[newentry]; newentry++) -+ ; -+ -+ *list = must_realloc(*list, (newentry + 2) * sizeof(void **)); -+ (*list)[newentry + 1] = NULL; -+ return newentry; -+} -+ -+/* Given a null-terminated array of strings, check whether @entry is one of the -+ * strings. 
-+ */ -+static bool string_in_list(char **list, const char *entry) -+{ -+ if (!list) -+ return false; -+ -+ for (int i = 0; list[i]; i++) -+ if (strcmp(list[i], entry) == 0) -+ return true; -+ -+ return false; -+} -+ -+/* Return a copy of @entry prepending "name=", i.e. turn "systemd" into -+ * "name=systemd". Do not fail. -+ */ -+static char *cg_legacy_must_prefix_named(char *entry) -+{ -+ size_t len; -+ char *prefixed; -+ -+ len = strlen(entry); -+ prefixed = must_realloc(NULL, len + 6); -+ -+ memcpy(prefixed, "name=", STRLITERALLEN("name=")); -+ memcpy(prefixed + STRLITERALLEN("name="), entry, len); -+ prefixed[len + 5] = '\0'; -+ -+ return prefixed; -+} -+ -+/* Append an entry to the clist. Do not fail. @clist must be NULL the first time -+ * we are called. -+ * -+ * We also handle named subsystems here. Any controller which is not a kernel -+ * subsystem, we prefix "name=". Any which is both a kernel and named subsystem, -+ * we refuse to use because we're not sure which we have here. -+ * (TODO: We could work around this in some cases by just remounting to be -+ * unambiguous, or by comparing mountpoint contents with current cgroup.) -+ * -+ * The last entry will always be NULL. -+ */ -+static void must_append_controller(char **klist, char **nlist, char ***clist, -+ char *entry) -+{ -+ int newentry; -+ char *copy; -+ -+ if (string_in_list(klist, entry) && string_in_list(nlist, entry)) { -+ ERROR("Refusing to use ambiguous controller \"%s\"", entry); -+ ERROR("It is both a named and kernel subsystem"); -+ return; -+ } -+ -+ newentry = append_null_to_list((void ***)clist); -+ -+ if (strncmp(entry, "name=", 5) == 0) -+ copy = must_copy_string(entry); -+ else if (string_in_list(klist, entry)) -+ copy = must_copy_string(entry); -+ else -+ copy = cg_legacy_must_prefix_named(entry); -+ -+ (*clist)[newentry] = copy; -+} -+ -+/* Given a handler's cgroup data, return the struct hierarchy for the controller -+ * @c, or NULL if there is none. 
-+ */ -+struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller) -+{ -+ if (!ops->hierarchies) -+ return log_trace_errno(NULL, errno, "There are no useable cgroup controllers"); -+ -+ for (int i = 0; ops->hierarchies[i]; i++) { -+ if (!controller) { -+ /* This is the empty unified hierarchy. */ -+ if (ops->hierarchies[i]->controllers && -+ !ops->hierarchies[i]->controllers[0]) -+ return ops->hierarchies[i]; -+ continue; -+ } else if (pure_unified_layout(ops) && -+ strcmp(controller, "devices") == 0) { -+ if (ops->unified->bpf_device_controller) -+ return ops->unified; -+ break; -+ } -+ -+ if (string_in_list(ops->hierarchies[i]->controllers, controller)) -+ return ops->hierarchies[i]; -+ } -+ -+ if (controller) -+ WARN("There is no useable %s controller", controller); -+ else -+ WARN("There is no empty unified cgroup hierarchy"); -+ -+ return ret_set_errno(NULL, ENOENT); -+} -+ -+#define BATCH_SIZE 50 -+static void batch_realloc(char **mem, size_t oldlen, size_t newlen) -+{ -+ int newbatches = (newlen / BATCH_SIZE) + 1; -+ int oldbatches = (oldlen / BATCH_SIZE) + 1; -+ -+ if (!*mem || newbatches > oldbatches) -+ *mem = must_realloc(*mem, newbatches * BATCH_SIZE); -+} -+ -+static void append_line(char **dest, size_t oldlen, char *new, size_t newlen) -+{ -+ size_t full = oldlen + newlen; -+ -+ batch_realloc(dest, oldlen, full + 1); -+ -+ memcpy(*dest + oldlen, new, newlen + 1); -+} -+ -+/* Slurp in a whole file */ -+static char *read_file(const char *fnam) -+{ -+ __do_free char *buf = NULL, *line = NULL; -+ __do_fclose FILE *f = NULL; -+ size_t len = 0, fulllen = 0; -+ int linelen; -+ -+ f = fopen(fnam, "re"); -+ if (!f) -+ return NULL; -+ -+ while ((linelen = getline(&line, &len, f)) != -1) { -+ append_line(&buf, fulllen, line, linelen); -+ fulllen += linelen; -+ } -+ -+ return move_ptr(buf); -+} -+ -+static inline bool is_unified_hierarchy(const struct hierarchy *h) -+{ -+ return h->version == CGROUP2_SUPER_MAGIC; -+} -+ -+/* Given two 
null-terminated lists of strings, return true if any string is in -+ * both. -+ */ -+static bool controller_lists_intersect(char **l1, char **l2) -+{ -+ if (!l1 || !l2) -+ return false; -+ -+ for (int i = 0; l1[i]; i++) -+ if (string_in_list(l2, l1[i])) -+ return true; -+ -+ return false; -+} -+ -+/* For a null-terminated list of controllers @clist, return true if any of those -+ * controllers is already listed the null-terminated list of hierarchies @hlist. -+ * Realistically, if one is present, all must be present. -+ */ -+static bool controller_list_is_dup(struct hierarchy **hlist, char **clist) -+{ -+ if (!hlist) -+ return false; -+ -+ for (int i = 0; hlist[i]; i++) -+ if (controller_lists_intersect(hlist[i]->controllers, clist)) -+ return true; -+ -+ return false; -+} -+ -+/* Return true if the controller @entry is found in the null-terminated list of -+ * hierarchies @hlist. -+ */ -+static bool controller_found(struct hierarchy **hlist, char *entry) -+{ -+ if (!hlist) -+ return false; -+ -+ for (int i = 0; hlist[i]; i++) -+ if (string_in_list(hlist[i]->controllers, entry)) -+ return true; -+ -+ return false; -+} -+ -+/* Return true if all of the controllers which we require have been found. The -+ * required list is freezer and anything in lxc.cgroup.use. -+ */ -+static bool all_controllers_found(struct cgroup_ops *ops) -+{ -+ struct hierarchy **hlist; -+ -+ if (!ops->cgroup_use) -+ return true; -+ -+ hlist = ops->hierarchies; -+ for (char **cur = ops->cgroup_use; cur && *cur; cur++) -+ if (!controller_found(hlist, *cur)) -+ return log_error(false, "No %s controller mountpoint found", *cur); -+ -+ return true; -+} -+ -+/* Get the controllers from a mountinfo line There are other ways we could get -+ * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we -+ * could parse the mount options. 
But we simply assume that the mountpoint must -+ * be /sys/fs/cgroup/controller-list -+ */ -+static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line, -+ int type) -+{ -+ /* The fourth field is /sys/fs/cgroup/comma-delimited-controller-list -+ * for legacy hierarchies. -+ */ -+ __do_free_string_list char **aret = NULL; -+ int i; -+ char *p2, *tok; -+ char *p = line, *sep = ","; -+ -+ for (i = 0; i < 4; i++) { -+ p = strchr(p, ' '); -+ if (!p) -+ return NULL; -+ p++; -+ } -+ -+ /* Note, if we change how mountinfo works, then our caller will need to -+ * verify /sys/fs/cgroup/ in this field. -+ */ -+ if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0) -+ return log_error(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p); -+ -+ p += 15; -+ p2 = strchr(p, ' '); -+ if (!p2) -+ return log_error(NULL, "Corrupt mountinfo"); -+ *p2 = '\0'; -+ -+ if (type == CGROUP_SUPER_MAGIC) { -+ __do_free char *dup = NULL; -+ -+ /* strdup() here for v1 hierarchies. Otherwise -+ * lxc_iterate_parts() will destroy mountpoints such as -+ * "/sys/fs/cgroup/cpu,cpuacct". 
-+ */ -+ dup = must_copy_string(p); -+ if (!dup) -+ return NULL; -+ -+ lxc_iterate_parts (tok, dup, sep) -+ must_append_controller(klist, nlist, &aret, tok); -+ } -+ *p2 = ' '; -+ -+ return move_ptr(aret); -+} -+ -+static char **cg_unified_make_empty_controller(void) -+{ -+ __do_free_string_list char **aret = NULL; -+ int newentry; -+ -+ newentry = append_null_to_list((void ***)&aret); -+ aret[newentry] = NULL; -+ return move_ptr(aret); -+} -+ -+static char **cg_unified_get_controllers(const char *file) -+{ -+ __do_free char *buf = NULL; -+ __do_free_string_list char **aret = NULL; -+ char *sep = " \t\n"; -+ char *tok; -+ -+ buf = read_file(file); -+ if (!buf) -+ return NULL; -+ -+ lxc_iterate_parts(tok, buf, sep) { -+ int newentry; -+ char *copy; -+ -+ newentry = append_null_to_list((void ***)&aret); -+ copy = must_copy_string(tok); -+ aret[newentry] = copy; -+ } -+ -+ return move_ptr(aret); -+} -+ -+static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint, -+ char *container_base_path, int type) -+{ -+ struct hierarchy *new; -+ int newentry; -+ -+ new = zalloc(sizeof(*new)); -+ new->controllers = clist; -+ new->mountpoint = mountpoint; -+ new->container_base_path = container_base_path; -+ new->version = type; -+ new->cgfd_con = -EBADF; -+ new->cgfd_mon = -EBADF; -+ -+ newentry = append_null_to_list((void ***)h); -+ (*h)[newentry] = new; -+ return new; -+} -+ -+/* Get a copy of the mountpoint from @line, which is a line from -+ * /proc/self/mountinfo. 
-+ */ -+static char *cg_hybrid_get_mountpoint(char *line) -+{ -+ char *p = line, *sret = NULL; -+ size_t len; -+ char *p2; -+ -+ for (int i = 0; i < 4; i++) { -+ p = strchr(p, ' '); -+ if (!p) -+ return NULL; -+ p++; -+ } -+ -+ if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0) -+ return NULL; -+ -+ p2 = strchr(p + 15, ' '); -+ if (!p2) -+ return NULL; -+ *p2 = '\0'; -+ -+ len = strlen(p); -+ sret = must_realloc(NULL, len + 1); -+ memcpy(sret, p, len); -+ sret[len] = '\0'; -+ -+ return sret; -+} -+ -+/* Given a multi-line string, return a null-terminated copy of the current line. */ -+static char *copy_to_eol(char *p) -+{ -+ char *p2, *sret; -+ size_t len; -+ -+ p2 = strchr(p, '\n'); -+ if (!p2) -+ return NULL; -+ -+ len = p2 - p; -+ sret = must_realloc(NULL, len + 1); -+ memcpy(sret, p, len); -+ sret[len] = '\0'; -+ -+ return sret; -+} -+ -+/* cgline: pointer to character after the first ':' in a line in a \n-terminated -+ * /proc/self/cgroup file. Check whether controller c is present. -+ */ -+static bool controller_in_clist(char *cgline, char *c) -+{ -+ __do_free char *tmp = NULL; -+ char *tok, *eol; -+ size_t len; -+ -+ eol = strchr(cgline, ':'); -+ if (!eol) -+ return false; -+ -+ len = eol - cgline; -+ tmp = must_realloc(NULL, len + 1); -+ memcpy(tmp, cgline, len); -+ tmp[len] = '\0'; -+ -+ lxc_iterate_parts(tok, tmp, ",") -+ if (strcmp(tok, c) == 0) -+ return true; -+ -+ return false; -+} -+ -+/* @basecginfo is a copy of /proc/$$/cgroup. Return the current cgroup for -+ * @controller. 
-+ */ -+static char *cg_hybrid_get_current_cgroup(char *basecginfo, char *controller, -+ int type) -+{ -+ char *p = basecginfo; -+ -+ for (;;) { -+ bool is_cgv2_base_cgroup = false; -+ -+ /* cgroup v2 entry in "/proc//cgroup": "0::/some/path" */ -+ if ((type == CGROUP2_SUPER_MAGIC) && (*p == '0')) -+ is_cgv2_base_cgroup = true; -+ -+ p = strchr(p, ':'); -+ if (!p) -+ return NULL; -+ p++; -+ -+ if (is_cgv2_base_cgroup || (controller && controller_in_clist(p, controller))) { -+ p = strchr(p, ':'); -+ if (!p) -+ return NULL; -+ p++; -+ return copy_to_eol(p); -+ } -+ -+ p = strchr(p, '\n'); -+ if (!p) -+ return NULL; -+ p++; -+ } -+} -+ -+static void must_append_string(char ***list, char *entry) -+{ -+ int newentry; -+ char *copy; -+ -+ newentry = append_null_to_list((void ***)list); -+ copy = must_copy_string(entry); -+ (*list)[newentry] = copy; -+} -+ -+static int get_existing_subsystems(char ***klist, char ***nlist) -+{ -+ __do_free char *line = NULL; -+ __do_fclose FILE *f = NULL; -+ size_t len = 0; -+ -+ f = fopen("/proc/self/cgroup", "re"); -+ if (!f) -+ return -1; -+ -+ while (getline(&line, &len, f) != -1) { -+ char *p, *p2, *tok; -+ p = strchr(line, ':'); -+ if (!p) -+ continue; -+ p++; -+ p2 = strchr(p, ':'); -+ if (!p2) -+ continue; -+ *p2 = '\0'; -+ -+ /* If the kernel has cgroup v2 support, then /proc/self/cgroup -+ * contains an entry of the form: -+ * -+ * 0::/some/path -+ * -+ * In this case we use "cgroup2" as controller name. 
-+ */ -+ if ((p2 - p) == 0) { -+ must_append_string(klist, "cgroup2"); -+ continue; -+ } -+ -+ lxc_iterate_parts(tok, p, ",") { -+ if (strncmp(tok, "name=", 5) == 0) -+ must_append_string(nlist, tok); -+ else -+ must_append_string(klist, tok); -+ } -+ } -+ -+ return 0; -+} -+ -+static char *trim(char *s) -+{ -+ size_t len; -+ -+ len = strlen(s); -+ while ((len > 1) && (s[len - 1] == '\n')) -+ s[--len] = '\0'; -+ -+ return s; -+} -+ -+static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops) -+{ -+ int i; -+ struct hierarchy **it; -+ -+ if (!ops->hierarchies) { -+ TRACE(" No hierarchies found"); -+ return; -+ } -+ -+ TRACE(" Hierarchies:"); -+ for (i = 0, it = ops->hierarchies; it && *it; it++, i++) { -+ int j; -+ char **cit; -+ -+ TRACE(" %d: base_cgroup: %s", i, (*it)->container_base_path ? (*it)->container_base_path : "(null)"); -+ TRACE(" mountpoint: %s", (*it)->mountpoint ? (*it)->mountpoint : "(null)"); -+ TRACE(" controllers:"); -+ for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++) -+ TRACE(" %d: %s", j, *cit); -+ } -+} -+ -+static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist, -+ char **nlist) -+{ -+ int k; -+ char **it; -+ -+ TRACE("basecginfo is:"); -+ TRACE("%s", basecginfo); -+ -+ for (k = 0, it = klist; it && *it; it++, k++) -+ TRACE("kernel subsystem %d: %s", k, *it); -+ -+ for (k = 0, it = nlist; it && *it; it++, k++) -+ TRACE("named subsystem %d: %s", k, *it); -+} -+ -+struct generic_userns_exec_data { -+ struct hierarchy **hierarchies; -+ const char *container_cgroup; -+ struct lxc_conf *conf; -+ uid_t origuid; /* target uid in parent namespace */ -+ char *path; -+}; -+ -+static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies, -+ const char *container_cgroup) -+{ -+ if (!container_cgroup || !hierarchies) -+ return 0; -+ -+ for (int i = 0; hierarchies[i]; i++) { -+ struct hierarchy *h = hierarchies[i]; -+ int ret; -+ -+ if (!h->container_full_path) { -+ h->container_full_path = 
must_make_path(h->mountpoint, h->container_base_path, container_cgroup, NULL); -+ } -+ -+ ret = lxc_rm_rf(h->container_full_path); -+ if (ret < 0) { -+ SYSERROR("Failed to destroy \"%s\"", h->container_full_path); -+ return -1; -+ } -+ -+ free_disarm(h->container_full_path); -+ } -+ -+ return 0; -+} -+ -+static int isulad_cgroup_tree_remove_wrapper(void *data) -+{ -+ struct generic_userns_exec_data *arg = data; -+ uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid; -+ gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid; -+ int ret; -+ -+ if (!lxc_setgroups(0, NULL) && errno != EPERM) -+ return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)"); -+ -+ ret = setresgid(nsgid, nsgid, nsgid); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)", -+ (int)nsgid, (int)nsgid, (int)nsgid); -+ -+ ret = setresuid(nsuid, nsuid, nsuid); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)", -+ (int)nsuid, (int)nsuid, (int)nsuid); -+ -+ return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup); -+} -+ -+__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ int ret; -+ -+ if (!ops) { -+ ERROR("Called with uninitialized cgroup operations"); -+ return false; -+ } -+ -+ if (!ops->hierarchies) { -+ return false; -+ } -+ -+ if (!handler) { -+ ERROR("Called with uninitialized handler"); -+ return false; -+ } -+ -+ if (!handler->conf) { -+ ERROR("Called with uninitialized conf"); -+ return false; -+ } -+ -+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX -+ ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices); -+ if (ret < 0) -+ WARN("Failed to detach bpf program from cgroup"); -+#endif -+ -+ if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) { -+ struct generic_userns_exec_data wrap = { -+ .conf = handler->conf, -+ .container_cgroup = ops->container_cgroup, -+ .hierarchies = 
ops->hierarchies, -+ .origuid = 0, -+ }; -+ ret = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper, -+ &wrap, "cgroup_tree_remove_wrapper"); -+ } else { -+ ret = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup); -+ } -+ if (ret < 0) { -+ SYSWARN("Failed to destroy cgroups"); -+ return false; -+ } -+ -+ return true; -+} -+ -+__cgfsng_ops static void isulad_cgfsng_monitor_destroy(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ return; -+} -+ -+__cgfsng_ops static inline bool isulad_cgfsng_monitor_create(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ return true; -+} -+ -+static bool isulad_copy_parent_file(char *path, char *file) -+{ -+ int ret; -+ int len = 0; -+ char *value = NULL; -+ char *current = NULL; -+ char *fpath = NULL; -+ char *lastslash = NULL; -+ char oldv; -+ -+ fpath = must_make_path(path, file, NULL); -+ current = read_file(fpath); -+ -+ if (current == NULL) { -+ SYSERROR("Failed to read file \"%s\"", fpath); -+ free(fpath); -+ return false; -+ } -+ -+ if (strcmp(current, "\n") != 0) { -+ free(fpath); -+ free(current); -+ return true; -+ } -+ -+ free(fpath); -+ free(current); -+ -+ lastslash = strrchr(path, '/'); -+ if (lastslash == NULL) { -+ ERROR("Failed to detect \"/\" in \"%s\"", path); -+ return false; -+ } -+ oldv = *lastslash; -+ *lastslash = '\0'; -+ fpath = must_make_path(path, file, NULL); -+ *lastslash = oldv; -+ len = lxc_read_from_file(fpath, NULL, 0); -+ if (len <= 0) -+ goto on_error; -+ -+ value = must_realloc(NULL, len + 1); -+ ret = lxc_read_from_file(fpath, value, len); -+ if (ret != len) -+ goto on_error; -+ free(fpath); -+ -+ fpath = must_make_path(path, file, NULL); -+ ret = lxc_write_to_file(fpath, value, len, false, 0666); -+ if (ret < 0) -+ SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath); -+ free(fpath); -+ free(value); -+ return ret >= 0; -+ -+on_error: -+ SYSERROR("Failed to read file \"%s\"", fpath); -+ free(fpath); -+ free(value); -+ return 
false; -+} -+ -+static bool build_sub_cpuset_cgroup_dir(char *cgpath) -+{ -+ int ret; -+ -+ ret = mkdir_p(cgpath, 0755); -+ if (ret < 0) { -+ if (errno != EEXIST) { -+ SYSERROR("Failed to create directory \"%s\"", cgpath); -+ return false; -+ } -+ } -+ -+ /* copy parent's settings */ -+ if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) { -+ SYSERROR("Failed to copy \"cpuset.cpus\" settings"); -+ return false; -+ } -+ -+ /* copy parent's settings */ -+ if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) { -+ SYSERROR("Failed to copy \"cpuset.mems\" settings"); -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname) -+{ -+ char *cgpath, *slash; -+ bool sub_mk_success = false; -+ -+ if (!string_in_list(h->controllers, "cpuset")) -+ return true; -+ -+ cgname += strspn(cgname, "/"); -+ -+ slash = strchr(cgname, '/'); -+ -+ if (slash != NULL) { -+ while (slash) { -+ *slash = '\0'; -+ cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); -+ sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath); -+ free(cgpath); -+ *slash = '/'; -+ if (!sub_mk_success) { -+ return false; -+ } -+ slash = strchr(slash + 1, '/'); -+ } -+ } -+ -+ cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); -+ sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath); -+ free(cgpath); -+ if (!sub_mk_success) { -+ return false; -+ } -+ -+ return true; -+} -+ -+static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode) -+{ -+ const char *tmp = dir; -+ const char *orig = dir; -+ -+ do { -+ int ret; -+ size_t cur_len; -+ char *makeme; -+ -+ dir = tmp + strspn(tmp, "/"); -+ tmp = dir + strcspn(dir, "/"); -+ -+ errno = ENOMEM; -+ cur_len = dir - orig; -+ makeme = strndup(orig, cur_len); -+ if (!makeme) -+ return -1; -+ -+ ret = mkdir(makeme, mode); -+ if (ret < 0) { -+ if (errno != EEXIST) { -+ SYSERROR("Failed to create directory \"%s\"", makeme); -+ free(makeme); -+ 
return -1; -+ } -+ } -+ free(makeme); -+ -+ } while (tmp != dir); -+ -+ return 0; -+} -+ -+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd) -+{ -+ int ret; -+ __do_free char *path = NULL; -+ -+ path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL); -+ -+ if (file_exists(path)) { // it must not already exist -+ ERROR("Cgroup path \"%s\" already exist.", path); -+ lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.", -+ __FILE__, __LINE__, path); -+ return false; -+ } -+ -+ if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) { -+ ERROR("Failed to handle legacy cpuset controller"); -+ return false; -+ } -+ -+ ret = isulad_mkdir_eexist_on_last(path, 0755); -+ if (ret < 0) { -+ ERROR("Failed to create cgroup \"%s\"", path); -+ return false; -+ } -+ -+ h->cgfd_con = lxc_open_dirfd(path); -+ if (h->cgfd_con < 0) -+ return log_error_errno(false, errno, "Failed to open %s", path); -+ -+ if (h->container_full_path == NULL) { -+ h->container_full_path = move_ptr(path); -+ } -+ -+ return true; -+} -+ -+/* isulad: create hierarchies path, if fail, return the error */ -+__cgfsng_ops static inline bool isulad_cgfsng_payload_create(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ int i; -+ char *container_cgroup = ops->container_cgroup; -+ -+ if (!container_cgroup) { -+ ERROR("cgfsng_create container_cgroup is invalid"); -+ return false; -+ } -+ -+ for (i = 0; ops->hierarchies[i]; i++) { -+ if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, ops->errfd)) { -+ SYSERROR("Failed to create %s", ops->hierarchies[i]->container_full_path); -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+__cgfsng_ops static bool isulad_cgfsng_monitor_enter(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ return true; -+} -+ -+__cgfsng_ops static bool isulad_cgfsng_payload_enter(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ int len; -+ char 
pidstr[INTTYPE_TO_STRLEN(pid_t)]; -+ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!ops->hierarchies) -+ return true; -+ -+ if (!ops->container_cgroup) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!handler || !handler->conf) -+ return ret_set_errno(false, EINVAL); -+ -+ len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid); -+ -+ for (int i = 0; ops->hierarchies[i]; i++) { -+ int ret; -+ char *fullpath; -+ int retry_count = 0; -+ int max_retry = 10; -+ -+ fullpath = must_make_path(ops->hierarchies[i]->container_full_path, -+ "cgroup.procs", NULL); -+retry: -+ ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666); -+ if (ret != 0) { -+ if (retry_count < max_retry) { -+ SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", fullpath, retry_count); -+ (void)isulad_cg_legacy_handle_cpuset_hierarchy(ops->hierarchies[i], ops->container_cgroup); -+ (void)isulad_mkdir_eexist_on_last(ops->hierarchies[i]->container_full_path, 0755); -+ usleep(100 * 1000); /* 100 millisecond */ -+ retry_count++; -+ goto retry; -+ } -+ SYSERROR("Failed to enter cgroup \"%s\"", fullpath); -+ free(fullpath); -+ return false; -+ } -+ free(fullpath); -+ } -+ -+ return true; -+} -+ -+static int fchowmodat(int dirfd, const char *path, uid_t chown_uid, -+ gid_t chown_gid, mode_t chmod_mode) -+{ -+ int ret; -+ -+ ret = fchownat(dirfd, path, chown_uid, chown_gid, -+ AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); -+ if (ret < 0) -+ return log_warn_errno(-1, -+ errno, "Failed to fchownat(%d, %s, %d, %d, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW )", -+ dirfd, path, (int)chown_uid, -+ (int)chown_gid); -+ -+ ret = fchmodat(dirfd, (*path != '\0') ? path : ".", chmod_mode, 0); -+ if (ret < 0) -+ return log_warn_errno(-1, errno, "Failed to fchmodat(%d, %s, %d, AT_SYMLINK_NOFOLLOW)", -+ dirfd, path, (int)chmod_mode); -+ -+ return 0; -+} -+ -+/* chgrp the container cgroups to container group. We leave -+ * the container owner as cgroup owner. 
So we must make the -+ * directories 775 so that the container can create sub-cgroups. -+ * -+ * Also chown the tasks and cgroup.procs files. Those may not -+ * exist depending on kernel version. -+ */ -+static int chown_cgroup_wrapper(void *data) -+{ -+ int ret; -+ uid_t destuid; -+ struct generic_userns_exec_data *arg = data; -+ uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid; -+ gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid; -+ -+ if (!lxc_setgroups(0, NULL) && errno != EPERM) -+ return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)"); -+ -+ ret = setresgid(nsgid, nsgid, nsgid); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)", -+ (int)nsgid, (int)nsgid, (int)nsgid); -+ -+ ret = setresuid(nsuid, nsuid, nsuid); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)", -+ (int)nsuid, (int)nsuid, (int)nsuid); -+ -+ destuid = get_ns_uid(arg->origuid); -+ if (destuid == LXC_INVALID_UID) -+ destuid = 0; -+ -+ for (int i = 0; arg->hierarchies[i]; i++) { -+ int dirfd = arg->hierarchies[i]->cgfd_con; -+ -+ (void)fchowmodat(dirfd, "", destuid, nsgid, 0775); -+ -+ /* -+ * Failures to chown() these are inconvenient but not -+ * detrimental We leave these owned by the container launcher, -+ * so that container root can write to the files to attach. We -+ * chmod() them 664 so that container systemd can write to the -+ * files (which systemd in wily insists on doing). 
-+ */ -+ -+ if (arg->hierarchies[i]->version == CGROUP_SUPER_MAGIC) -+ (void)fchowmodat(dirfd, "tasks", destuid, nsgid, 0664); -+ -+ (void)fchowmodat(dirfd, "cgroup.procs", destuid, nsgid, 0664); -+ -+ if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC) -+ continue; -+ -+ for (char **p = arg->hierarchies[i]->cgroup2_chown; p && *p; p++) -+ (void)fchowmodat(dirfd, *p, destuid, nsgid, 0664); -+ } -+ -+ return 0; -+} -+ -+__cgfsng_ops static bool isulad_cgfsng_chown(struct cgroup_ops *ops, -+ struct lxc_conf *conf) -+{ -+ struct generic_userns_exec_data wrap; -+ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!ops->hierarchies) -+ return true; -+ -+ if (!ops->container_cgroup) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!conf) -+ return ret_set_errno(false, EINVAL); -+ -+ if (lxc_list_empty(&conf->id_map)) -+ return true; -+ -+ wrap.origuid = geteuid(); -+ wrap.path = NULL; -+ wrap.hierarchies = ops->hierarchies; -+ wrap.conf = conf; -+ -+ if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap, "chown_cgroup_wrapper") < 0) -+ return log_error_errno(false, errno, "Error requesting cgroup chown in new user namespace"); -+ -+ return true; -+} -+ -+__cgfsng_ops void isulad_cgfsng_payload_finalize(struct cgroup_ops *ops) -+{ -+ if (!ops) -+ return; -+ -+ if (!ops->hierarchies) -+ return; -+ -+ for (int i = 0; ops->hierarchies[i]; i++) { -+ struct hierarchy *h = ops->hierarchies[i]; -+ /* -+ * we don't keep the fds for non-unified hierarchies around -+ * mainly because we don't make use of them anymore after the -+ * core cgroup setup is done but also because there are quite a -+ * lot of them. 
-+ */ -+ if (!is_unified_hierarchy(h)) -+ close_prot_errno_disarm(h->cgfd_con); -+ } -+} -+ -+/* cgroup-full:* is done, no need to create subdirs */ -+static inline bool cg_mount_needs_subdirs(int type) -+{ -+ return !(type >= LXC_AUTO_CGROUP_FULL_RO); -+} -+ -+/* After $rootfs/sys/fs/container/controller/the/cg/path has been created, -+ * remount controller ro if needed and bindmount the cgroupfs onto -+ * control/the/cg/path. -+ */ -+static int cg_legacy_mount_controllers(int type, struct hierarchy *h, -+ char *controllerpath, char *cgpath, -+ const char *container_cgroup) -+{ -+ __do_free char *sourcepath = NULL; -+ int ret, remount_flags; -+ int flags = MS_BIND; -+ -+ if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) { -+ ret = mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to bind mount \"%s\" onto \"%s\"", -+ controllerpath, controllerpath); -+ -+ remount_flags = add_required_remount_flags(controllerpath, -+ controllerpath, -+ flags | MS_REMOUNT); -+ ret = mount(controllerpath, controllerpath, "cgroup", -+ remount_flags | MS_REMOUNT | MS_BIND | MS_RDONLY, -+ NULL); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to remount \"%s\" ro", controllerpath); -+ -+ INFO("Remounted %s read-only", controllerpath); -+ } -+ -+ sourcepath = must_make_path(h->mountpoint, h->container_base_path, -+ container_cgroup, NULL); -+ if (type == LXC_AUTO_CGROUP_RO) -+ flags |= MS_RDONLY; -+ -+ ret = mount(sourcepath, cgpath, "cgroup", flags, NULL); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to mount \"%s\" onto \"%s\"", -+ h->controllers[0], cgpath); -+ INFO("Mounted \"%s\" onto \"%s\"", h->controllers[0], cgpath); -+ -+ if (flags & MS_RDONLY) { -+ remount_flags = add_required_remount_flags(sourcepath, cgpath, -+ flags | MS_REMOUNT); -+ ret = mount(sourcepath, cgpath, "cgroup", remount_flags, NULL); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed 
to remount \"%s\" ro", cgpath); -+ INFO("Remounted %s read-only", cgpath); -+ } -+ -+ INFO("Completed second stage cgroup automounts for \"%s\"", cgpath); -+ return 0; -+} -+ -+/* __cg_mount_direct -+ * -+ * Mount cgroup hierarchies directly without using bind-mounts. The main -+ * uses-cases are mounting cgroup hierarchies in cgroup namespaces and mounting -+ * cgroups for the LXC_AUTO_CGROUP_FULL option. -+ */ -+static int __cg_mount_direct(int type, struct hierarchy *h, -+ const char *controllerpath) -+{ -+ __do_free char *controllers = NULL; -+ char *fstype = "cgroup2"; -+ unsigned long flags = 0; -+ int ret; -+ -+ flags |= MS_NOSUID; -+ flags |= MS_NOEXEC; -+ flags |= MS_NODEV; -+ flags |= MS_RELATIME; -+ -+ if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO) -+ flags |= MS_RDONLY; -+ -+ if (h->version != CGROUP2_SUPER_MAGIC) { -+ controllers = lxc_string_join(",", (const char **)h->controllers, false); -+ if (!controllers) -+ return -ENOMEM; -+ fstype = "cgroup"; -+ } -+ -+ ret = mount("cgroup", controllerpath, fstype, flags, controllers); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s", -+ controllerpath, fstype); -+ -+ DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype); -+ return 0; -+} -+ -+static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h, -+ const char *controllerpath) -+{ -+ return __cg_mount_direct(type, h, controllerpath); -+} -+ -+static inline int cg_mount_cgroup_full(int type, struct hierarchy *h, -+ const char *controllerpath) -+{ -+ if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED) -+ return 0; -+ -+ return __cg_mount_direct(type, h, controllerpath); -+} -+ -+__cgfsng_ops static bool isulad_cgfsng_mount(struct cgroup_ops *ops, -+ struct lxc_handler *handler, -+ const char *root, int type) -+{ -+ int i, ret; -+ char *tmpfspath = NULL; -+ char *systemdpath = NULL; -+ char *unifiedpath = NULL; -+ 
bool has_cgns = false, retval = false, wants_force_mount = false; -+ char **merged = NULL; -+ -+ if ((type & LXC_AUTO_CGROUP_MASK) == 0) -+ return true; -+ -+ if (type & LXC_AUTO_CGROUP_FORCE) { -+ type &= ~LXC_AUTO_CGROUP_FORCE; -+ wants_force_mount = true; -+ } -+ -+ if (!wants_force_mount) { -+ if (!lxc_list_empty(&handler->conf->keepcaps)) -+ wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps); -+ else -+ wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps); -+ } -+ -+ has_cgns = cgns_supported(); -+ if (has_cgns && !wants_force_mount) -+ return true; -+ -+ if (type == LXC_AUTO_CGROUP_NOSPEC) -+ type = LXC_AUTO_CGROUP_MIXED; -+ else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC) -+ type = LXC_AUTO_CGROUP_FULL_MIXED; -+ -+ /* Mount tmpfs */ -+ tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL); -+ if (mkdir_p(tmpfspath, 0755) < 0) { -+ ERROR("Failed to create directory: %s", tmpfspath); -+ goto on_error; -+ } -+ ret = safe_mount(NULL, tmpfspath, "tmpfs", -+ MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, -+ "size=10240k,mode=755", root, handler->conf->lsm_se_mount_context); -+ if (ret < 0) -+ goto on_error; -+ -+ for (i = 0; ops->hierarchies[i]; i++) { -+ char *controllerpath = NULL; -+ char *path2 = NULL; -+ struct hierarchy *h = ops->hierarchies[i]; -+ char *controller = strrchr(h->mountpoint, '/'); -+ -+ if (!controller) -+ continue; -+ controller++; -+ -+ // isulad: symlink subcgroup -+ if (strchr(controller, ',') != NULL) { -+ int pret; -+ pret = lxc_append_string(&merged, controller); -+ if (pret < 0) -+ goto on_error; -+ } -+ -+ controllerpath = must_make_path(tmpfspath, controller, NULL); -+ if (dir_exists(controllerpath)) { -+ free(controllerpath); -+ continue; -+ } -+ -+ ret = mkdir(controllerpath, 0755); -+ if (ret < 0) { -+ SYSERROR("Error creating cgroup path: %s", controllerpath); -+ free(controllerpath); -+ goto on_error; -+ } -+ -+ if (has_cgns && wants_force_mount) { -+ /* If cgroup namespaces are supported 
but the container -+ * will not have CAP_SYS_ADMIN after it has started we -+ * need to mount the cgroups manually. -+ */ -+ ret = cg_mount_in_cgroup_namespace(type, h, controllerpath); -+ free(controllerpath); -+ if (ret < 0) -+ goto on_error; -+ -+ continue; -+ } -+ -+ ret = cg_mount_cgroup_full(type, h, controllerpath); -+ if (ret < 0) { -+ free(controllerpath); -+ goto on_error; -+ } -+ -+ if (!cg_mount_needs_subdirs(type)) { -+ free(controllerpath); -+ continue; -+ } -+ -+ // isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container, -+ // isulad: ignore h->container_base_path so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container -+ path2 = must_make_path(controllerpath, NULL); -+ ret = mkdir_p(path2, 0755); -+ if (ret < 0) { -+ free(controllerpath); -+ free(path2); -+ goto on_error; -+ } -+ -+ ret = cg_legacy_mount_controllers(type, h, controllerpath, -+ path2, ops->container_cgroup); -+ free(controllerpath); -+ free(path2); -+ if (ret < 0) -+ goto on_error; -+ } -+ -+ // isulad: symlink subcgroup -+ if (merged) { -+ char **mc = NULL; -+ for (mc = merged; *mc; mc++) { -+ char *token = NULL; -+ char *copy = must_copy_string(*mc); -+ lxc_iterate_parts(token, copy, ",") { -+ int mret; -+ char *link; -+ link = must_make_path(tmpfspath, token, NULL); -+ mret = symlink(*mc, link); -+ if (mret < 0 && errno != EEXIST) { -+ SYSERROR("Failed to create link %s for target %s", link, *mc); -+ free(copy); -+ free(link); -+ goto on_error; -+ } -+ free(link); -+ } -+ free(copy); -+ } -+ } -+ -+ -+ // isulad: remount /sys/fs/cgroup to readonly -+ if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) { -+ ret = mount(tmpfspath, tmpfspath, "bind", -+ MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL); -+ if (ret < 0) { -+ SYSERROR("Failed to remount /sys/fs/cgroup."); -+ goto on_error; -+ } -+ } -+ -+ // isulad: remount /sys/fs/cgroup/systemd to readwrite for system 
container -+ if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0) -+ { -+ unifiedpath = must_make_path(root, "/sys/fs/cgroup/unified", NULL); -+ if (dir_exists(unifiedpath)) -+ { -+ ret = umount2(unifiedpath, MNT_DETACH); -+ if (ret < 0) -+ { -+ SYSERROR("Failed to umount /sys/fs/cgroup/unified."); -+ goto on_error; -+ } -+ } -+ -+ systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL); -+ ret = mount(systemdpath, systemdpath, "bind", -+ MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME | MS_BIND | MS_REMOUNT, NULL); -+ if (ret < 0) -+ { -+ SYSERROR("Failed to remount /sys/fs/cgroup/systemd."); -+ goto on_error; -+ } -+ } -+ -+ retval = true; -+ -+on_error: -+ free(tmpfspath); -+ if (systemdpath != NULL) -+ { -+ free(systemdpath); -+ } -+ if (unifiedpath != NULL) -+ { -+ free(unifiedpath); -+ } -+ lxc_free_array((void **)merged, free); -+ return retval; -+} -+ -+/* Only root needs to escape to the cgroup of its init. */ -+__cgfsng_ops static bool isulad_cgfsng_escape(const struct cgroup_ops *ops, -+ struct lxc_conf *conf) -+{ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!ops->hierarchies) -+ return true; -+ -+ if (!conf) -+ return ret_set_errno(false, EINVAL); -+ -+ if (conf->cgroup_meta.relative || geteuid()) -+ return true; -+ -+ for (int i = 0; ops->hierarchies[i]; i++) { -+ __do_free char *fullpath = NULL; -+ int ret; -+ -+ fullpath = -+ must_make_path(ops->hierarchies[i]->mountpoint, -+ ops->hierarchies[i]->container_base_path, -+ "cgroup.procs", NULL); -+ ret = lxc_write_to_file(fullpath, "0", 2, false, 0666); -+ if (ret != 0) -+ return log_error_errno(false, errno, "Failed to escape to cgroup \"%s\"", fullpath); -+ } -+ -+ return true; -+} -+ -+__cgfsng_ops static int isulad_cgfsng_num_hierarchies(struct cgroup_ops *ops) -+{ -+ int i = 0; -+ -+ if (!ops) -+ return ret_set_errno(-1, ENOENT); -+ -+ if (!ops->hierarchies) -+ return 0; -+ -+ for (; ops->hierarchies[i]; i++) -+ ; -+ -+ return i; -+} -+ 
-+__cgfsng_ops static bool isulad_cgfsng_get_hierarchies(struct cgroup_ops *ops, int n, -+ char ***out) -+{ -+ int i; -+ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!ops->hierarchies) -+ return ret_set_errno(false, ENOENT); -+ -+ /* sanity check n */ -+ for (i = 0; i < n; i++) -+ if (!ops->hierarchies[i]) -+ return ret_set_errno(false, ENOENT); -+ -+ *out = ops->hierarchies[i]->controllers; -+ -+ return true; -+} -+ -+static bool cg_legacy_freeze(struct cgroup_ops *ops) -+{ -+ struct hierarchy *h; -+ -+ h = get_hierarchy(ops, "freezer"); -+ if (!h) -+ return ret_set_errno(-1, ENOENT); -+ -+ return lxc_write_openat(h->container_full_path, "freezer.state", -+ "FROZEN", STRLITERALLEN("FROZEN")); -+} -+ -+static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata, -+ struct lxc_epoll_descr *descr) -+{ -+ __do_close int duped_fd = -EBADF; -+ __do_free char *line = NULL; -+ __do_fclose FILE *f = NULL; -+ int state = PTR_TO_INT(cbdata); -+ size_t len; -+ const char *state_string; -+ -+ duped_fd = dup(fd); -+ if (duped_fd < 0) -+ return LXC_MAINLOOP_ERROR; -+ -+ if (lseek(duped_fd, 0, SEEK_SET) < (off_t)-1) -+ return LXC_MAINLOOP_ERROR; -+ -+ f = fdopen(duped_fd, "re"); -+ if (!f) -+ return LXC_MAINLOOP_ERROR; -+ move_fd(duped_fd); -+ -+ if (state == 1) -+ state_string = "frozen 1"; -+ else -+ state_string = "frozen 0"; -+ -+ while (getline(&line, &len, f) != -1) -+ if (strncmp(line, state_string, STRLITERALLEN("frozen") + 2) == 0) -+ return LXC_MAINLOOP_CLOSE; -+ -+ return LXC_MAINLOOP_CONTINUE; -+} -+ -+static int cg_unified_freeze(struct cgroup_ops *ops, int timeout) -+{ -+ __do_close int fd = -EBADF; -+ call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL; -+ int ret; -+ struct lxc_epoll_descr descr; -+ struct hierarchy *h; -+ -+ h = ops->unified; -+ if (!h) -+ return ret_set_errno(-1, ENOENT); -+ -+ if (!h->container_full_path) -+ return ret_set_errno(-1, EEXIST); -+ -+ if (timeout != 0) { -+ __do_free char 
*events_file = NULL; -+ -+ events_file = must_make_path(h->container_full_path, "cgroup.events", NULL); -+ fd = open(events_file, O_RDONLY | O_CLOEXEC); -+ if (fd < 0) -+ return log_error_errno(-1, errno, "Failed to open cgroup.events file"); -+ -+ ret = lxc_mainloop_open(&descr); -+ if (ret) -+ return log_error_errno(-1, errno, "Failed to create epoll instance to wait for container freeze"); -+ -+ /* automatically cleaned up now */ -+ descr_ptr = &descr; -+ -+ ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){1})); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop"); -+ } -+ -+ ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", "1", 1); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to open cgroup.freeze file"); -+ -+ if (timeout != 0 && lxc_mainloop(&descr, timeout)) -+ return log_error_errno(-1, errno, "Failed to wait for container to be frozen"); -+ -+ return 0; -+} -+ -+__cgfsng_ops static int isulad_cgfsng_freeze(struct cgroup_ops *ops, int timeout) -+{ -+ if (!ops->hierarchies) -+ return ret_set_errno(-1, ENOENT); -+ -+ if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED) -+ return cg_legacy_freeze(ops); -+ -+ return cg_unified_freeze(ops, timeout); -+} -+ -+static int cg_legacy_unfreeze(struct cgroup_ops *ops) -+{ -+ struct hierarchy *h; -+ -+ h = get_hierarchy(ops, "freezer"); -+ if (!h) -+ return ret_set_errno(-1, ENOENT); -+ -+ return lxc_write_openat(h->container_full_path, "freezer.state", -+ "THAWED", STRLITERALLEN("THAWED")); -+} -+ -+static int cg_unified_unfreeze(struct cgroup_ops *ops, int timeout) -+{ -+ __do_close int fd = -EBADF; -+ call_cleaner(lxc_mainloop_close)struct lxc_epoll_descr *descr_ptr = NULL; -+ int ret; -+ struct lxc_epoll_descr descr; -+ struct hierarchy *h; -+ -+ h = ops->unified; -+ if (!h) -+ return ret_set_errno(-1, ENOENT); -+ -+ if (!h->container_full_path) -+ return ret_set_errno(-1, EEXIST); -+ -+ if (timeout 
!= 0) { -+ __do_free char *events_file = NULL; -+ -+ events_file = must_make_path(h->container_full_path, "cgroup.events", NULL); -+ fd = open(events_file, O_RDONLY | O_CLOEXEC); -+ if (fd < 0) -+ return log_error_errno(-1, errno, "Failed to open cgroup.events file"); -+ -+ ret = lxc_mainloop_open(&descr); -+ if (ret) -+ return log_error_errno(-1, errno, "Failed to create epoll instance to wait for container unfreeze"); -+ -+ /* automatically cleaned up now */ -+ descr_ptr = &descr; -+ -+ ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){0})); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop"); -+ } -+ -+ ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", "0", 1); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to open cgroup.freeze file"); -+ -+ if (timeout != 0 && lxc_mainloop(&descr, timeout)) -+ return log_error_errno(-1, errno, "Failed to wait for container to be unfrozen"); -+ -+ return 0; -+} -+ -+__cgfsng_ops static int isulad_cgfsng_unfreeze(struct cgroup_ops *ops, int timeout) -+{ -+ if (!ops->hierarchies) -+ return ret_set_errno(-1, ENOENT); -+ -+ if (ops->cgroup_layout != CGROUP_LAYOUT_UNIFIED) -+ return cg_legacy_unfreeze(ops); -+ -+ return cg_unified_unfreeze(ops, timeout); -+} -+ -+__cgfsng_ops static const char *isulad_cgfsng_get_cgroup(struct cgroup_ops *ops, -+ const char *controller) -+{ -+ struct hierarchy *h; -+ -+ h = get_hierarchy(ops, controller); -+ if (!h) -+ return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"", -+ controller ? controller : "(null)"); -+ -+ if (!h->container_full_path) -+ h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL); -+ -+ return h->container_full_path -+ ? 
h->container_full_path + strlen(h->mountpoint) -+ : NULL; -+} -+ -+__cgfsng_ops static const char *isulad_cgfsng_get_cgroup_full_path(struct cgroup_ops *ops, -+ const char *controller) -+{ -+ struct hierarchy *h; -+ -+ h = get_hierarchy(ops, controller); -+ if (!h) -+ return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"", -+ controller ? controller : "(null)"); -+ -+ if (!h->container_full_path) -+ h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL); -+ -+ return h->container_full_path; -+} -+ -+/* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path, -+ * which must be freed by the caller. -+ */ -+static inline char *build_full_cgpath_from_monitorpath(struct hierarchy *h, -+ const char *inpath, -+ const char *filename) -+{ -+ return must_make_path(h->mountpoint, inpath, filename, NULL); -+} -+ -+static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t pid) -+{ -+ int idx = 1; -+ int ret; -+ char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1]; -+ size_t pidstr_len; -+ -+ /* Create leaf cgroup. 
*/ -+ ret = mkdirat(unified_fd, ".lxc", 0755); -+ if (ret < 0 && errno != EEXIST) -+ return log_error_errno(-1, errno, "Failed to create leaf cgroup \".lxc\""); -+ -+ pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid); -+ ret = lxc_writeat(unified_fd, ".lxc/cgroup.procs", pidstr, pidstr_len); -+ if (ret < 0) -+ ret = lxc_writeat(unified_fd, "cgroup.procs", pidstr, pidstr_len); -+ if (ret == 0) -+ return 0; -+ -+ /* this is a non-leaf node */ -+ if (errno != EBUSY) -+ return log_error_errno(-1, errno, "Failed to attach to unified cgroup"); -+ -+ do { -+ bool rm = false; -+ char attach_cgroup[STRLITERALLEN(".lxc-1000/cgroup.procs") + 1]; -+ char *slash; -+ -+ ret = snprintf(attach_cgroup, sizeof(attach_cgroup), ".lxc-%d/cgroup.procs", idx); -+ if (ret < 0 || (size_t)ret >= sizeof(attach_cgroup)) -+ return ret_errno(EIO); -+ -+ slash = &attach_cgroup[ret] - STRLITERALLEN("/cgroup.procs"); -+ *slash = '\0'; -+ -+ ret = mkdirat(unified_fd, attach_cgroup, 0755); -+ if (ret < 0 && errno != EEXIST) -+ return log_error_errno(-1, errno, "Failed to create cgroup %s", attach_cgroup); -+ if (ret == 0) -+ rm = true; -+ -+ *slash = '/'; -+ -+ ret = lxc_writeat(unified_fd, attach_cgroup, pidstr, pidstr_len); -+ if (ret == 0) -+ return 0; -+ -+ if (rm && unlinkat(unified_fd, attach_cgroup, AT_REMOVEDIR)) -+ SYSERROR("Failed to remove cgroup \"%d(%s)\"", unified_fd, attach_cgroup); -+ -+ /* this is a non-leaf node */ -+ if (errno != EBUSY) -+ return log_error_errno(-1, errno, "Failed to attach to unified cgroup"); -+ -+ idx++; -+ } while (idx < 1000); -+ -+ return log_error_errno(-1, errno, "Failed to attach to unified cgroup"); -+} -+ -+static int cgroup_attach_create_leaf(const struct lxc_conf *conf, -+ int unified_fd, int *sk_fd) -+{ -+ __do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF; -+ int target_fds[2]; -+ ssize_t ret; -+ -+ /* Create leaf cgroup. 
*/ -+ ret = mkdirat(unified_fd, ".lxc", 0755); -+ if (ret < 0 && errno != EEXIST) -+ return log_error_errno(-1, errno, "Failed to create leaf cgroup \".lxc\""); -+ -+ target_fd0 = openat(unified_fd, ".lxc/cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW); -+ if (target_fd0 < 0) -+ return log_error_errno(-errno, errno, "Failed to open \".lxc/cgroup.procs\""); -+ target_fds[0] = target_fd0; -+ -+ target_fd1 = openat(unified_fd, "cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW); -+ if (target_fd1 < 0) -+ return log_error_errno(-errno, errno, "Failed to open \".lxc/cgroup.procs\""); -+ target_fds[1] = target_fd1; -+ -+ ret = lxc_abstract_unix_send_fds(sk, target_fds, 2, NULL, 0); -+ if (ret <= 0) -+ return log_error_errno(-errno, errno, "Failed to send \".lxc/cgroup.procs\" fds %d and %d", -+ target_fd0, target_fd1); -+ -+ return log_debug(0, "Sent target cgroup fds %d and %d", target_fd0, target_fd1); -+} -+ -+static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf, -+ int *sk_fd, pid_t pid) -+{ -+ __do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF; -+ int target_fds[2]; -+ char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1]; -+ size_t pidstr_len; -+ ssize_t ret; -+ -+ ret = lxc_abstract_unix_recv_fds(sk, target_fds, 2, NULL, 0); -+ if (ret <= 0) -+ return log_error_errno(-1, errno, "Failed to receive target cgroup fd"); -+ target_fd0 = target_fds[0]; -+ target_fd1 = target_fds[1]; -+ -+ pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid); -+ -+ ret = lxc_write_nointr(target_fd0, pidstr, pidstr_len); -+ if (ret > 0 && ret == pidstr_len) -+ return log_debug(0, "Moved process into target cgroup via fd %d", target_fd0); -+ -+ ret = lxc_write_nointr(target_fd1, pidstr, pidstr_len); -+ if (ret > 0 && ret == pidstr_len) -+ return log_debug(0, "Moved process into target cgroup via fd %d", target_fd1); -+ -+ return log_debug_errno(-1, errno, "Failed to move process into target cgroup via fd %d and %d", -+ target_fd0, target_fd1); -+} -+ -+struct 
userns_exec_unified_attach_data { -+ const struct lxc_conf *conf; -+ int unified_fd; -+ int sk_pair[2]; -+ pid_t pid; -+}; -+ -+static int cgroup_unified_attach_child_wrapper(void *data) -+{ -+ struct userns_exec_unified_attach_data *args = data; -+ -+ if (!args->conf || args->unified_fd < 0 || args->pid <= 0 || -+ args->sk_pair[0] < 0 || args->sk_pair[1] < 0) -+ return ret_errno(EINVAL); -+ -+ close_prot_errno_disarm(args->sk_pair[0]); -+ return cgroup_attach_create_leaf(args->conf, args->unified_fd, -+ &args->sk_pair[1]); -+} -+ -+static int cgroup_unified_attach_parent_wrapper(void *data) -+{ -+ struct userns_exec_unified_attach_data *args = data; -+ -+ if (!args->conf || args->unified_fd < 0 || args->pid <= 0 || -+ args->sk_pair[0] < 0 || args->sk_pair[1] < 0) -+ return ret_errno(EINVAL); -+ -+ close_prot_errno_disarm(args->sk_pair[1]); -+ return cgroup_attach_move_into_leaf(args->conf, &args->sk_pair[0], -+ args->pid); -+} -+ -+int cgroup_attach(const struct lxc_conf *conf, const char *name, -+ const char *lxcpath, pid_t pid) -+{ -+ __do_close int unified_fd = -EBADF; -+ int ret; -+ -+ if (!conf || !name || !lxcpath || pid <= 0) -+ return ret_errno(EINVAL); -+ -+ unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath); -+ if (unified_fd < 0) -+ return ret_errno(EBADF); -+ -+ if (!lxc_list_empty(&conf->id_map)) { -+ struct userns_exec_unified_attach_data args = { -+ .conf = conf, -+ .unified_fd = unified_fd, -+ .pid = pid, -+ }; -+ -+ ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair); -+ if (ret < 0) -+ return -errno; -+ -+ ret = userns_exec_minimal(conf, -+ cgroup_unified_attach_parent_wrapper, -+ &args, -+ cgroup_unified_attach_child_wrapper, -+ &args); -+ } else { -+ ret = cgroup_attach_leaf(conf, unified_fd, pid); -+ } -+ -+ return ret; -+} -+ -+/* Technically, we're always at a delegation boundary here (This is especially -+ * true when cgroup namespaces are available.). 
The reasoning is that in order -+ * for us to have been able to start a container in the first place the root -+ * cgroup must have been a leaf node. Now, either the container's init system -+ * has populated the cgroup and kept it as a leaf node or it has created -+ * subtrees. In the former case we will simply attach to the leaf node we -+ * created when we started the container in the latter case we create our own -+ * cgroup for the attaching process. -+ */ -+static int __cg_unified_attach(const struct hierarchy *h, -+ const struct lxc_conf *conf, const char *name, -+ const char *lxcpath, pid_t pid, -+ const char *controller) -+{ -+ __do_close int unified_fd = -EBADF; -+ __do_free char *path = NULL, *cgroup = NULL; -+ int ret; -+ -+ if (!conf || !name || !lxcpath || pid <= 0) -+ return ret_errno(EINVAL); -+ -+ ret = cgroup_attach(conf, name, lxcpath, pid); -+ if (ret == 0) -+ return log_trace(0, "Attached to unified cgroup via command handler"); -+ if (ret != -EBADF) -+ return log_error_errno(ret, errno, "Failed to attach to unified cgroup"); -+ -+ /* Fall back to retrieving the path for the unified cgroup. 
*/ -+ cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller); -+ /* not running */ -+ if (!cgroup) -+ return 0; -+ -+ path = must_make_path(h->mountpoint, cgroup, NULL); -+ -+ unified_fd = open(path, O_PATH | O_DIRECTORY | O_CLOEXEC); -+ if (unified_fd < 0) -+ return ret_errno(EBADF); -+ -+ if (!lxc_list_empty(&conf->id_map)) { -+ struct userns_exec_unified_attach_data args = { -+ .conf = conf, -+ .unified_fd = unified_fd, -+ .pid = pid, -+ }; -+ -+ ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair); -+ if (ret < 0) -+ return -errno; -+ -+ ret = userns_exec_minimal(conf, -+ cgroup_unified_attach_parent_wrapper, -+ &args, -+ cgroup_unified_attach_child_wrapper, -+ &args); -+ } else { -+ ret = cgroup_attach_leaf(conf, unified_fd, pid); -+ } -+ -+ return ret; -+} -+ -+__cgfsng_ops static bool isulad_cgfsng_attach(struct cgroup_ops *ops, -+ const struct lxc_conf *conf, -+ const char *name, const char *lxcpath, -+ pid_t pid) -+{ -+ int len, ret; -+ char pidstr[INTTYPE_TO_STRLEN(pid_t)]; -+ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!ops->hierarchies) -+ return true; -+ -+ len = snprintf(pidstr, sizeof(pidstr), "%d", pid); -+ if (len < 0 || (size_t)len >= sizeof(pidstr)) -+ return false; -+ -+ for (int i = 0; ops->hierarchies[i]; i++) { -+ __do_free char *fullpath = NULL, *path = NULL; -+ struct hierarchy *h = ops->hierarchies[i]; -+ -+ if (h->version == CGROUP2_SUPER_MAGIC) { -+ ret = __cg_unified_attach(h, conf, name, lxcpath, pid, -+ h->controllers[0]); -+ if (ret < 0) -+ return false; -+ -+ continue; -+ } -+ -+ path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]); -+ /* not running */ -+ if (!path) -+ return false; -+ -+ fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs"); -+ ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666); -+ if (ret < 0) -+ return log_error_errno(false, errno, "Failed to attach %d to %s", -+ (int)pid, fullpath); -+ } -+ -+ return true; -+} -+ -+__cgfsng_ops 
static int isulad_cgfsng_get(struct cgroup_ops *ops, const char *filename, -+ char *value, size_t len, const char *name, -+ const char *lxcpath) -+{ -+ int ret = -1; -+ size_t controller_len; -+ char *controller, *p, *path; -+ struct hierarchy *h; -+ -+ controller_len = strlen(filename); -+ controller = alloca(controller_len + 1); -+ (void)strlcpy(controller, filename, controller_len + 1); -+ -+ p = strchr(controller, '.'); -+ if (p) -+ *p = '\0'; -+ -+ const char *ori_path = ops->get_cgroup(ops, controller); -+ if (ori_path == NULL) { -+ ERROR("Failed to get cgroup path:%s", controller); -+ return -1; -+ } -+ path = safe_strdup(ori_path); -+ -+ h = get_hierarchy(ops, controller); -+ if (h) { -+ char *fullpath; -+ -+ fullpath = build_full_cgpath_from_monitorpath(h, path, filename); -+ ret = lxc_read_from_file(fullpath, value, len); -+ free(fullpath); -+ } -+ free(path); -+ -+ return ret; -+} -+ -+static int device_cgroup_parse_access(struct device_item *device, const char *val) -+{ -+ for (int count = 0; count < 3; count++, val++) { -+ switch (*val) { -+ case 'r': -+ device->access[count] = *val; -+ break; -+ case 'w': -+ device->access[count] = *val; -+ break; -+ case 'm': -+ device->access[count] = *val; -+ break; -+ case '\n': -+ case '\0': -+ count = 3; -+ break; -+ default: -+ return ret_errno(EINVAL); -+ } -+ } -+ -+ return 0; -+} -+ -+int device_cgroup_rule_parse(struct device_item *device, const char *key, -+ const char *val) -+{ -+ int count, ret; -+ char temp[50]; -+ -+ if (strcmp("devices.allow", key) == 0) -+ device->allow = 1; -+ else -+ device->allow = 0; -+ -+ if (strcmp(val, "a") == 0) { -+ /* global rule */ -+ device->type = 'a'; -+ device->major = -1; -+ device->minor = -1; -+ device->global_rule = device->allow -+ ? 
LXC_BPF_DEVICE_CGROUP_BLACKLIST -+ : LXC_BPF_DEVICE_CGROUP_WHITELIST; -+ device->allow = -1; -+ return 0; -+ } -+ -+ /* local rule */ -+ device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE; -+ -+ switch (*val) { -+ case 'a': -+ __fallthrough; -+ case 'b': -+ __fallthrough; -+ case 'c': -+ device->type = *val; -+ break; -+ default: -+ return -1; -+ } -+ -+ val++; -+ if (!isspace(*val)) -+ return -1; -+ val++; -+ if (*val == '*') { -+ device->major = -1; -+ val++; -+ } else if (isdigit(*val)) { -+ memset(temp, 0, sizeof(temp)); -+ for (count = 0; count < sizeof(temp) - 1; count++) { -+ temp[count] = *val; -+ val++; -+ if (!isdigit(*val)) -+ break; -+ } -+ ret = lxc_safe_int(temp, &device->major); -+ if (ret) -+ return -1; -+ } else { -+ return -1; -+ } -+ if (*val != ':') -+ return -1; -+ val++; -+ -+ /* read minor */ -+ if (*val == '*') { -+ device->minor = -1; -+ val++; -+ } else if (isdigit(*val)) { -+ memset(temp, 0, sizeof(temp)); -+ for (count = 0; count < sizeof(temp) - 1; count++) { -+ temp[count] = *val; -+ val++; -+ if (!isdigit(*val)) -+ break; -+ } -+ ret = lxc_safe_int(temp, &device->minor); -+ if (ret) -+ return -1; -+ } else { -+ return -1; -+ } -+ if (!isspace(*val)) -+ return -1; -+ -+ return device_cgroup_parse_access(device, ++val); -+} -+ -+__cgfsng_ops static int isulad_cgfsng_set(struct cgroup_ops *ops, -+ const char *filename, const char *value, -+ const char *name, const char *lxcpath) -+{ -+ int ret = -1; -+ size_t controller_len; -+ char *controller, *p, *path; -+ struct hierarchy *h; -+ -+ controller_len = strlen(filename); -+ controller = alloca(controller_len + 1); -+ (void)strlcpy(controller, filename, controller_len + 1); -+ -+ p = strchr(controller, '.'); -+ if (p) -+ *p = '\0'; -+ -+ const char *ori_path = ops->get_cgroup(ops, controller); -+ if (ori_path == NULL) { -+ ERROR("Failed to get cgroup path:%s", controller); -+ return -1; -+ } -+ path = safe_strdup(ori_path); -+ -+ h = get_hierarchy(ops, controller); -+ if (h) { -+ char 
*fullpath; -+ -+ fullpath = build_full_cgpath_from_monitorpath(h, path, filename); -+ ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); -+ free(fullpath); -+ } -+ free(path); -+ -+ return ret; -+} -+ -+/* take devices cgroup line -+ * /dev/foo rwx -+ * and convert it to a valid -+ * type major:minor mode -+ * line. Return <0 on error. Dest is a preallocated buffer long enough to hold -+ * the output. -+ */ -+static int device_cgroup_rule_parse_devpath(struct device_item *device, -+ const char *devpath) -+{ -+ __do_free char *path = NULL; -+ char *mode = NULL; -+ int n_parts, ret; -+ char *p; -+ struct stat sb; -+ -+ path = must_copy_string(devpath); -+ -+ /* -+ * Read path followed by mode. Ignore any trailing text. -+ * A ' # comment' would be legal. Technically other text is not -+ * legal, we could check for that if we cared to. -+ */ -+ for (n_parts = 1, p = path; *p; p++) { -+ if (*p != ' ') -+ continue; -+ *p = '\0'; -+ -+ if (n_parts != 1) -+ break; -+ p++; -+ n_parts++; -+ -+ while (*p == ' ') -+ p++; -+ -+ mode = p; -+ -+ if (*p == '\0') -+ return ret_set_errno(-1, EINVAL); -+ } -+ -+ if (device_cgroup_parse_access(device, mode) < 0) -+ return -1; -+ -+ if (n_parts == 1) -+ return ret_set_errno(-1, EINVAL); -+ -+ ret = stat(path, &sb); -+ if (ret < 0) -+ return ret_set_errno(-1, errno); -+ -+ mode_t m = sb.st_mode & S_IFMT; -+ switch (m) { -+ case S_IFBLK: -+ device->type = 'b'; -+ break; -+ case S_IFCHR: -+ device->type = 'c'; -+ break; -+ default: -+ return log_error_errno(-1, EINVAL, "Unsupported device type %i for \"%s\"", m, path); -+ } -+ -+ device->major = MAJOR(sb.st_rdev); -+ device->minor = MINOR(sb.st_rdev); -+ device->allow = 1; -+ device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE; -+ -+ return 0; -+} -+ -+static int convert_devpath(const char *invalue, char *dest) -+{ -+ struct device_item device = {0}; -+ int ret; -+ -+ ret = device_cgroup_rule_parse_devpath(&device, invalue); -+ if (ret < 0) -+ return -1; -+ -+ ret = 
snprintf(dest, 50, "%c %d:%d %s", device.type, device.major, -+ device.minor, device.access); -+ if (ret < 0 || ret >= 50) -+ return log_error_errno(-1, ENAMETOOLONG, "Error on configuration value \"%c %d:%d %s\" (max 50 chars)", -+ device.type, device.major, device.minor, device.access); -+ -+ return 0; -+} -+ -+/* Called from setup_limits - here we have the container's cgroup_data because -+ * we created the cgroups. -+ */ -+static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename, -+ char *value, size_t len) -+{ -+ char *fullpath = NULL; -+ char *p = NULL; -+ struct hierarchy *h = NULL; -+ int ret = 0; -+ char *controller = NULL; -+ -+ len = strlen(filename); -+ if (SIZE_MAX - 1 < len) { -+ errno = EINVAL; -+ return -1; -+ } -+ controller = calloc(1, len + 1); -+ if (controller == NULL) { -+ errno = ENOMEM; -+ return -1; -+ } -+ (void)strlcpy(controller, filename, len + 1); -+ -+ p = strchr(controller, '.'); -+ if (p) -+ *p = '\0'; -+ -+ -+ h = get_hierarchy(ops, controller); -+ if (!h) { -+ ERROR("Failed to setup limits for the \"%s\" controller. 
" -+ "The controller seems to be unused by \"cgfsng\" cgroup " -+ "driver or not enabled on the cgroup hierarchy", -+ controller); -+ errno = ENOENT; -+ free(controller); -+ return -ENOENT; -+ } -+ -+ fullpath = must_make_path(h->container_full_path, filename, NULL); -+ ret = lxc_read_from_file(fullpath, value, len); -+ free(fullpath); -+ free(controller); -+ return ret; -+} -+ -+static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename, -+ const char *value) -+{ -+ size_t len; -+ char *fullpath, *p; -+ /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */ -+ char converted_value[50]; -+ struct hierarchy *h; -+ int ret = 0; -+ char *controller = NULL; -+ int retry_count = 0; -+ int max_retry = 10; -+ char *container_cgroup = ops->container_cgroup; -+ -+ len = strlen(filename); -+ controller = alloca(len + 1); -+ (void)strlcpy(controller, filename, len + 1); -+ -+ p = strchr(controller, '.'); -+ if (p) -+ *p = '\0'; -+ -+ if (strcmp("devices.allow", filename) == 0 && value[0] == '/') { -+ ret = convert_devpath(value, converted_value); -+ if (ret < 0) -+ return ret; -+ value = converted_value; -+ } -+ -+ h = get_hierarchy(ops, controller); -+ if (!h) { -+ ERROR("Failed to setup limits for the \"%s\" controller. 
" -+ "The controller seems to be unused by \"cgfsng\" cgroup " -+ "driver or not enabled on the cgroup hierarchy", -+ controller); -+ errno = ENOENT; -+ return -ENOENT; -+ } -+ -+ fullpath = must_make_path(h->container_full_path, filename, NULL); -+ -+retry: -+ ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); -+ if (ret != 0) { -+ if (retry_count < max_retry) { -+ SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath); -+ (void)isulad_cg_legacy_handle_cpuset_hierarchy(h, container_cgroup); -+ (void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755); -+ usleep(100 * 1000); /* 100 millisecond */ -+ retry_count++; -+ goto retry; -+ } -+ lxc_write_error_message(ops->errfd, -+ "%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".", -+ __FILE__, __LINE__, value, fullpath, strerror(errno)); -+ } -+ free(fullpath); -+ return ret; -+} -+ -+__cgfsng_ops static bool isulad_cgfsng_setup_limits_legacy(struct cgroup_ops *ops, -+ struct lxc_conf *conf, -+ bool do_devices) -+{ -+ __do_free struct lxc_list *sorted_cgroup_settings = NULL; -+ struct lxc_list *cgroup_settings = &conf->cgroup; -+ struct lxc_list *iterator, *next; -+ struct lxc_cgroup *cg; -+ bool ret = false; -+ char value[21 + 1] = { 0 }; -+ long long int readvalue, setvalue; -+ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!conf) -+ return ret_set_errno(false, EINVAL); -+ -+ cgroup_settings = &conf->cgroup; -+ if (lxc_list_empty(cgroup_settings)) -+ return true; -+ -+ if (!ops->hierarchies) -+ return ret_set_errno(false, EINVAL); -+ -+ sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings); -+ if (!sorted_cgroup_settings) -+ return false; -+ -+ lxc_list_for_each(iterator, sorted_cgroup_settings) { -+ cg = iterator->elem; -+ -+ if (do_devices == !strncmp("devices", cg->subsystem, 7)) { -+ const char *cgvalue = cg->value; -+ if (strcmp(cg->subsystem, "files.limit") == 0) { -+ if 
(lxc_safe_long_long(cgvalue, &setvalue) != 0) { -+ SYSERROR("Invalid integer value %s", cgvalue); -+ goto out; -+ } -+ if (setvalue <= 0) { -+ cgvalue = "max"; -+ } -+ } -+ if (isulad_cg_legacy_set_data(ops, cg->subsystem, cgvalue)) { -+ if (do_devices && (errno == EACCES || errno == EPERM)) { -+ SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue); -+ continue; -+ } -+ SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue); -+ goto out; -+ } -+ DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cgvalue); -+ } -+ -+ // isulad: check cpu shares -+ if (strcmp(cg->subsystem, "cpu.shares") == 0) { -+ if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) { -+ SYSERROR("Error get %s", cg->subsystem); -+ goto out; -+ } -+ trim(value); -+ if (lxc_safe_long_long(cg->value, &setvalue) != 0) { -+ SYSERROR("Invalid value %s", cg->value); -+ goto out; -+ } -+ if (lxc_safe_long_long(value, &readvalue) != 0) { -+ SYSERROR("Invalid value %s", value); -+ goto out; -+ } -+ if (setvalue > readvalue) { -+ ERROR("The maximum allowed cpu-shares is %s", value); -+ lxc_write_error_message(ops->errfd, -+ "%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".", -+ __FILE__, __LINE__, value); -+ goto out; -+ } else if (setvalue < readvalue) { -+ ERROR("The minimum allowed cpu-shares is %s", value); -+ lxc_write_error_message(ops->errfd, -+ "%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".", -+ __FILE__, __LINE__, value); -+ goto out; -+ } -+ } -+ } -+ -+ ret = true; -+ INFO("Limits for the legacy cgroup hierarchies have been setup"); -+out: -+ lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) { -+ lxc_list_del(iterator); -+ free(iterator); -+ } -+ -+ return ret; -+} -+ -+/* -+ * Some of the parsing logic comes from the original cgroup device v1 -+ * implementation in the kernel. 
-+ */ -+static int bpf_device_cgroup_prepare(struct cgroup_ops *ops, -+ struct lxc_conf *conf, const char *key, -+ const char *val) -+{ -+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX -+ struct device_item device_item = {0}; -+ int ret; -+ -+ if (strcmp("devices.allow", key) == 0 && *val == '/') -+ ret = device_cgroup_rule_parse_devpath(&device_item, val); -+ else -+ ret = device_cgroup_rule_parse(&device_item, key, val); -+ if (ret < 0) -+ return log_error_errno(-1, EINVAL, "Failed to parse device string %s=%s", key, val); -+ -+ ret = bpf_list_add_device(conf, &device_item); -+ if (ret < 0) -+ return -1; -+#endif -+ return 0; -+} -+ -+__cgfsng_ops static bool isulad_cgfsng_setup_limits(struct cgroup_ops *ops, -+ struct lxc_handler *handler) -+{ -+ struct lxc_list *cgroup_settings, *iterator; -+ struct hierarchy *h; -+ struct lxc_conf *conf; -+ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!ops->hierarchies) -+ return true; -+ -+ if (!ops->container_cgroup) -+ return ret_set_errno(false, EINVAL); -+ -+ if (!handler || !handler->conf) -+ return ret_set_errno(false, EINVAL); -+ conf = handler->conf; -+ -+ if (lxc_list_empty(&conf->cgroup2)) -+ return true; -+ cgroup_settings = &conf->cgroup2; -+ -+ if (!ops->unified) -+ return false; -+ h = ops->unified; -+ -+ lxc_list_for_each (iterator, cgroup_settings) { -+ struct lxc_cgroup *cg = iterator->elem; -+ int ret; -+ -+ if (strncmp("devices", cg->subsystem, 7) == 0) { -+ ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem, -+ cg->value); -+ } else { -+ ret = lxc_write_openat(h->container_full_path, -+ cg->subsystem, cg->value, -+ strlen(cg->value)); -+ if (ret < 0) -+ return log_error_errno(false, errno, "Failed to set \"%s\" to \"%s\"", -+ cg->subsystem, cg->value); -+ } -+ TRACE("Set \"%s\" to \"%s\"", cg->subsystem, cg->value); -+ } -+ -+ return log_info(true, "Limits for the unified cgroup hierarchy have been setup"); -+} -+ -+__cgfsng_ops bool isulad_cgfsng_devices_activate(struct cgroup_ops *ops, -+ 
struct lxc_handler *handler) -+{ -+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX -+ __do_bpf_program_free struct bpf_program *devices = NULL; -+ int ret; -+ struct lxc_conf *conf; -+ struct hierarchy *unified; -+ struct lxc_list *it; -+ struct bpf_program *devices_old; -+ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ if (!ops->hierarchies) -+ return true; -+ -+ if (!ops->container_cgroup) -+ return ret_set_errno(false, EEXIST); -+ -+ if (!handler || !handler->conf) -+ return ret_set_errno(false, EINVAL); -+ conf = handler->conf; -+ -+ unified = ops->unified; -+ if (!unified || !unified->bpf_device_controller || -+ !unified->container_full_path || lxc_list_empty(&conf->devices)) -+ return true; -+ -+ devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE); -+ if (!devices) -+ return log_error_errno(false, ENOMEM, "Failed to create new bpf program"); -+ -+ ret = bpf_program_init(devices); -+ if (ret) -+ return log_error_errno(false, ENOMEM, "Failed to initialize bpf program"); -+ -+ lxc_list_for_each(it, &conf->devices) { -+ struct device_item *cur = it->elem; -+ -+ ret = bpf_program_append_device(devices, cur); -+ if (ret) -+ return log_error_errno(false, ENOMEM, "Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d", -+ cur->type, -+ cur->major, -+ cur->minor, -+ cur->access, -+ cur->allow, -+ cur->global_rule); -+ TRACE("Added rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d", -+ cur->type, -+ cur->major, -+ cur->minor, -+ cur->access, -+ cur->allow, -+ cur->global_rule); -+ } -+ -+ ret = bpf_program_finalize(devices); -+ if (ret) -+ return log_error_errno(false, ENOMEM, "Failed to finalize bpf program"); -+ -+ ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE, -+ unified->container_full_path, -+ BPF_F_ALLOW_MULTI); -+ if (ret) -+ return log_error_errno(false, ENOMEM, "Failed to attach bpf program"); -+ -+ /* Replace old bpf program. 
*/ -+ devices_old = move_ptr(conf->cgroup2_devices); -+ conf->cgroup2_devices = move_ptr(devices); -+ devices = move_ptr(devices_old); -+#endif -+ return true; -+} -+ -+bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup) -+{ -+ __do_free char *add_controllers = NULL, *base_path = NULL; -+ __do_free_string_list char **parts = NULL; -+ struct hierarchy *unified = ops->unified; -+ ssize_t parts_len; -+ char **it; -+ size_t full_len = 0; -+ -+ if (!ops->hierarchies || !pure_unified_layout(ops) || -+ !unified->controllers[0]) -+ return true; -+ -+ /* For now we simply enable all controllers that we have detected by -+ * creating a string like "+memory +pids +cpu +io". -+ * TODO: In the near future we might want to support "-" -+ * etc. but whether supporting semantics like this make sense will need -+ * some thinking. -+ */ -+ for (it = unified->controllers; it && *it; it++) { -+ full_len += strlen(*it) + 2; -+ add_controllers = must_realloc(add_controllers, full_len + 1); -+ -+ if (unified->controllers[0] == *it) -+ add_controllers[0] = '\0'; -+ -+ (void)strlcat(add_controllers, "+", full_len + 1); -+ (void)strlcat(add_controllers, *it, full_len + 1); -+ -+ if ((it + 1) && *(it + 1)) -+ (void)strlcat(add_controllers, " ", full_len + 1); -+ } -+ -+ parts = lxc_string_split(cgroup, '/'); -+ if (!parts) -+ return false; -+ -+ parts_len = lxc_array_len((void **)parts); -+ if (parts_len > 0) -+ parts_len--; -+ -+ base_path = must_make_path(unified->mountpoint, unified->container_base_path, NULL); -+ for (ssize_t i = -1; i < parts_len; i++) { -+ int ret; -+ __do_free char *target = NULL; -+ -+ if (i >= 0) -+ base_path = must_append_path(base_path, parts[i], NULL); -+ target = must_make_path(base_path, "cgroup.subtree_control", NULL); -+ ret = lxc_writeat(-1, target, add_controllers, full_len); -+ if (ret < 0) -+ return log_error_errno(false, errno, "Could not enable \"%s\" controllers in the unified cgroup \"%s\"", -+ add_controllers, target); -+ 
TRACE("Enable \"%s\" controllers in the unified cgroup \"%s\"", add_controllers, target); -+ } -+ -+ return true; -+} -+ -+__cgfsng_ops bool isulad_cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops) -+{ -+ return true; -+} -+ -+__cgfsng_ops bool isulad_cgfsng_payload_delegate_controllers(struct cgroup_ops *ops) -+{ -+ if (!ops) -+ return ret_set_errno(false, ENOENT); -+ -+ return __cgfsng_delegate_controllers(ops, ops->container_cgroup); -+} -+ -+static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops, -+ char **controllers) -+{ -+ if (!ops->cgroup_use) -+ return true; -+ -+ for (char **cur_ctrl = controllers; cur_ctrl && *cur_ctrl; cur_ctrl++) { -+ bool found = false; -+ -+ for (char **cur_use = ops->cgroup_use; cur_use && *cur_use; cur_use++) { -+ if (strcmp(*cur_use, *cur_ctrl) != 0) -+ continue; -+ -+ found = true; -+ break; -+ } -+ -+ if (found) -+ continue; -+ -+ return false; -+ } -+ -+ return true; -+} -+ -+static void cg_unified_delegate(char ***delegate) -+{ -+ __do_free char *buf = NULL; -+ char *standard[] = {"cgroup.subtree_control", "cgroup.threads", NULL}; -+ char *token; -+ int idx; -+ -+ buf = read_file("/sys/kernel/cgroup/delegate"); -+ if (!buf) { -+ for (char **p = standard; p && *p; p++) { -+ idx = append_null_to_list((void ***)delegate); -+ (*delegate)[idx] = must_copy_string(*p); -+ } -+ SYSWARN("Failed to read /sys/kernel/cgroup/delegate"); -+ return; -+ } -+ -+ lxc_iterate_parts (token, buf, " \t\n") { -+ /* -+ * We always need to chown this for both cgroup and -+ * cgroup2. -+ */ -+ if (strcmp(token, "cgroup.procs") == 0) -+ continue; -+ -+ idx = append_null_to_list((void ***)delegate); -+ (*delegate)[idx] = must_copy_string(token); -+ } -+} -+ -+/* At startup, parse_hierarchies finds all the info we need about cgroup -+ * mountpoints and current cgroups, and stores it in @d. 
-+ */ -+static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileged) -+{ -+ __do_free char *basecginfo = NULL, *line = NULL; -+ __do_free_string_list char **klist = NULL, **nlist = NULL; -+ __do_fclose FILE *f = NULL; -+ int ret; -+ size_t len = 0; -+ -+ /* Root spawned containers escape the current cgroup, so use init's -+ * cgroups as our base in that case. -+ */ -+ if (!relative && (geteuid() == 0)) -+ basecginfo = read_file("/proc/1/cgroup"); -+ else -+ basecginfo = read_file("/proc/self/cgroup"); -+ if (!basecginfo) -+ return ret_set_errno(-1, ENOMEM); -+ -+ ret = get_existing_subsystems(&klist, &nlist); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to retrieve available legacy cgroup controllers"); -+ -+ f = fopen("/proc/self/mountinfo", "re"); -+ if (!f) -+ return log_error_errno(-1, errno, "Failed to open \"/proc/self/mountinfo\""); -+ -+ lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist); -+ -+ while (getline(&line, &len, f) != -1) { -+ __do_free char *base_cgroup = NULL, *mountpoint = NULL; -+ __do_free_string_list char **controller_list = NULL; -+ int type; -+ struct hierarchy *new; -+ -+ type = get_cgroup_version(line); -+ if (type == 0) -+ continue; -+ -+ if (type == CGROUP2_SUPER_MAGIC && ops->unified) -+ continue; -+ -+ if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) { -+ if (type == CGROUP2_SUPER_MAGIC) -+ ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED; -+ else if (type == CGROUP_SUPER_MAGIC) -+ ops->cgroup_layout = CGROUP_LAYOUT_LEGACY; -+ } else if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) { -+ if (type == CGROUP_SUPER_MAGIC) -+ ops->cgroup_layout = CGROUP_LAYOUT_HYBRID; -+ } else if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) { -+ if (type == CGROUP2_SUPER_MAGIC) -+ ops->cgroup_layout = CGROUP_LAYOUT_HYBRID; -+ } -+ -+ controller_list = cg_hybrid_get_controllers(klist, nlist, line, type); -+ if (!controller_list && type == CGROUP_SUPER_MAGIC) -+ continue; -+ -+ if (type == CGROUP_SUPER_MAGIC) -+ if 
(controller_list_is_dup(ops->hierarchies, controller_list)) { -+ TRACE("Skipping duplicating controller"); -+ continue; -+ } -+ -+ mountpoint = cg_hybrid_get_mountpoint(line); -+ if (!mountpoint) { -+ ERROR("Failed parsing mountpoint from \"%s\"", line); -+ continue; -+ } -+ -+ if (type == CGROUP_SUPER_MAGIC) -+ base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC); -+ else -+ base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC); -+ if (!base_cgroup) { -+ ERROR("Failed to find current cgroup"); -+ continue; -+ } -+ -+ trim(base_cgroup); -+ prune_init_scope(base_cgroup); -+ -+ /* isulad: do not test writeable, if we run isulad in docker without cgroup namespace. -+ * the base_cgroup will be docker/XXX.., mountpoint+base_cgroup may be not exist */ -+ -+ /* -+ * reason:base cgroup may be started with /system.slice when cg_hybrid_init -+ * read /proc/1/cgroup on host, and cgroup init will set all containers -+ * cgroup path under /sys/fs/cgroup//system.slice/xxx/lxc -+ * directory, this is not consistent with docker. The default cgroup path -+ * should be under /sys/fs/cgroup//lxc directory. -+ */ -+ -+ if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') { -+ base_cgroup[1] = '\0'; -+ } -+ -+ if (type == CGROUP2_SUPER_MAGIC) { -+ char *cgv2_ctrl_path; -+ -+ cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup, -+ "cgroup.controllers", -+ NULL); -+ -+ controller_list = cg_unified_get_controllers(cgv2_ctrl_path); -+ free(cgv2_ctrl_path); -+ if (!controller_list) { -+ controller_list = cg_unified_make_empty_controller(); -+ TRACE("No controllers are enabled for " -+ "delegation in the unified hierarchy"); -+ } -+ } -+ -+ /* Exclude all controllers that cgroup use does not want. 
*/ -+ if (!cgroup_use_wants_controllers(ops, controller_list)) { -+ TRACE("Skipping controller"); -+ continue; -+ } -+ -+ new = add_hierarchy(&ops->hierarchies, move_ptr(controller_list), move_ptr(mountpoint), move_ptr(base_cgroup), type); -+ if (type == CGROUP2_SUPER_MAGIC && !ops->unified) { -+ if (unprivileged) -+ cg_unified_delegate(&new->cgroup2_chown); -+ ops->unified = new; -+ } -+ } -+ -+ TRACE("Writable cgroup hierarchies:"); -+ lxc_cgfsng_print_hierarchies(ops); -+ -+ /* verify that all controllers in cgroup.use and all crucial -+ * controllers are accounted for -+ */ -+ if (!all_controllers_found(ops)) -+ return log_error_errno(-1, ENOENT, "Failed to find all required controllers"); -+ -+ return 0; -+} -+ -+/* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */ -+static char *cg_unified_get_current_cgroup(bool relative) -+{ -+ __do_free char *basecginfo = NULL; -+ char *copy; -+ char *base_cgroup; -+ -+ if (!relative && (geteuid() == 0)) -+ basecginfo = read_file("/proc/1/cgroup"); -+ else -+ basecginfo = read_file("/proc/self/cgroup"); -+ if (!basecginfo) -+ return NULL; -+ -+ base_cgroup = strstr(basecginfo, "0::/"); -+ if (!base_cgroup) -+ return NULL; -+ -+ base_cgroup = base_cgroup + 3; -+ copy = copy_to_eol(base_cgroup); -+ if (!copy) -+ return NULL; -+ -+ return trim(copy); -+} -+ -+static int cg_unified_init(struct cgroup_ops *ops, bool relative, -+ bool unprivileged) -+{ -+ __do_free char *subtree_path = NULL; -+ int ret; -+ char *mountpoint; -+ char **delegatable; -+ struct hierarchy *new; -+ char *base_cgroup = NULL; -+ -+ ret = unified_cgroup_hierarchy(); -+ if (ret == -ENOMEDIUM) -+ return ret_errno(ENOMEDIUM); -+ -+ if (ret != CGROUP2_SUPER_MAGIC) -+ return 0; -+ -+ base_cgroup = cg_unified_get_current_cgroup(relative); -+ if (!base_cgroup) -+ return ret_errno(EINVAL); -+ if (!relative) -+ prune_init_scope(base_cgroup); -+ -+ /* -+ * We assume that the cgroup we're currently in has been delegated to -+ * us and we 
are free to further delege all of the controllers listed -+ * in cgroup.controllers further down the hierarchy. -+ */ -+ mountpoint = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT); -+ subtree_path = must_make_path(mountpoint, base_cgroup, "cgroup.controllers", NULL); -+ delegatable = cg_unified_get_controllers(subtree_path); -+ if (!delegatable) -+ delegatable = cg_unified_make_empty_controller(); -+ if (!delegatable[0]) -+ TRACE("No controllers are enabled for delegation"); -+ -+ /* TODO: If the user requested specific controllers via lxc.cgroup.use -+ * we should verify here. The reason I'm not doing it right is that I'm -+ * not convinced that lxc.cgroup.use will be the future since it is a -+ * global property. I much rather have an option that lets you request -+ * controllers per container. -+ */ -+ -+ new = add_hierarchy(&ops->hierarchies, delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC); -+ if (unprivileged) -+ cg_unified_delegate(&new->cgroup2_chown); -+ -+ if (bpf_devices_cgroup_supported()) -+ new->bpf_device_controller = 1; -+ -+ ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED; -+ ops->unified = new; -+ -+ return CGROUP2_SUPER_MAGIC; -+} -+ -+static int isulad_cg_init(struct cgroup_ops *ops, struct lxc_conf *conf) -+{ -+ int ret; -+ const char *tmp; -+ bool relative = conf->cgroup_meta.relative; -+ -+ tmp = lxc_global_config_value("lxc.cgroup.use"); -+ if (tmp) { -+ __do_free char *pin = NULL; -+ char *chop, *cur; -+ -+ pin = must_copy_string(tmp); -+ chop = pin; -+ -+ lxc_iterate_parts(cur, chop, ",") -+ must_append_string(&ops->cgroup_use, cur); -+ } -+ -+ ret = cg_unified_init(ops, relative, !lxc_list_empty(&conf->id_map)); -+ if (ret < 0) -+ return -1; -+ -+ if (ret == CGROUP2_SUPER_MAGIC) -+ return 0; -+ -+ return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map)); -+} -+ -+__cgfsng_ops static int isulad_cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf) -+{ -+ const char *cgroup_pattern; -+ const char *cgroup_tree; -+ 
__do_free char *container_cgroup = NULL, *__cgroup_tree = NULL; -+ size_t len; -+ -+ if (!ops) -+ return ret_set_errno(-1, ENOENT); -+ -+ /* copy system-wide cgroup information */ -+ cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern"); -+ if (cgroup_pattern && strcmp(cgroup_pattern, "") != 0) -+ ops->cgroup_pattern = must_copy_string(cgroup_pattern); -+ -+ if (conf->cgroup_meta.dir) { -+ cgroup_tree = conf->cgroup_meta.dir; -+ container_cgroup = must_concat(&len, cgroup_tree, "/", conf->name, NULL); -+ } else if (ops->cgroup_pattern) { -+ __cgroup_tree = lxc_string_replace("%n", conf->name, ops->cgroup_pattern); -+ if (!__cgroup_tree) -+ return ret_set_errno(-1, ENOMEM); -+ -+ cgroup_tree = __cgroup_tree; -+ container_cgroup = must_concat(&len, cgroup_tree, NULL); -+ } else { -+ cgroup_tree = NULL; -+ container_cgroup = must_concat(&len, conf->name, NULL); -+ } -+ if (!container_cgroup) -+ return ret_set_errno(-1, ENOMEM); -+ -+ ops->container_cgroup = move_ptr(container_cgroup); -+ -+ return 0; -+} -+ -+struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf) -+{ -+ __do_free struct cgroup_ops *cgfsng_ops = NULL; -+ -+ cgfsng_ops = malloc(sizeof(struct cgroup_ops)); -+ if (!cgfsng_ops) -+ return ret_set_errno(NULL, ENOMEM); -+ -+ memset(cgfsng_ops, 0, sizeof(struct cgroup_ops)); -+ cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN; -+ -+ if (isulad_cg_init(cgfsng_ops, conf)) -+ return NULL; -+ -+ cgfsng_ops->data_init = isulad_cgfsng_data_init; -+ -+ cgfsng_ops->errfd = conf ? 
conf->errpipe[1] : -1; -+ cgfsng_ops->get_cgroup_full_path = isulad_cgfsng_get_cgroup_full_path; -+ cgfsng_ops->payload_destroy = isulad_cgfsng_payload_destroy; -+ cgfsng_ops->monitor_destroy = isulad_cgfsng_monitor_destroy; -+ cgfsng_ops->monitor_create = isulad_cgfsng_monitor_create; -+ cgfsng_ops->monitor_enter = isulad_cgfsng_monitor_enter; -+ cgfsng_ops->monitor_delegate_controllers = isulad_cgfsng_monitor_delegate_controllers; -+ cgfsng_ops->payload_delegate_controllers = isulad_cgfsng_payload_delegate_controllers; -+ cgfsng_ops->payload_create = isulad_cgfsng_payload_create; -+ cgfsng_ops->payload_enter = isulad_cgfsng_payload_enter; -+ cgfsng_ops->payload_finalize = isulad_cgfsng_payload_finalize; -+ cgfsng_ops->escape = isulad_cgfsng_escape; -+ cgfsng_ops->num_hierarchies = isulad_cgfsng_num_hierarchies; -+ cgfsng_ops->get_hierarchies = isulad_cgfsng_get_hierarchies; -+ cgfsng_ops->get_cgroup = isulad_cgfsng_get_cgroup; -+ cgfsng_ops->get = isulad_cgfsng_get; -+ cgfsng_ops->set = isulad_cgfsng_set; -+ cgfsng_ops->freeze = isulad_cgfsng_freeze; -+ cgfsng_ops->unfreeze = isulad_cgfsng_unfreeze; -+ cgfsng_ops->setup_limits_legacy = isulad_cgfsng_setup_limits_legacy; -+ cgfsng_ops->setup_limits = isulad_cgfsng_setup_limits; -+ cgfsng_ops->driver = "isulad_cgfsng"; -+ cgfsng_ops->version = "1.0.0"; -+ cgfsng_ops->attach = isulad_cgfsng_attach; -+ cgfsng_ops->chown = isulad_cgfsng_chown; -+ cgfsng_ops->mount = isulad_cgfsng_mount; -+ cgfsng_ops->devices_activate = isulad_cgfsng_devices_activate; -+ -+ return move_ptr(cgfsng_ops); -+} --- -2.25.1 - diff --git a/0010-cgfsng-adjust-log-level-from-error-to-warn.patch b/0010-cgfsng-adjust-log-level-from-error-to-warn.patch deleted file mode 100644 index e0ca43c..0000000 --- a/0010-cgfsng-adjust-log-level-from-error-to-warn.patch +++ /dev/null @@ -1,76 +0,0 @@ -From 9fa92a4f8d0fd772a27e5c27d03b927c765c133c Mon Sep 17 00:00:00 2001 -From: lifeng68 -Date: Fri, 13 Nov 2020 14:11:07 +0800 -Subject: [PATCH 10/10] cgfsng: 
adjust log level from error to warn - -Signed-off-by: lifeng68 ---- - src/lxc/cgroups/cgfsng.c | 6 +++--- - src/lxc/cgroups/isulad_cgfsng.c | 6 +++--- - 2 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c -index 9b9aaf6c3..3f81f5c41 100644 ---- a/src/lxc/cgroups/cgfsng.c -+++ b/src/lxc/cgroups/cgfsng.c -@@ -654,7 +654,7 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line, - * verify /sys/fs/cgroup/ in this field. - */ - if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0) -- return log_error(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p); -+ return log_warn(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p); - - p += 15; - p2 = strchr(p, ' '); -@@ -3092,7 +3092,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg - - mountpoint = cg_hybrid_get_mountpoint(line); - if (!mountpoint) { -- ERROR("Failed parsing mountpoint from \"%s\"", line); -+ WARN("Failed parsing mountpoint from \"%s\"", line); - continue; - } - -@@ -3101,7 +3101,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg - else - base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC); - if (!base_cgroup) { -- ERROR("Failed to find current cgroup"); -+ WARN("Failed to find current cgroup"); - continue; - } - -diff --git a/src/lxc/cgroups/isulad_cgfsng.c b/src/lxc/cgroups/isulad_cgfsng.c -index 82a4333f3..e16f8a198 100644 ---- a/src/lxc/cgroups/isulad_cgfsng.c -+++ b/src/lxc/cgroups/isulad_cgfsng.c -@@ -317,7 +317,7 @@ static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line, - * verify /sys/fs/cgroup/ in this field. 
- */ - if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0) -- return log_error(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p); -+ return log_warn(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p); - - p += 15; - p2 = strchr(p, ' '); -@@ -2847,7 +2847,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg - - mountpoint = cg_hybrid_get_mountpoint(line); - if (!mountpoint) { -- ERROR("Failed parsing mountpoint from \"%s\"", line); -+ WARN("Failed parsing mountpoint from \"%s\"", line); - continue; - } - -@@ -2856,7 +2856,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg - else - base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC); - if (!base_cgroup) { -- ERROR("Failed to find current cgroup"); -+ WARN("Failed to find current cgroup"); - continue; - } - --- -2.25.1 - diff --git a/0011-rootfs-add-make-private-for-root.path-parent.patch b/0011-rootfs-add-make-private-for-root.path-parent.patch deleted file mode 100644 index 5ef6c6d..0000000 --- a/0011-rootfs-add-make-private-for-root.path-parent.patch +++ /dev/null @@ -1,28 +0,0 @@ -From e9214cfb2a247a78a07d2032956cde97e6d19e4a Mon Sep 17 00:00:00 2001 -From: lifeng68 -Date: Tue, 17 Nov 2020 18:37:56 +0800 -Subject: [PATCH 11/11] rootfs: add make private for root.path parent - -Signed-off-by: lifeng68 ---- - src/lxc/conf.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 3d8713954..ce5bab9c5 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -1434,6 +1434,10 @@ static int lxc_mount_rootfs(struct lxc_conf *conf) - * not propagate in other namespaces. Also it will help with kernel - * check pass in pivot_root. 
(IS_SHARED(new_mnt->mnt_parent)) - */ -+ ret = rootfs_parent_mount_private(conf->rootfs.path); -+ if (ret != 0) { -+ return log_error(-1, "Failed to make parent of rootfs %s to private.", conf->rootfs.path); -+ } - ret = rootfs_parent_mount_private(conf->rootfs.mount); - if (ret != 0) { - return log_error(-1, "Failed to make parent of rootfs %s to private.", conf->rootfs.mount); --- -2.25.1 - diff --git a/0012-mount-make-possible-to-bind-mount-proc-and-sys-fs.patch b/0012-mount-make-possible-to-bind-mount-proc-and-sys-fs.patch deleted file mode 100644 index 0cd6037..0000000 --- a/0012-mount-make-possible-to-bind-mount-proc-and-sys-fs.patch +++ /dev/null @@ -1,204 +0,0 @@ -From 53ca847c8d21b1e422745a221d49ddf61679d4dd Mon Sep 17 00:00:00 2001 -From: lifeng68 -Date: Fri, 27 Nov 2020 16:02:25 +0800 -Subject: [PATCH] mount: make possible to bind mount /proc and /sys/fs - -1. add check whether have /proc mounts entry, if has, skip the auto -mount proc -2. mount cgroup before do mount entrys -3. pass if the mount on top of /proc and the source of the mount is a proc filesystem - -Signed-off-by: lifeng68 ---- - src/lxc/conf.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++--- - src/lxc/path.c | 2 +- - 2 files changed, 92 insertions(+), 5 deletions(-) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index ce5bab9c5..c3610ae33 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -2581,7 +2581,7 @@ retry: - - /* isulad: checkMountDestination checks to ensure that the mount destination is not over the top of /proc. - * dest is required to be an abs path and have any symlinks resolved before calling this function. 
*/ --static int check_mount_destination(const char *rootfs, const char *dest) -+static int check_mount_destination(const char *rootfs, const char *dest, const char *src) - { - const char *invalid_destinations[] = { - "/proc", -@@ -2641,10 +2641,28 @@ static int check_mount_destination(const char *rootfs, const char *dest) - return -1; - } - relpath = path_relative(fullpath, dest); -+ DEBUG("dst path %s get relative path %s with full path %s,src:%s", dest, relpath, fullpath, src); - free(fullpath); -- if (!relpath) -+ if (!relpath) { -+ ERROR("Failed to get relpath for %s related to %s", dest, fullpath); - return -1; -- if (!strcmp(relpath, ".") || strncmp(relpath, "..", 2)) { -+ } -+ // pass if the mount path is outside of invalid proc -+ if (strncmp(relpath, "..", 2) == 0) { -+ free(relpath); -+ continue; -+ } -+ if (strcmp(relpath, ".") == 0) { -+ if (src == NULL) { -+ free(relpath); -+ continue; -+ } -+ // pass if the mount on top of /proc and the source of the mount is a proc filesystem -+ if (has_fs_type(src, PROC_SUPER_MAGIC)) { -+ WARN("src %s is proc allow mount on-top of %s", src, *invalid); -+ free(relpath); -+ continue; -+ } - ERROR("%s cannot be mounted because it is located inside %s", dest, *invalid); - free(relpath); - return -1; -@@ -2706,7 +2724,7 @@ static inline int mount_entry_on_generic(struct mntent *mntent, - } - dest = rpath; - -- ret = check_mount_destination(rootfs_path, dest); -+ ret = check_mount_destination(rootfs_path, dest, mntent->mnt_fsname); - if (ret) { - ERROR("Mount destination is invalid: '%s'", dest); - lxc_write_error_message(rootfs->errfd, "%s:%d: mount destination is invalid: '%s'.", -@@ -3119,6 +3137,52 @@ static bool need_setup_dev(const struct lxc_conf *conf, struct lxc_list *mount) - return true; - } - } -+ -+static bool have_proc_bind_mount_entry(FILE *file) -+{ -+ bool have_bind_proc = false; -+ char buf[PATH_MAX] = { 0 }; -+ struct mntent mntent; -+ -+ while (getmntent_r(file, &mntent, buf, sizeof(buf))) { -+ 
mntent.mnt_dir = lxc_string_replace(SPACE_MAGIC_STR, " ", mntent.mnt_dir); -+ if(mntent.mnt_dir == NULL) { -+ SYSERROR("memory allocation error"); -+ continue; -+ } -+ -+ DEBUG("parsed mnt %s, %s, %s", mntent.mnt_fsname, mntent.mnt_dir, mntent.mnt_type); -+ -+ if (strcmp(mntent.mnt_dir, "proc") == 0 && strcmp(mntent.mnt_type, "bind") == 0) { -+ have_bind_proc = true; -+ } -+ -+ free(mntent.mnt_dir); -+ mntent.mnt_dir = NULL; -+ -+ if (have_bind_proc) -+ return true; -+ } -+ -+ return false; -+} -+ -+// returns true if /proc needs to be set up. -+static bool need_setup_proc(const struct lxc_conf *conf, struct lxc_list *mount) -+{ -+ __do_fclose FILE *f = NULL; -+ -+ f = make_anonymous_mount_file(mount, conf->lsm_aa_allow_nesting); -+ if (f == NULL) -+ return true; -+ -+ if (have_proc_bind_mount_entry(f)) { -+ return false; -+ } else { -+ return true; -+ } -+} -+ - #endif - - static int parse_cap(const char *cap) -@@ -4870,6 +4934,7 @@ int lxc_setup(struct lxc_handler *handler) - char *keyring_context = NULL; - #ifdef HAVE_ISULAD - bool setup_dev = true; -+ bool setup_proc = true; - #endif - - ret = lxc_setup_rootfs_prepare_root(lxc_conf, name, lxcpath); -@@ -4930,6 +4995,17 @@ int lxc_setup(struct lxc_handler *handler) - ret = lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler); - if (ret < 0) - return log_error(-1, "Failed to setup first automatic mounts"); -+ -+#ifdef HAVE_ISULAD -+ /* Now mount only cgroups, if wanted. Before, /sys could not have been -+ * mounted. It is guaranteed to be mounted now either through -+ * automatically or via fstab entries. 
-+ */ -+ ret = lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler); -+ if (ret < 0) -+ return log_error(-1, "Failed to setup remaining automatic mounts"); -+#endif -+ - #ifdef HAVE_ISULAD - ret = setup_mount(lxc_conf, &lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath, lxc_conf->lsm_se_mount_context); - #else -@@ -4950,6 +5026,7 @@ int lxc_setup(struct lxc_handler *handler) - return log_error(-1, "Failed to setup mount entries"); - #ifdef HAVE_ISULAD - setup_dev = need_setup_dev(lxc_conf, &lxc_conf->mount_list); -+ setup_proc = need_setup_proc(lxc_conf, &lxc_conf->mount_list); - #endif - } - -@@ -4975,6 +5052,7 @@ int lxc_setup(struct lxc_handler *handler) - } - } - -+#ifndef HAVE_ISULAD - /* Now mount only cgroups, if wanted. Before, /sys could not have been - * mounted. It is guaranteed to be mounted now either through - * automatically or via fstab entries. -@@ -4982,6 +5060,7 @@ int lxc_setup(struct lxc_handler *handler) - ret = lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler); - if (ret < 0) - return log_error(-1, "Failed to setup remaining automatic mounts"); -+#endif - - ret = run_lxc_hooks(name, "mount", lxc_conf, NULL); - if (ret < 0) -@@ -5026,9 +5105,17 @@ int lxc_setup(struct lxc_handler *handler) - if (ret < 0) - return log_error(-1, "Failed to setup \"/dev\" symlinks"); - -+#ifdef HAVE_ISULAD -+ if (setup_proc) { -+ ret = lxc_create_tmp_proc_mount(lxc_conf); -+ if (ret < 0) -+ return log_error(-1, "Failed to \"/proc\" LSMs"); -+ } -+#else - ret = lxc_create_tmp_proc_mount(lxc_conf); - if (ret < 0) - return log_error(-1, "Failed to \"/proc\" LSMs"); -+#endif - - #ifdef HAVE_ISULAD - /* Ask father to run oci prestart hooks and wait for him to finish. 
*/ -diff --git a/src/lxc/path.c b/src/lxc/path.c -index 65b8aadbf..46256cb26 100644 ---- a/src/lxc/path.c -+++ b/src/lxc/path.c -@@ -652,4 +652,4 @@ char *path_relative(const char *basepath, const char *targpath) - } - - return safe_strdup(targ + t0); --} -+} -\ No newline at end of file --- -2.25.1 - diff --git a/0013-use-path-based-unix-domain-sockets-instead-of-abstra.patch b/0013-use-path-based-unix-domain-sockets-instead-of-abstra.patch deleted file mode 100644 index 71d2e29..0000000 --- a/0013-use-path-based-unix-domain-sockets-instead-of-abstra.patch +++ /dev/null @@ -1,401 +0,0 @@ -From f0af10aef5b21b6bf19dce0d2657f645355a42ac Mon Sep 17 00:00:00 2001 -From: wujing -Date: Fri, 4 Dec 2020 10:04:30 +0800 -Subject: [PATCH] use path based unix domain sockets instead of abstract - namespace sockets to improve container security - -Signed-off-by: wujing ---- - src/lxc/af_unix.c | 50 ++++++++++++++++++++++++++++++++++-- - src/lxc/af_unix.h | 4 ++- - src/lxc/attach.c | 4 +++ - src/lxc/commands.c | 39 ++++++++++++++++++++++++++++ - src/lxc/commands_utils.c | 51 +++++++++++++++++++++++++++++++++++++ - src/lxc/commands_utils.h | 6 +++++ - src/lxc/exec_commands.c | 55 ++++++++++++++++++++++++++++++++++++++++ - src/lxc/exec_commands.h | 4 +++ - src/lxc/lxccontainer.c | 18 +++++++++++++ - 9 files changed, 228 insertions(+), 3 deletions(-) - -diff --git a/src/lxc/af_unix.c b/src/lxc/af_unix.c -index 9f268be6..090465b4 100644 ---- a/src/lxc/af_unix.c -+++ b/src/lxc/af_unix.c -@@ -372,12 +372,58 @@ int lxc_unix_connect_type(struct sockaddr_un *addr, int type) - ret = connect(fd, (struct sockaddr *)addr, - offsetof(struct sockaddr_un, sun_path) + len); - if (ret < 0) -- return log_error_errno(-1, errno, -- "Failed to bind new AF_UNIX socket"); -+ return log_warn_errno(-1, errno, -+ "Failed to connect new AF_UNIX socket"); -+ -+ return move_fd(fd); -+} -+ -+#ifdef HAVE_ISULAD -+int lxc_named_unix_open(const char *path, int type, int flags) -+{ -+ __do_close int fd = -EBADF; 
-+ int ret; -+ ssize_t len; -+ struct sockaddr_un addr; -+ -+ fd = socket(PF_UNIX, type | SOCK_CLOEXEC, 0); -+ if (fd < 0) -+ return -1; -+ -+ if (!path) -+ return move_fd(fd); -+ -+ len = lxc_unix_sockaddr(&addr, path); -+ if (len < 0) -+ return -1; -+ -+ ret = bind(fd, (struct sockaddr *)&addr, len); -+ if (ret < 0) -+ return -1; -+ -+ if (chmod(path, 0600) < 0) -+ return -1; -+ -+ if (type == SOCK_STREAM) { -+ ret = listen(fd, 100); -+ if (ret < 0) -+ return -1; -+ } - - return move_fd(fd); - } - -+int lxc_named_unix_connect(const char *path) -+{ -+ struct sockaddr_un addr; -+ -+ if (lxc_unix_sockaddr(&addr, path) < 0) -+ return -1; -+ -+ return lxc_unix_connect_type(&addr, SOCK_STREAM); -+} -+#endif -+ - int lxc_unix_connect(struct sockaddr_un *addr, int type) - { - return lxc_unix_connect_type(addr, SOCK_STREAM); -diff --git a/src/lxc/af_unix.h b/src/lxc/af_unix.h -index 6943a61e..a511330a 100644 ---- a/src/lxc/af_unix.h -+++ b/src/lxc/af_unix.h -@@ -28,7 +28,9 @@ extern int lxc_unix_connect(struct sockaddr_un *addr); - extern int lxc_unix_connect_type(struct sockaddr_un *addr, int type); - extern int lxc_socket_set_timeout(int fd, int rcv_timeout, int snd_timeout); - #ifdef HAVE_ISULAD --int lxc_abstract_unix_recv_fds_timeout(int fd, int *recvfds, int num_recvfds, -+extern int lxc_abstract_unix_recv_fds_timeout(int fd, int *recvfds, int num_recvfds, - void *data, size_t size, unsigned int timeout); -+extern int lxc_named_unix_open(const char *path, int type, int flags); -+extern int lxc_named_unix_connect(const char *path); - #endif - #endif /* __LXC_AF_UNIX_H */ -diff --git a/src/lxc/attach.c b/src/lxc/attach.c -index 72b3055c..87e23c22 100644 ---- a/src/lxc/attach.c -+++ b/src/lxc/attach.c -@@ -1474,6 +1474,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - if (exec_command.maincmd_fd != -1) { - close(exec_command.maincmd_fd); - } -+ lxc_exec_unix_sock_delete(name, suffix); - } - #endif - free(cwd); -@@ -1491,6 +1492,7 
@@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - if (exec_command.maincmd_fd != -1) { - close(exec_command.maincmd_fd); - } -+ lxc_exec_unix_sock_delete(name, suffix); - } - close(ipc_sockets[0]); - close(ipc_sockets[1]); -@@ -1517,6 +1519,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - if (exec_command.maincmd_fd != -1) { - close(exec_command.maincmd_fd); - } -+ lxc_exec_unix_sock_delete(name, suffix); - } - close(ipc_sockets[0]); - close(ipc_sockets[1]); -@@ -1789,6 +1792,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - if (exec_command.maincmd_fd != -1) { - close(exec_command.maincmd_fd); - } -+ lxc_exec_unix_sock_delete(name, suffix); - #endif - } - -diff --git a/src/lxc/commands.c b/src/lxc/commands.c -index 37354e87..70c56579 100644 ---- a/src/lxc/commands.c -+++ b/src/lxc/commands.c -@@ -1691,6 +1691,44 @@ static int lxc_cmd_accept(int fd, uint32_t events, void *data, - return ret; - } - -+#ifdef HAVE_ISULAD -+int lxc_cmd_init(const char *name, const char *lxcpath, const char *suffix) -+{ -+ __do_close int fd = -EBADF; -+ int ret; -+ char path[LXC_AUDS_ADDR_LEN] = {0}; -+ __do_free char *runtime_sock_dir = NULL; -+ -+ runtime_sock_dir = generate_named_unix_sock_dir(name); -+ if (runtime_sock_dir == NULL) -+ return -1; -+ -+ if (mkdir_p(runtime_sock_dir, 0600) < 0) -+ return log_error_errno(-1, errno, "Failed to create container runtime unix sock directory %s", path); -+ -+ if (generate_named_unix_sock_path(name, suffix, path, sizeof(path)) != 0) -+ return -1; -+ -+ fd = lxc_named_unix_open(path, SOCK_STREAM, 0); -+ if (fd < 0) { -+ if (errno == EADDRINUSE) { -+ WARN("Container \"%s\" appears to be already running", name); -+ (void)unlink(path); -+ -+ fd = lxc_named_unix_open(path, SOCK_STREAM, 0); -+ if (fd < 0) -+ return log_error_errno(-1, errno, "Failed to create command socket %s", path); -+ } else -+ return log_error_errno(-1, errno, 
"Failed to create command socket %s", path); -+ } -+ -+ ret = fcntl(fd, F_SETFD, FD_CLOEXEC); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to set FD_CLOEXEC on command socket file descriptor"); -+ -+ return log_trace(move_fd(fd), "Created unix socket \"%s\"", path); -+} -+#else - int lxc_cmd_init(const char *name, const char *lxcpath, const char *suffix) - { - __do_close int fd = -EBADF; -@@ -1715,6 +1753,7 @@ int lxc_cmd_init(const char *name, const char *lxcpath, const char *suffix) - - return log_trace(move_fd(fd), "Created abstract unix socket \"%s\"", &path[1]); - } -+#endif - - int lxc_cmd_mainloop_add(const char *name, struct lxc_epoll_descr *descr, - struct lxc_handler *handler) -diff --git a/src/lxc/commands_utils.c b/src/lxc/commands_utils.c -index 2f2670d7..7dfefa5c 100644 ---- a/src/lxc/commands_utils.c -+++ b/src/lxc/commands_utils.c -@@ -137,12 +137,63 @@ int lxc_make_abstract_socket_name(char *path, size_t pathlen, - return 0; - } - -+#ifdef HAVE_ISULAD -+char *generate_named_unix_sock_dir(const char *name) -+{ -+ __do_free char *exec_sock_dir = NULL; -+ -+ if (asprintf(&exec_sock_dir, "/var/run/lxc/%s", name) < 0) -+ return log_error_errno(NULL, errno, "Failed to allocate memory"); -+ -+ return move_ptr(exec_sock_dir); -+} -+ -+int generate_named_unix_sock_path(const char *container_name, const char *sock_name, -+ char *out_path, size_t len) -+{ -+#define MAX_SOCK_NAME_LENGTH 12 -+ int ret; -+ __do_free char *sock_dir = NULL; -+ __do_free char *short_sock_name = NULL; -+ -+ if (container_name == NULL || sock_name == NULL) -+ return -1; -+ -+ sock_dir = generate_named_unix_sock_dir(container_name); -+ if (sock_dir == NULL) -+ return -1; -+ -+ short_sock_name = strdup(sock_name); -+ if (strlen(short_sock_name) > MAX_SOCK_NAME_LENGTH) -+ short_sock_name[MAX_SOCK_NAME_LENGTH] = '\0'; -+ -+ ret = snprintf(out_path, len, "%s/%s.sock", sock_dir, short_sock_name); -+ if (ret < 0 || (size_t)ret >= len) -+ return log_error_errno(-1, errno, 
"Failed to allocate memory"); -+ -+ return 0; -+} -+#endif -+ - int lxc_cmd_connect(const char *name, const char *lxcpath, - const char *hashed_sock_name, const char *suffix) - { - int ret, client_fd; - char path[LXC_AUDS_ADDR_LEN] = {0}; - -+#ifdef HAVE_ISULAD -+ if (generate_named_unix_sock_path(name, suffix, path, sizeof(path)) != 0) -+ return -1; -+ -+ if (file_exists(path)) { -+ client_fd = lxc_named_unix_connect(path); -+ if (client_fd < 0) -+ return -1; -+ -+ return client_fd; -+ } -+#endif -+ - ret = lxc_make_abstract_socket_name(path, sizeof(path), name, lxcpath, - hashed_sock_name, suffix); - if (ret < 0) -diff --git a/src/lxc/commands_utils.h b/src/lxc/commands_utils.h -index 3ef7920c..c836ead8 100644 ---- a/src/lxc/commands_utils.h -+++ b/src/lxc/commands_utils.h -@@ -65,4 +65,10 @@ extern int lxc_add_state_client(int state_client_fd, - extern int lxc_cmd_connect(const char *name, const char *lxcpath, - const char *hashed_sock_name, const char *suffix); - -+#ifdef HAVE_ISULAD -+extern char *generate_named_unix_sock_dir(const char *name); -+extern int generate_named_unix_sock_path(const char *container_name, -+ const char *sock_name, char *out_path, size_t len); -+#endif -+ - #endif /* __LXC_COMMANDS_UTILS_H */ -diff --git a/src/lxc/exec_commands.c b/src/lxc/exec_commands.c -index 00129cb0..50246fa4 100644 ---- a/src/lxc/exec_commands.c -+++ b/src/lxc/exec_commands.c -@@ -371,7 +371,61 @@ out_close: - close(connection); - goto out; - } -+#ifdef HAVE_ISULAD -+int lxc_exec_unix_sock_delete(const char *name, const char *suffix) -+{ -+ char path[LXC_AUDS_ADDR_LEN] = {0}; -+ -+ if (name == NULL || suffix == NULL) -+ return -1; -+ -+ if (generate_named_unix_sock_path(name, suffix, path, sizeof(path)) != 0) -+ return -1; -+ -+ (void)unlink(path); -+ -+ return 0; -+} -+ -+int lxc_exec_cmd_init(const char *name, const char *lxcpath, const char *suffix) -+{ -+ __do_close int fd = -EBADF; -+ int ret; -+ char path[LXC_AUDS_ADDR_LEN] = {0}; -+ __do_free char 
*exec_sock_dir = NULL; - -+ exec_sock_dir = generate_named_unix_sock_dir(name); -+ if (exec_sock_dir == NULL) -+ return -1; -+ -+ if (mkdir_p(exec_sock_dir, 0600) < 0) -+ return log_error_errno(-1, errno, "Failed to create exec sock directory %s", path); -+ -+ if (generate_named_unix_sock_path(name, suffix, path, sizeof(path)) != 0) -+ return -1; -+ -+ TRACE("Creating unix socket \"%s\"", path); -+ -+ fd = lxc_named_unix_open(path, SOCK_STREAM, 0); -+ if (fd < 0) { -+ if (errno == EADDRINUSE) { -+ WARN("Container \"%s\" exec unix sock is occupied", name); -+ (void)unlink(path); -+ fd = lxc_named_unix_open(path, SOCK_STREAM, 0); -+ if (fd < 0) -+ return log_error_errno(-1, errno, "Failed to create command socket %s", path); -+ } else { -+ return log_error_errno(-1, errno, "Failed to create command socket %s", path); -+ } -+ } -+ -+ ret = fcntl(fd, F_SETFD, FD_CLOEXEC); -+ if (ret < 0) -+ return log_error_errno(-1, errno, "Failed to set FD_CLOEXEC on command socket file descriptor"); -+ -+ return log_trace(move_fd(fd), "Created unix socket \"%s\"", path); -+} -+#else - int lxc_exec_cmd_init(const char *name, const char *lxcpath, const char *suffix) - { - int fd, ret; -@@ -400,6 +454,7 @@ int lxc_exec_cmd_init(const char *name, const char *lxcpath, const char *suffix) - - return fd; - } -+#endif - - int lxc_exec_cmd_mainloop_add(struct lxc_epoll_descr *descr, struct lxc_exec_command_handler *handler) - { -diff --git a/src/lxc/exec_commands.h b/src/lxc/exec_commands.h -index 2581ee90..3ec2a226 100644 ---- a/src/lxc/exec_commands.h -+++ b/src/lxc/exec_commands.h -@@ -70,4 +70,8 @@ extern int lxc_exec_cmd_init(const char *name, const char *lxcpath, const char * - extern int lxc_exec_cmd_mainloop_add(struct lxc_epoll_descr *descr, struct lxc_exec_command_handler *handler); - extern int lxc_exec_cmd_set_terminal_winch(const char *name, const char *lxcpath, const char *suffix, unsigned int height, unsigned int width); - -+#ifdef HAVE_ISULAD -+extern int 
lxc_exec_unix_sock_delete(const char *name, const char *suffix); -+#endif -+ - #endif /* __exec_commands_h */ -diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c -index eef98df6..cbb67f32 100644 ---- a/src/lxc/lxccontainer.c -+++ b/src/lxc/lxccontainer.c -@@ -3189,6 +3189,21 @@ static int lxc_unlink_exec_wrapper(void *data) - return unlink(arg); - } - -+#ifdef HAVE_ISULAD -+static void container_sock_dir_delete(const char *name) -+{ -+ __do_free char *sock_dir = NULL; -+ -+ sock_dir = generate_named_unix_sock_dir(name); -+ if (sock_dir == NULL) { -+ ERROR("Failed to generate exec unix sock dir"); -+ return; -+ } -+ -+ (void)lxc_rmdir_onedev(sock_dir, NULL); -+} -+#endif -+ - static bool container_destroy(struct lxc_container *c, - struct lxc_storage *storage) - { -@@ -3342,6 +3357,9 @@ static bool container_destroy(struct lxc_container *c, - #endif - goto out; - } -+#ifdef HAVE_ISULAD -+ container_sock_dir_delete(c->name); -+#endif - INFO("Destroyed directory \"%s\" for \"%s\"", path, c->name); - - on_success: --- -2.25.1 - diff --git a/0014-api-add-get-container-metrics-api.patch b/0014-api-add-get-container-metrics-api.patch deleted file mode 100644 index 73b3e4c..0000000 --- a/0014-api-add-get-container-metrics-api.patch +++ /dev/null @@ -1,266 +0,0 @@ -From d1f9a992190921783337b71103d3525c3381bedf Mon Sep 17 00:00:00 2001 -From: lifeng68 -Date: Tue, 15 Dec 2020 17:30:01 +0800 -Subject: [PATCH 14/14] api: add get container metrics api - -Signed-off-by: lifeng68 ---- - src/lxc/lxccontainer.c | 174 +++++++++++++++++++++++++++++++++++++++++ - src/lxc/lxccontainer.h | 42 ++++++++++ - 2 files changed, 216 insertions(+) - -diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c -index cbb67f321..9202b73ff 100644 ---- a/src/lxc/lxccontainer.c -+++ b/src/lxc/lxccontainer.c -@@ -5752,6 +5752,179 @@ static bool do_lxcapi_set_start_timeout(struct lxc_container *c, unsigned int s - - WRAP_API_1(bool, lxcapi_set_start_timeout, unsigned int) - -+static uint64_t 
metrics_get_ull(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item) -+{ -+ char buf[80] = {0}; -+ int len = 0; -+ uint64_t val = 0; -+ -+ len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf), c->name, c->config_path); -+ if (len <= 0) { -+ DEBUG("unable to read cgroup item %s", item); -+ return 0; -+ } -+ -+ val = strtoull(buf, NULL, 0); -+ return val; -+} -+ -+static inline bool is_blk_metrics_read(const char *value) -+{ -+ return strcmp(value, "Read") == 0; -+} -+ -+static inline bool is_blk_metrics_write(const char *value) -+{ -+ return strcmp(value, "Write") == 0; -+} -+ -+static inline bool is_blk_metrics_total(const char *value) -+{ -+ return strcmp(value, "Total") == 0; -+} -+ -+static void metrics_get_blk_stats(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item, struct lxc_blkio_metrics *stats) -+{ -+#define BUFSIZE 4096 -+ char buf[BUFSIZE] = {0}; -+ int i = 0; -+ int len = 0; -+ char **lines = NULL; -+ char **cols = NULL; -+ -+ len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf), c->name, c->config_path); -+ if (len <= 0) { -+ DEBUG("unable to read cgroup item %s", item); -+ return; -+ } -+ -+ lines = lxc_string_split_and_trim(buf, '\n'); -+ if (lines == NULL) { -+ return; -+ } -+ -+ (void)memset(stats, 0, sizeof(struct lxc_blkio_metrics)); -+ -+ for (i = 0; lines[i]; i++) { -+ cols = lxc_string_split_and_trim(lines[i], ' '); -+ if (cols == NULL) { -+ goto err_out; -+ } -+ if (is_blk_metrics_read(cols[1])) { -+ stats->read += strtoull(cols[2], NULL, 0); -+ } else if (is_blk_metrics_write(cols[1])) { -+ stats->write += strtoull(cols[2], NULL, 0); -+ } -+ if (is_blk_metrics_total(cols[0])) { -+ stats->total = strtoull(cols[1], NULL, 0); -+ } -+ -+ lxc_free_array((void **)cols, free); -+ } -+err_out: -+ lxc_free_array((void **)lines, free); -+ return; -+} -+ -+static uint64_t metrics_match_get_ull(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item, const char *match, int column) -+{ 
-+#define BUFSIZE 4096 -+ char buf[BUFSIZE] = {0}; -+ int i = 0; -+ int j = 0; -+ int len = 0; -+ uint64_t val = 0; -+ char **lines = NULL; -+ char **cols = NULL; -+ size_t matchlen = 0; -+ -+ len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf), c->name, c->config_path); -+ if (len <= 0) { -+ DEBUG("unable to read cgroup item %s", item); -+ goto err_out; -+ } -+ -+ lines = lxc_string_split_and_trim(buf, '\n'); -+ if (lines == NULL) { -+ goto err_out; -+ } -+ -+ matchlen = strlen(match); -+ for (i = 0; lines[i]; i++) { -+ if (strncmp(lines[i], match, matchlen) != 0) { -+ continue; -+ } -+ -+ cols = lxc_string_split_and_trim(lines[i], ' '); -+ if (cols == NULL) { -+ goto err1; -+ } -+ for (j = 0; cols[j]; j++) { -+ if (j == column) { -+ val = strtoull(cols[j], NULL, 0); -+ break; -+ } -+ } -+ lxc_free_array((void **)cols, free); -+ break; -+ } -+err1: -+ lxc_free_array((void **)lines, free); -+err_out: -+ return val; -+} -+ -+/* isulad add get container metrics */ -+static bool do_lxcapi_get_container_metrics(struct lxc_container *c, struct lxc_container_metrics *metrics) -+{ -+ call_cleaner(cgroup_exit) struct cgroup_ops *cgroup_ops = NULL; -+ const char *state = NULL; -+ if (c == NULL || c->lxc_conf == NULL || metrics == NULL) { -+ return false; -+ } -+ -+ state = c->state(c); -+ metrics->state = state; -+ -+ if (!is_stopped(c)) { -+ metrics->init = c->init_pid(c); -+ } else { -+ metrics->init = -1; -+ } -+ -+ cgroup_ops = cgroup_init(c->lxc_conf); -+ if (cgroup_ops == NULL) { -+ return false; -+ } -+ -+ metrics->cpu_use_nanos = metrics_get_ull(c, cgroup_ops, "cpuacct.usage"); -+ metrics->pids_current = metrics_get_ull(c, cgroup_ops, "pids.current"); -+ -+ metrics->cpu_use_user = metrics_match_get_ull(c, cgroup_ops, "cpuacct.stat", "user", 1); -+ metrics->cpu_use_sys = metrics_match_get_ull(c, cgroup_ops, "cpuacct.stat", "system", 1); -+ -+ // Try to read CFQ stats available on all CFQ enabled kernels first -+ metrics_get_blk_stats(c, cgroup_ops, 
"blkio.io_serviced_recursive", &metrics->io_serviced); -+ if (metrics->io_serviced.read == 0 && metrics->io_serviced.write == 0 && metrics->io_serviced.total == 0) { -+ metrics_get_blk_stats(c, cgroup_ops, "blkio.throttle.io_service_bytes", &metrics->io_service_bytes); -+ metrics_get_blk_stats(c, cgroup_ops, "blkio.throttle.io_serviced", &metrics->io_serviced); -+ } else { -+ metrics_get_blk_stats(c, cgroup_ops, "blkio.io_service_bytes_recursive", &metrics->io_service_bytes); -+ } -+ -+ metrics->mem_used = metrics_get_ull(c, cgroup_ops, "memory.usage_in_bytes"); -+ metrics->mem_limit = metrics_get_ull(c, cgroup_ops, "memory.limit_in_bytes"); -+ metrics->kmem_used = metrics_get_ull(c, cgroup_ops, "memory.kmem.usage_in_bytes"); -+ metrics->kmem_limit = metrics_get_ull(c, cgroup_ops, "memory.kmem.limit_in_bytes"); -+ -+ metrics->cache = metrics_match_get_ull(c, cgroup_ops, "memory.stat", "cache", 1); -+ metrics->cache_total = metrics_match_get_ull(c, cgroup_ops, "memory.stat", "total_cache", 1); -+ -+ return true; -+} -+ -+WRAP_API_1(bool, lxcapi_get_container_metrics, struct lxc_container_metrics *) -+ - #endif - - #ifdef HAVE_ISULAD -@@ -5924,6 +6097,7 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath - c->clean_container_resource = lxcapi_clean_container_resource; - c->get_container_pids = lxcapi_get_container_pids; - c->set_start_timeout = lxcapi_set_start_timeout; -+ c->get_container_metrics = lxcapi_get_container_metrics; - #endif - return c; - -diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h -index 2951ac7b4..e30bf6161 100644 ---- a/src/lxc/lxccontainer.h -+++ b/src/lxc/lxccontainer.h -@@ -40,6 +40,37 @@ struct lxc_mount { - int version; - }; - -+struct lxc_blkio_metrics { -+ uint64_t read; -+ uint64_t write; -+ uint64_t total; -+}; -+ -+struct lxc_container_metrics { -+ /* State of container */ -+ const char *state; -+ /* The process ID of the init container */ -+ pid_t init; -+ /* Current pids */ -+ uint64_t 
pids_current; -+ /* CPU usage */ -+ uint64_t cpu_use_nanos; -+ uint64_t cpu_use_user; -+ uint64_t cpu_use_sys; -+ /* BlkIO usage */ -+ struct lxc_blkio_metrics io_service_bytes; -+ struct lxc_blkio_metrics io_serviced; -+ /* Memory usage */ -+ uint64_t mem_used; -+ uint64_t mem_limit; -+ /* Kernel Memory usage */ -+ uint64_t kmem_used; -+ uint64_t kmem_limit; -+ /* Cache usage */ -+ uint64_t cache; -+ uint64_t cache_total; -+}; -+ - /*! - * An LXC container. - * -@@ -976,6 +1007,17 @@ struct lxc_container { - * \return \c true on success, else \c false. - */ - bool (*set_start_timeout)(struct lxc_container *c, unsigned int start_timeout); -+ -+ /*! isulad add -+ * \brief An API call to set start timeout -+ * -+ * \param c Container. -+ * \param start_timeout Value of start timeout. -+ * -+ * \return \c true on success, else \c false. -+ */ -+ bool (*get_container_metrics)(struct lxc_container *c, struct lxc_container_metrics *metrics); -+ - }; - - /*! --- -2.25.1 - diff --git a/0015-Streaming-IO-solution-optimization-and-enhancement.patch b/0015-Streaming-IO-solution-optimization-and-enhancement.patch deleted file mode 100644 index 6ecef27..0000000 --- a/0015-Streaming-IO-solution-optimization-and-enhancement.patch +++ /dev/null @@ -1,92 +0,0 @@ -From d19376d8735651b23394cdeb560cbebe374c8bb9 Mon Sep 17 00:00:00 2001 -From: wujing -Date: Wed, 21 Oct 2020 15:34:50 +0800 -Subject: [PATCH 2/3] Streaming IO solution optimization and enhancement - -Signed-off-by: wujing ---- - src/lxc/file_utils.c | 27 +++++++++++++++++++++++++++ - src/lxc/file_utils.h | 4 ++++ - src/lxc/terminal.c | 14 ++++++++++---- - 3 files changed, 41 insertions(+), 4 deletions(-) - -diff --git a/src/lxc/file_utils.c b/src/lxc/file_utils.c -index 1689cbaa..2dbbbd3b 100644 ---- a/src/lxc/file_utils.c -+++ b/src/lxc/file_utils.c -@@ -122,6 +122,33 @@ int lxc_read_from_file(const char *filename, void *buf, size_t count) - return ret; - } - -+#ifdef HAVE_ISULAD -+ssize_t lxc_write_nointr_for_fifo(int 
fd, const void *buf, size_t count) -+{ -+ ssize_t nret = 0; -+ ssize_t nwritten; -+ -+ if (buf == NULL) { -+ return -1; -+ } -+ -+ for (nwritten = 0; nwritten < count;) { -+ nret = write(fd, buf + nwritten, count - nwritten); -+ if (nret < 0) { -+ if (errno == EINTR || errno == EAGAIN) { -+ continue; -+ } else { -+ return nret; -+ } -+ } else { -+ nwritten += nret; -+ } -+ } -+ -+ return nwritten; -+} -+#endif -+ - ssize_t lxc_write_nointr(int fd, const void *buf, size_t count) - { - ssize_t ret; -diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h -index 6d5dbf68..29162b3f 100644 ---- a/src/lxc/file_utils.h -+++ b/src/lxc/file_utils.h -@@ -58,4 +58,8 @@ extern FILE *fdopen_cached(int fd, const char *mode, void **caller_freed_buffer) - extern FILE *fopen_cached(const char *path, const char *mode, - void **caller_freed_buffer); - -+#ifdef HAVE_ISULAD -+extern ssize_t lxc_write_nointr_for_fifo(int fd, const void *buf, size_t count); -+#endif -+ - #endif /* __LXC_FILE_UTILS_H */ -diff --git a/src/lxc/terminal.c b/src/lxc/terminal.c -index 7441de79..a4c6ad0c 100644 ---- a/src/lxc/terminal.c -+++ b/src/lxc/terminal.c -@@ -683,11 +683,17 @@ static void lxc_forward_data_to_fifo(struct lxc_list *list, bool is_err, const c - lxc_list_for_each_safe(it, list, next) { - elem = it->elem; - if (is_err) { -- if (elem->err_fd >= 0) -- lxc_write_nointr(elem->err_fd, buf, r); -+ if (elem->err_fd >= 0) { -+ if (lxc_write_nointr_for_fifo(elem->err_fd, buf, r) < 0) { -+ ERROR("Failed to write to fifo fd %d with error: %s", elem->err_fd, strerror(errno)); -+ } -+ } - } else { -- if (elem->out_fd >= 0) -- lxc_write_nointr(elem->out_fd, buf, r); -+ if (elem->out_fd >= 0) { -+ if (lxc_write_nointr_for_fifo(elem->out_fd, buf, r) < 0) { -+ ERROR("Failed to write to fifo fd %d with error: %s", elem->out_fd, strerror(errno)); -+ } -+ } - } - } - --- -2.25.1 - diff --git a/0016-avoid-using-void-pointers-in-caclulation.patch b/0016-avoid-using-void-pointers-in-caclulation.patch deleted file 
mode 100644 index 2b07828..0000000 --- a/0016-avoid-using-void-pointers-in-caclulation.patch +++ /dev/null @@ -1,40 +0,0 @@ -From 1912d374c6fbabc9ac549011cd863c28ee1a55fa Mon Sep 17 00:00:00 2001 -From: wujing -Date: Thu, 24 Dec 2020 11:23:01 +0800 -Subject: [PATCH 3/3] avoid using void pointers in caclulation - -Signed-off-by: wujing ---- - src/lxc/file_utils.c | 2 +- - src/lxc/file_utils.h | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/lxc/file_utils.c b/src/lxc/file_utils.c -index 2dbbbd3b..681207b2 100644 ---- a/src/lxc/file_utils.c -+++ b/src/lxc/file_utils.c -@@ -123,7 +123,7 @@ int lxc_read_from_file(const char *filename, void *buf, size_t count) - } - - #ifdef HAVE_ISULAD --ssize_t lxc_write_nointr_for_fifo(int fd, const void *buf, size_t count) -+ssize_t lxc_write_nointr_for_fifo(int fd, const char *buf, size_t count) - { - ssize_t nret = 0; - ssize_t nwritten; -diff --git a/src/lxc/file_utils.h b/src/lxc/file_utils.h -index 29162b3f..cb959bfb 100644 ---- a/src/lxc/file_utils.h -+++ b/src/lxc/file_utils.h -@@ -59,7 +59,7 @@ extern FILE *fopen_cached(const char *path, const char *mode, - void **caller_freed_buffer); - - #ifdef HAVE_ISULAD --extern ssize_t lxc_write_nointr_for_fifo(int fd, const void *buf, size_t count); -+extern ssize_t lxc_write_nointr_for_fifo(int fd, const char *buf, size_t count); - #endif - - #endif /* __LXC_FILE_UTILS_H */ --- -2.25.1 - diff --git a/0017-fix-compilation-errors-without-libcap.patch b/0017-fix-compilation-errors-without-libcap.patch deleted file mode 100644 index fb5a0f8..0000000 --- a/0017-fix-compilation-errors-without-libcap.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 5a3bec3f80d59dfcc76e16cbab957f4072601816 Mon Sep 17 00:00:00 2001 -From: wujing -Date: Tue, 5 Jan 2021 16:53:40 +0800 -Subject: [PATCH] fix compilation errors without libcap - -Signed-off-by: wujing ---- - src/lxc/conf.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/src/lxc/conf.c 
b/src/lxc/conf.c -index c3610ae3..19e193dd 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -5301,11 +5301,12 @@ int lxc_drop_caps(struct lxc_conf *conf) - goto out; - } - --#endif -- - out: - free(caplist); - return ret; -+#else -+ return 0; -+#endif - } - #endif - --- -2.27.0 - diff --git a/0018-IO-fix-io-data-miss-when-exec-with-pipes.patch b/0018-IO-fix-io-data-miss-when-exec-with-pipes.patch deleted file mode 100644 index 73040ce..0000000 --- a/0018-IO-fix-io-data-miss-when-exec-with-pipes.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 9502363455188344dcfd7d1202cd48b7b554a5de Mon Sep 17 00:00:00 2001 -From: Li Feng -Date: Wed, 20 Jan 2021 14:22:33 +0800 -Subject: [PATCH 18/18] IO: fix io data miss when exec with pipes - -Signed-off-by: Li Feng ---- - src/lxc/attach.c | 4 ++-- - src/lxc/mainloop.c | 14 ++++++++++++++ - src/lxc/mainloop.h | 2 ++ - src/lxc/start.c | 4 ++-- - src/lxc/terminal.c | 12 ++++++++---- - 5 files changed, 28 insertions(+), 8 deletions(-) - -diff --git a/src/lxc/attach.c b/src/lxc/attach.c -index 87e23c229..c5fc56150 100644 ---- a/src/lxc/attach.c -+++ b/src/lxc/attach.c -@@ -1754,7 +1754,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - } - #endif - if (options->attach_flags & LXC_ATTACH_TERMINAL) { -- ret = lxc_mainloop(&descr, -1); -+ ret = isulad_safe_mainloop(&descr, -1); - if (ret < 0) { - ret_parent = -1; - to_cleanup_pid = attached_pid; -@@ -1763,7 +1763,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function, - - #ifdef HAVE_ISULAD - // do lxc_mainloop to make sure we do not lose any output -- (void)lxc_mainloop(&isulad_descr, 100); -+ (void)isulad_safe_mainloop(&isulad_descr, 100); - if (g_attach_timeout_state == ATTACH_TIMEOUT && err_msg != NULL && *err_msg == NULL) { - *err_msg = safe_strdup("Attach exceeded timeout"); - } -diff --git a/src/lxc/mainloop.c b/src/lxc/mainloop.c -index 6d4c5935a..35186f4b5 100644 ---- a/src/lxc/mainloop.c -+++ 
b/src/lxc/mainloop.c -@@ -141,3 +141,17 @@ void lxc_mainloop_close(struct lxc_epoll_descr *descr) - - close_prot_errno_disarm(descr->epfd); - } -+ -+int isulad_safe_mainloop(struct lxc_epoll_descr *descr, int timeout_ms) -+{ -+ int ret; -+ -+ ret = lxc_mainloop(descr, timeout_ms); -+ -+ // There are stdout and stderr channels, and two epolls should be performed to prevent -+ // one of the channels from exiting first, causing the other channel to not receive data, -+ // resulting in data loss -+ (void)lxc_mainloop(descr, 100); -+ -+ return ret; -+} -\ No newline at end of file -diff --git a/src/lxc/mainloop.h b/src/lxc/mainloop.h -index 8afac60d3..dad79188c 100644 ---- a/src/lxc/mainloop.h -+++ b/src/lxc/mainloop.h -@@ -34,4 +34,6 @@ extern void lxc_mainloop_close(struct lxc_epoll_descr *descr); - - define_cleanup_function(struct lxc_epoll_descr *, lxc_mainloop_close); - -+extern int isulad_safe_mainloop(struct lxc_epoll_descr *descr, int timeout_ms); -+ - #endif -diff --git a/src/lxc/start.c b/src/lxc/start.c -index 50a1a8203..e6e217042 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -590,13 +590,13 @@ int lxc_poll(const char *name, struct lxc_handler *handler) - } - #endif - -- ret = lxc_mainloop(&descr, -1); -+ ret = isulad_safe_mainloop(&descr, -1); - close_prot_errno_disarm(descr.epfd); - if (ret < 0 || !handler->init_died) - goto out_mainloop_console; - - if (has_console) -- ret = lxc_mainloop(&descr_console, 100); -+ ret = isulad_safe_mainloop(&descr_console, 100); - - out_mainloop_console: - if (has_console) { -diff --git a/src/lxc/terminal.c b/src/lxc/terminal.c -index a4c6ad0c8..1e467f5a6 100644 ---- a/src/lxc/terminal.c -+++ b/src/lxc/terminal.c -@@ -679,19 +679,22 @@ static void lxc_forward_data_to_fifo(struct lxc_list *list, bool is_err, const c - struct lxc_list *it = NULL; - struct lxc_list *next = NULL; - struct lxc_fifos_fd *elem = NULL; -+ ssize_t w = 0; - - lxc_list_for_each_safe(it, list, next) { - elem = it->elem; - if (is_err) { - if 
(elem->err_fd >= 0) { -- if (lxc_write_nointr_for_fifo(elem->err_fd, buf, r) < 0) { -- ERROR("Failed to write to fifo fd %d with error: %s", elem->err_fd, strerror(errno)); -+ w = lxc_write_nointr_for_fifo(elem->err_fd, buf, r); -+ if (w != r) { -+ WARN("Failed to write to fifo fd %d with error: %s", elem->err_fd, strerror(errno)); - } - } - } else { - if (elem->out_fd >= 0) { -- if (lxc_write_nointr_for_fifo(elem->out_fd, buf, r) < 0) { -- ERROR("Failed to write to fifo fd %d with error: %s", elem->out_fd, strerror(errno)); -+ w = lxc_write_nointr_for_fifo(elem->out_fd, buf, r); -+ if (w != r) { -+ WARN("Failed to write to fifo fd %d with error: %s", elem->out_fd, strerror(errno)); - } - } - } -@@ -1673,6 +1676,7 @@ int lxc_terminal_create(struct lxc_terminal *terminal) - ERROR("Failed to create stdin pipe"); - goto err; - } -+ - /* for stdout */ - if (pipe2(terminal->pipes[1], O_CLOEXEC)) { - ERROR("Failed to create stdout pipe"); --- -2.25.1 - diff --git a/0019-metrics-add-total_inactive_file-metric-for-memory.patch b/0019-metrics-add-total_inactive_file-metric-for-memory.patch deleted file mode 100644 index 737afcc..0000000 --- a/0019-metrics-add-total_inactive_file-metric-for-memory.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 15da6e1f057c70eee476730138788fd73de1b208 Mon Sep 17 00:00:00 2001 -From: Li Feng -Date: Thu, 28 Jan 2021 16:05:18 +0800 -Subject: [PATCH 19/19] metrics: add total_inactive_file metric for memory - -Signed-off-by: Li Feng ---- - src/lxc/lxccontainer.c | 1 + - src/lxc/lxccontainer.h | 2 ++ - 2 files changed, 3 insertions(+) - -diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c -index 9202b73ff..06552ce5c 100644 ---- a/src/lxc/lxccontainer.c -+++ b/src/lxc/lxccontainer.c -@@ -5919,6 +5919,7 @@ static bool do_lxcapi_get_container_metrics(struct lxc_container *c, struct lxc - - metrics->cache = metrics_match_get_ull(c, cgroup_ops, "memory.stat", "cache", 1); - metrics->cache_total = metrics_match_get_ull(c, cgroup_ops, "memory.stat", 
"total_cache", 1); -+ metrics->inactive_file_total = metrics_match_get_ull(c, cgroup_ops, "memory.stat", "total_inactive_file", 1); - - return true; - } -diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h -index e30bf6161..9abbd09ed 100644 ---- a/src/lxc/lxccontainer.h -+++ b/src/lxc/lxccontainer.h -@@ -69,6 +69,8 @@ struct lxc_container_metrics { - /* Cache usage */ - uint64_t cache; - uint64_t cache_total; -+ /* total inactive file */ -+ uint64_t inactive_file_total; - }; - - /*! --- -2.25.1 - diff --git a/0020-support-cgroup-v2.patch b/0020-support-cgroup-v2.patch deleted file mode 100644 index 8b10dd7..0000000 --- a/0020-support-cgroup-v2.patch +++ /dev/null @@ -1,582 +0,0 @@ -From 0eac597a6f853c4eb41d7ebe58398c117798542c Mon Sep 17 00:00:00 2001 -From: WangFengTu -Date: Fri, 29 Jan 2021 16:55:13 +0800 -Subject: [PATCH] support cgroup v2 - -Signed-off-by: WangFengTu ---- - src/lxc/cgroups/cgroup2_devices.c | 126 +++++++++++++++++++---------- - src/lxc/cgroups/isulad_cgfsng.c | 56 ++++++++++++- - src/lxc/lxccontainer.c | 129 ++++++++++++++++++++++++++++++ - 3 files changed, 269 insertions(+), 42 deletions(-) - -diff --git a/src/lxc/cgroups/cgroup2_devices.c b/src/lxc/cgroups/cgroup2_devices.c -index 4efb28fb..05613c51 100644 ---- a/src/lxc/cgroups/cgroup2_devices.c -+++ b/src/lxc/cgroups/cgroup2_devices.c -@@ -25,6 +25,19 @@ - #include - #include - -+#define BPF_LOG_BUF_SIZE (1 << 23) /* 8MB */ -+#ifndef BPF_LOG_LEVEL1 -+#define BPF_LOG_LEVEL1 1 -+#endif -+ -+#ifndef BPF_LOG_LEVEL2 -+#define BPF_LOG_LEVEL2 2 -+#endif -+ -+#ifndef BPF_LOG_LEVEL -+#define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) -+#endif -+ - lxc_log_define(cgroup2_devices, cgroup); - - static int bpf_program_add_instructions(struct bpf_program *prog, -@@ -42,6 +55,8 @@ static int bpf_program_add_instructions(struct bpf_program *prog, - return log_error_errno(-1, ENOMEM, "Failed to reallocate bpf cgroup program"); - - prog->instructions = new_insn; -+ memset(prog->instructions + 
prog->n_instructions, 0, -+ sizeof(struct bpf_insn) * count); - memcpy(prog->instructions + prog->n_instructions, instructions, - sizeof(struct bpf_insn) * count); - prog->n_instructions += count; -@@ -118,29 +133,27 @@ void bpf_program_free(struct bpf_program *prog) - .off = 0, \ - .imm = 0}) - --static int bpf_access_mask(const char *acc) -+static int bpf_access_mask(const char *acc, __u32 *mask) - { -- int mask = 0; -- - if (!acc) -- return mask; -+ return 0; - - for (; *acc; acc++) - switch (*acc) { - case 'r': -- mask |= BPF_DEVCG_ACC_READ; -+ *mask |= BPF_DEVCG_ACC_READ; - break; - case 'w': -- mask |= BPF_DEVCG_ACC_WRITE; -+ *mask |= BPF_DEVCG_ACC_WRITE; - break; - case 'm': -- mask |= BPF_DEVCG_ACC_MKNOD; -+ *mask |= BPF_DEVCG_ACC_MKNOD; - break; - default: - return -EINVAL; - } - -- return mask; -+ return 0; - } - - static int bpf_device_type(char type) -@@ -157,19 +170,18 @@ static int bpf_device_type(char type) - return -1; - } - --static inline bool bpf_device_all_access(int access_mask) -+static inline bool bpf_device_all_access(__u32 access_mask) - { -- return (access_mask == (BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | -- BPF_DEVCG_ACC_MKNOD)); -+ return access_mask == (BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD); - } - - struct bpf_program *bpf_program_new(uint32_t prog_type) - { - __do_free struct bpf_program *prog = NULL; - -- prog = calloc(1, sizeof(struct bpf_program)); -+ prog = zalloc(sizeof(struct bpf_program)); - if (!prog) -- return NULL; -+ return ret_set_errno(NULL, ENOMEM); - - prog->prog_type = prog_type; - prog->kernel_fd = -EBADF; -@@ -209,12 +221,10 @@ int bpf_program_append_device(struct bpf_program *prog, struct device_item *devi - { - int ret; - int jump_nr = 1; -- struct bpf_insn bpf_access_decision[] = { -- BPF_MOV64_IMM(BPF_REG_0, device->allow), -- BPF_EXIT_INSN(), -- }; -- int access_mask; -+ __u32 access_mask = 0; - int device_type; -+ struct bpf_insn bpf_access_decision[2]; -+ bool add_exist = false; - - 
if (!prog || !device) - return ret_set_errno(-1, EINVAL); -@@ -225,6 +235,13 @@ int bpf_program_append_device(struct bpf_program *prog, struct device_item *devi - return 0; - } - -+ ret = bpf_access_mask(device->access, &access_mask); -+ if (ret < 0) -+ return log_error_errno(ret, -ret, "Invalid access mask specified %s", device->access); -+ -+ if (!bpf_device_all_access(access_mask)) -+ jump_nr += 3; -+ - device_type = bpf_device_type(device->type); - if (device_type < 0) - return log_error_errno(-1, EINVAL, "Invalid bpf cgroup device type %c", device->type); -@@ -232,63 +249,67 @@ int bpf_program_append_device(struct bpf_program *prog, struct device_item *devi - if (device_type > 0) - jump_nr++; - -- access_mask = bpf_access_mask(device->access); -- if (!bpf_device_all_access(access_mask)) -- jump_nr += 3; -- -- if (device->major != -1) -+ if (device->major >= 0) - jump_nr++; - -- if (device->minor != -1) -+ if (device->minor >= 0) - jump_nr++; - - if (device_type > 0) { - struct bpf_insn ins[] = { -- BPF_JMP_IMM(BPF_JNE, BPF_REG_2, device_type, jump_nr--), -+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, device_type, jump_nr--), - }; - - ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); - if (ret) - return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); -+ add_exist = true; - } - - if (!bpf_device_all_access(access_mask)) { - struct bpf_insn ins[] = { -- BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), -- BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access_mask), -- BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, jump_nr), -+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_3), -+ BPF_ALU32_IMM(BPF_AND, BPF_REG_1, access_mask), -+ BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, jump_nr-2), - }; - - jump_nr -= 3; - ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); - if (ret) - return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); -+ add_exist = true; - } - - if (device->major >= 0) { - struct bpf_insn ins[] = { -- 
BPF_JMP_IMM(BPF_JNE, BPF_REG_4, device->major, jump_nr--), -+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, device->major, jump_nr--), - }; - - ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); - if (ret) - return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); -+ add_exist = true; - } - - if (device->minor >= 0) { - struct bpf_insn ins[] = { -- BPF_JMP_IMM(BPF_JNE, BPF_REG_5, device->minor, jump_nr--), -+ BPF_JMP_IMM(BPF_JNE, BPF_REG_5, device->minor, jump_nr--), - }; - - ret = bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); - if (ret) - return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); -+ add_exist = true; - } - -- ret = bpf_program_add_instructions(prog, bpf_access_decision, -- ARRAY_SIZE(bpf_access_decision)); -- if (ret) -- return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); -+ if (add_exist) { -+ bpf_access_decision[0] = BPF_MOV64_IMM(BPF_REG_0, device->allow); -+ bpf_access_decision[1] = BPF_EXIT_INSN(); -+ ret = bpf_program_add_instructions(prog, bpf_access_decision, -+ ARRAY_SIZE(bpf_access_decision)); -+ if (ret) -+ return log_error_errno(-1, errno, "Failed to add instructions to bpf cgroup program"); -+ } - - return 0; - } -@@ -310,30 +331,49 @@ int bpf_program_finalize(struct bpf_program *prog) - return bpf_program_add_instructions(prog, ins, ARRAY_SIZE(ins)); - } - --static int bpf_program_load_kernel(struct bpf_program *prog, char *log_buf, -- size_t log_size) -+static int bpf_program_load_kernel(struct bpf_program *prog) - { -+ __do_free char *log_buf = NULL; -+ __u32 log_level = 0; -+ __u32 log_size = 0; - union bpf_attr attr; -+ struct rlimit limit = { -+ .rlim_cur = RLIM_INFINITY, -+ .rlim_max = RLIM_INFINITY, -+ }; - - if (prog->kernel_fd >= 0) { -- memset(log_buf, 0, log_size); - return 0; - } - -+ if (lxc_log_get_level() <= LXC_LOG_LEVEL_DEBUG) { -+ log_buf = zalloc(BPF_LOG_BUF_SIZE); -+ if (!log_buf) { -+ WARN("Failed to allocate 
bpf log buffer"); -+ } else { -+ log_level = BPF_LOG_LEVEL; -+ log_size = BPF_LOG_BUF_SIZE; -+ } -+ } -+ -+ if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0) -+ return log_error_errno(-1, errno, "Failed to set rlimit memlock to unlimited"); -+ - attr = (union bpf_attr){ - .prog_type = prog->prog_type, - .insns = PTR_TO_UINT64(prog->instructions), - .insn_cnt = prog->n_instructions, - .license = PTR_TO_UINT64("GPL"), - .log_buf = PTR_TO_UINT64(log_buf), -- .log_level = !!log_buf, -+ .log_level = log_level, - .log_size = log_size, - }; - - prog->kernel_fd = bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); - if (prog->kernel_fd < 0) -- return log_error_errno(-1, errno, "Failed to load bpf program"); -+ return log_error_errno(-1, errno, "Failed to load bpf program: %s", log_buf); - -+ TRACE("Loaded bpf program: %s", log_buf ?: "(null)"); - return 0; - } - -@@ -362,7 +402,7 @@ int bpf_program_cgroup_attach(struct bpf_program *prog, int type, - return true; - } - -- ret = bpf_program_load_kernel(prog, NULL, 0); -+ ret = bpf_program_load_kernel(prog); - if (ret < 0) - return log_error_errno(-1, ret, "Failed to load bpf program"); - -@@ -518,11 +558,15 @@ bool bpf_devices_cgroup_supported(void) - if (prog < 0) - return log_trace(false, "Failed to allocate new bpf device cgroup program"); - -+ ret = bpf_program_init(prog); -+ if (ret) -+ return log_error_errno(false, ENOMEM, "Failed to initialize bpf program"); -+ - ret = bpf_program_add_instructions(prog, dummy, ARRAY_SIZE(dummy)); - if (ret < 0) - return log_trace(false, "Failed to add new instructions to bpf device cgroup program"); - -- ret = bpf_program_load_kernel(prog, NULL, 0); -+ ret = bpf_program_load_kernel(prog); - if (ret < 0) - return log_trace(false, "Failed to load new bpf device cgroup program"); - -diff --git a/src/lxc/cgroups/isulad_cgfsng.c b/src/lxc/cgroups/isulad_cgfsng.c -index e16f8a19..c80527d5 100644 ---- a/src/lxc/cgroups/isulad_cgfsng.c -+++ b/src/lxc/cgroups/isulad_cgfsng.c -@@ -823,6 +823,9 @@ static bool 
isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char * - char *cgpath, *slash; - bool sub_mk_success = false; - -+ if (is_unified_hierarchy(h)) -+ return true; -+ - if (!string_in_list(h->controllers, "cpuset")) - return true; - -@@ -1288,6 +1291,20 @@ __cgfsng_ops static bool isulad_cgfsng_mount(struct cgroup_ops *ops, - ERROR("Failed to create directory: %s", tmpfspath); - goto on_error; - } -+ -+ if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) { -+ if (has_cgns && wants_force_mount) { -+ /* -+ * If cgroup namespaces are supported but the container -+ * will not have CAP_SYS_ADMIN after it has started we -+ * need to mount the cgroups manually. -+ */ -+ return cg_mount_in_cgroup_namespace(type, ops->unified, tmpfspath) == 0; -+ } -+ -+ return cg_mount_cgroup_full(type, ops->unified, tmpfspath) == 0; -+ } -+ - ret = safe_mount(NULL, tmpfspath, "tmpfs", - MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME, - "size=10240k,mode=755", root, handler->conf->lsm_se_mount_context); -@@ -2196,8 +2213,16 @@ __cgfsng_ops static int isulad_cgfsng_set(struct cgroup_ops *ops, - h = get_hierarchy(ops, controller); - if (h) { - char *fullpath; -- - fullpath = build_full_cgpath_from_monitorpath(h, path, filename); -+ -+ if (strcmp(filename, "io.weight") == 0 || strcmp(filename, "io.bfq.weight") == 0) { -+ if (!file_exists(fullpath)) { -+ free(path); -+ free(fullpath); -+ return 0; -+ } -+ } -+ - ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666); - free(fullpath); - } -@@ -2428,6 +2453,9 @@ __cgfsng_ops static bool isulad_cgfsng_setup_limits_legacy(struct cgroup_ops *op - if (!ops->hierarchies) - return ret_set_errno(false, EINVAL); - -+ if (pure_unified_layout(ops)) -+ return true; -+ - sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings); - if (!sorted_cgroup_settings) - return false; -@@ -2528,6 +2556,7 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops, - __cgfsng_ops static bool isulad_cgfsng_setup_limits(struct cgroup_ops 
*ops, - struct lxc_handler *handler) - { -+ __do_free char *path = NULL; - struct lxc_list *cgroup_settings, *iterator; - struct hierarchy *h; - struct lxc_conf *conf; -@@ -2549,6 +2578,9 @@ __cgfsng_ops static bool isulad_cgfsng_setup_limits(struct cgroup_ops *ops, - return true; - cgroup_settings = &conf->cgroup2; - -+ if (!pure_unified_layout(ops)) -+ return true; -+ - if (!ops->unified) - return false; - h = ops->unified; -@@ -2560,7 +2592,29 @@ __cgfsng_ops static bool isulad_cgfsng_setup_limits(struct cgroup_ops *ops, - if (strncmp("devices", cg->subsystem, 7) == 0) { - ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem, - cg->value); -+ } else if (strcmp(cg->subsystem, "files.limit") == 0) { -+ long long int setvalue = 0; -+ const char *cgvalue = cg->value; -+ -+ if (lxc_safe_long_long(cgvalue, &setvalue) != 0) -+ return log_error(false, "Invalid integer value %s", cgvalue); -+ -+ if (setvalue <= 0) -+ cgvalue = "max"; -+ -+ ret = lxc_write_openat(h->container_full_path, -+ cg->subsystem, cgvalue, -+ strlen(cgvalue)); -+ if (ret < 0) -+ return log_error_errno(false, errno, "Failed to set \"%s\" to \"%s\"", -+ cg->subsystem, cgvalue); - } else { -+ if (strcmp(cg->subsystem, "io.weight") == 0 || strcmp(cg->subsystem, "io.bfq.weight") == 0) { -+ path = must_make_path(h->container_full_path, cg->subsystem, NULL); -+ if (!file_exists(path)) { -+ continue; -+ } -+ } - ret = lxc_write_openat(h->container_full_path, - cg->subsystem, cg->value, - strlen(cg->value)); -diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c -index 06552ce5..5769b251 100644 ---- a/src/lxc/lxccontainer.c -+++ b/src/lxc/lxccontainer.c -@@ -87,6 +87,9 @@ - - lxc_log_define(lxccontainer, lxc); - -+typedef bool (*func_is_io_stat_read)(const char *value); -+typedef bool (*func_is_io_stat_write)(const char *value); -+ - static bool do_lxcapi_destroy(struct lxc_container *c); - static const char *lxcapi_get_config_path(struct lxc_container *c); - #define do_lxcapi_get_config_path(c) 
lxcapi_get_config_path(c) -@@ -5768,6 +5771,26 @@ static uint64_t metrics_get_ull(struct lxc_container *c, struct cgroup_ops *cgro - return val; - } - -+static uint64_t metrics_get_ull_with_max(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item) -+{ -+ char buf[80] = {0}; -+ int len = 0; -+ uint64_t val = 0; -+ -+ len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf), c->name, c->config_path); -+ if (len <= 0) { -+ DEBUG("unable to read cgroup item %s", item); -+ return 0; -+ } -+ -+ if (strcmp(buf, "max") == 0) { -+ return ULONG_MAX; -+ } -+ -+ val = strtoull(buf, NULL, 0); -+ return val; -+} -+ - static inline bool is_blk_metrics_read(const char *value) - { - return strcmp(value, "Read") == 0; -@@ -5826,6 +5849,60 @@ err_out: - return; - } - -+static void metrics_get_io_stats_v2(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item, struct lxc_blkio_metrics *stats, func_is_io_stat_read is_io_stat_read, func_is_io_stat_write is_io_stat_write) -+{ -+#define BUFSIZE 4096 -+ char buf[BUFSIZE] = {0}; -+ int i = 0; -+ int j = 0; -+ int len = 0; -+ char **lines = NULL; -+ char **cols = NULL; -+ char **kv = NULL; -+ -+ len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf), c->name, c->config_path); -+ if (len <= 0) { -+ DEBUG("unable to read cgroup item %s", item); -+ return; -+ } -+ -+ lines = lxc_string_split_and_trim(buf, '\n'); -+ if (lines == NULL) { -+ return; -+ } -+ -+ (void)memset(stats, 0, sizeof(struct lxc_blkio_metrics)); -+ // line example: -+ // 259:0 rbytes=0 wbytes=12288 rios=0 wios=4 dbytes=0 dios=0 -+ for (i = 0; lines[i]; i++) { -+ cols = lxc_string_split_and_trim(lines[i], ' '); -+ if (cols == NULL || lxc_array_len((void **)cols) < 2) { -+ goto err_out; -+ } -+ len = lxc_array_len((void **)cols); -+ for (j = 1; j < len; j++) { -+ kv = lxc_string_split(cols[j], '='); -+ if (kv == NULL || lxc_array_len((void **)kv) != 2) { -+ lxc_free_array((void **)kv, free); -+ continue; -+ } -+ if 
(is_io_stat_read(kv[0])) { -+ stats->read += strtoull(kv[1], NULL, 0); -+ } else if (is_io_stat_write(kv[0])) { -+ stats->write += strtoull(kv[1], NULL, 0); -+ } -+ lxc_free_array((void **)kv, free); -+ } -+ lxc_free_array((void **)cols, free); -+ } -+ -+ stats->total = stats->read + stats->write; -+ -+err_out: -+ lxc_free_array((void **)lines, free); -+ return; -+} -+ - static uint64_t metrics_match_get_ull(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item, const char *match, int column) - { - #define BUFSIZE 4096 -@@ -5874,6 +5951,54 @@ err_out: - return val; - } - -+static bool is_io_stat_rbytes(const char *value) -+{ -+ return strcmp(value, "rbytes") == 0; -+} -+ -+static bool is_io_stat_wbytes(const char *value) -+{ -+ return strcmp(value, "wbytes") == 0; -+} -+ -+static bool is_io_stat_rios(const char *value) -+{ -+ return strcmp(value, "rios") == 0; -+} -+ -+static bool is_io_stat_wios(const char *value) -+{ -+ return strcmp(value, "wios") == 0; -+} -+ -+static bool unified_metrics_get(struct lxc_container *c, struct cgroup_ops *cgroup_ops, struct lxc_container_metrics *metrics) -+{ -+ // cpu -+ metrics->cpu_use_nanos = metrics_match_get_ull(c, cgroup_ops, "cpu.stat", "usage_usec", 1) * 1000; -+ metrics->cpu_use_user = metrics_match_get_ull(c, cgroup_ops, "cpu.stat", "user_usec", 1) * 1000; -+ metrics->cpu_use_sys = metrics_match_get_ull(c, cgroup_ops, "cpu.stat", "system_usec", 1) * 1000; -+ -+ // io -+ metrics_get_io_stats_v2(c, cgroup_ops, "io.stat", &metrics->io_service_bytes, is_io_stat_rbytes, is_io_stat_wbytes); -+ metrics_get_io_stats_v2(c, cgroup_ops, "io.stat", &metrics->io_serviced, is_io_stat_rios, is_io_stat_wios); -+ -+ // memory -+ metrics->mem_used = metrics_get_ull(c, cgroup_ops, "memory.current"); -+ metrics->mem_limit = metrics_get_ull_with_max(c, cgroup_ops, "memory.max"); -+ metrics->inactive_file_total = metrics_match_get_ull(c, cgroup_ops, "memory.stat", "inactive_file", 1); -+ metrics->cache = 
metrics_match_get_ull(c, cgroup_ops, "memory.stat", "file", 1); -+ metrics->cache_total = metrics->cache; -+ -+ // cgroup v2 does not support kernel memory -+ metrics->kmem_used = 0; -+ metrics->kmem_limit = 0; -+ -+ // pids -+ metrics->pids_current = metrics_get_ull(c, cgroup_ops, "pids.current"); -+ -+ return true; -+} -+ - /* isulad add get container metrics */ - static bool do_lxcapi_get_container_metrics(struct lxc_container *c, struct lxc_container_metrics *metrics) - { -@@ -5897,6 +6022,10 @@ static bool do_lxcapi_get_container_metrics(struct lxc_container *c, struct lxc - return false; - } - -+ if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) { -+ return unified_metrics_get(c, cgroup_ops, metrics); -+ } -+ - metrics->cpu_use_nanos = metrics_get_ull(c, cgroup_ops, "cpuacct.usage"); - metrics->pids_current = metrics_get_ull(c, cgroup_ops, "pids.current"); - --- -2.20.1 - diff --git a/0021-support-isula-exec-workdir.patch b/0021-support-isula-exec-workdir.patch deleted file mode 100644 index 202f2ab..0000000 --- a/0021-support-isula-exec-workdir.patch +++ /dev/null @@ -1,90 +0,0 @@ -From ccdbeeb8b502e06355c2e55c9f980e5142891b7c Mon Sep 17 00:00:00 2001 -From: WangFengTu -Date: Thu, 11 Mar 2021 09:18:51 +0800 -Subject: [PATCH] support isula exec --workdir - -Signed-off-by: WangFengTu ---- - src/lxc/attach.c | 4 ++-- - src/lxc/tools/arguments.h | 1 + - src/lxc/tools/lxc_attach.c | 15 +++++++++++++-- - 3 files changed, 16 insertions(+), 4 deletions(-) - -diff --git a/src/lxc/attach.c b/src/lxc/attach.c -index c5fc561..2ed2329 100644 ---- a/src/lxc/attach.c -+++ b/src/lxc/attach.c -@@ -791,9 +791,9 @@ static int attach_child_main(struct attach_clone_payload *payload) - - #ifdef HAVE_ISULAD - /* isulad: set workdir */ -- if (init_ctx->container->lxc_conf->init_cwd) { -+ if (options->initial_cwd || init_ctx->container->lxc_conf->init_cwd) { - char *init_cwd; -- init_cwd = init_ctx->container->lxc_conf->init_cwd; -+ init_cwd = options->initial_cwd ? 
options->initial_cwd : init_ctx->container->lxc_conf->init_cwd; - /* try to create workdir if not exist */ - struct stat st; - if (stat(init_cwd, &st) < 0 && mkdir_p(init_cwd, 0750) < 0) { -diff --git a/src/lxc/tools/arguments.h b/src/lxc/tools/arguments.h -index 41ea109..c16d99f 100644 ---- a/src/lxc/tools/arguments.h -+++ b/src/lxc/tools/arguments.h -@@ -41,6 +41,7 @@ struct lxc_arguments { - /* for lxc-start */ - const char *share_ns[32]; /* size must be greater than LXC_NS_MAX */ - #ifdef HAVE_ISULAD -+ char *workdir; - const char *container_info; /* isulad: file used to store pid and ppid info of container */ - char *terminal_fifos[3]; /* isulad add, fifos used to redirct stdin/out/err */ - const char *exit_monitor_fifo; /* isulad: fifo used to monitor state of monitor process */ -diff --git a/src/lxc/tools/lxc_attach.c b/src/lxc/tools/lxc_attach.c -index dbddc2a..3bfbe6a 100644 ---- a/src/lxc/tools/lxc_attach.c -+++ b/src/lxc/tools/lxc_attach.c -@@ -76,6 +76,7 @@ static const struct option my_longopts[] = { - {"uid", required_argument, 0, 'u'}, - {"gid", required_argument, 0, 'g'}, - #else -+ {"workdir", required_argument, 0, 'w'}, - {"user", required_argument, 0, 'u'}, - {"in-fifo", required_argument, 0, OPT_INPUT_FIFO}, /* isulad add terminal fifos*/ - {"out-fifo", required_argument, 0, OPT_OUTPUT_FIFO}, -@@ -143,8 +144,9 @@ Options :\n\ - " - #else - "\ -- --user User ID (format: UID[:GID])\n\ -- --timeout Timeout in seconds (default: 0)\n\ -+ --user User ID (format: UID[:GID])\n\ -+ -w, --workdir Working directory inside the container.\n\ -+ --timeout Timeout in seconds (default: 0)\n\ - " - #endif - , -@@ -295,6 +297,9 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg) - return -1; - } - break; -+ case 'w': -+ args->workdir=arg; -+ break; - case OPT_INPUT_FIFO: - args->terminal_fifos[0] = arg; - break; -@@ -639,6 +644,12 @@ int main(int argc, char *argv[]) - attach_options.open_stdin = true; - } - -+#ifdef HAVE_ISULAD -+ if 
(my_args.workdir) { -+ attach_options.initial_cwd = my_args.workdir; -+ } -+#endif -+ - /* isulad: add do attach background */ - if (attach_options.attach_flags & LXC_ATTACH_TERMINAL) - wexit = do_attach_foreground(c, &command, &attach_options, &errmsg); --- -2.20.1 - diff --git a/0022-print-error-message-if-process-workdir-failed.patch b/0022-print-error-message-if-process-workdir-failed.patch deleted file mode 100644 index 8b75967..0000000 --- a/0022-print-error-message-if-process-workdir-failed.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 540981ef79b921fea26e24456fbecc648eaf6e9e Mon Sep 17 00:00:00 2001 -From: WangFengTu -Date: Sat, 8 May 2021 11:02:08 +0800 -Subject: [PATCH] print error message if process workdir failed - -Signed-off-by: WangFengTu ---- - src/lxc/attach.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/lxc/attach.c b/src/lxc/attach.c -index 2ed2329..68f4148 100644 ---- a/src/lxc/attach.c -+++ b/src/lxc/attach.c -@@ -798,10 +798,14 @@ static int attach_child_main(struct attach_clone_payload *payload) - struct stat st; - if (stat(init_cwd, &st) < 0 && mkdir_p(init_cwd, 0750) < 0) { - SYSERROR("Try to create directory \"%s\" as workdir failed when attach", init_cwd); -+ lxc_write_error_message(msg_fd, "Try to create directory \"%s\" as workdir failed when attach: %s", -+ init_cwd, strerror(errno)); - goto on_error; - } - if (chdir(init_cwd)) { - SYSERROR("Could not change directory to \"%s\" when attach", init_cwd); -+ lxc_write_error_message(msg_fd, "Could not change directory to \"%s\" when attach: %s", -+ init_cwd, strerror(errno)); - goto on_error; - } - } --- -2.25.1 - diff --git a/0023-log-support-long-syslog-tag.patch b/0023-log-support-long-syslog-tag.patch deleted file mode 100644 index a662c6d..0000000 --- a/0023-log-support-long-syslog-tag.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 17d87a933dd7e3744a68c61aaec21aedebce3440 Mon Sep 17 00:00:00 2001 -From: haozi007 -Date: Thu, 21 Jan 2021 11:06:31 +0800 -Subject: [PATCH] log: 
support long syslog tag - -Signed-off-by: haozi007 ---- - src/lxc/confile.c | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - -diff --git a/src/lxc/confile.c b/src/lxc/confile.c -index f108b37b..e898e23b 100644 ---- a/src/lxc/confile.c -+++ b/src/lxc/confile.c -@@ -6634,13 +6634,10 @@ static int set_config_console_log_driver(const char *key, const char *value, - static int set_config_console_syslog_tag(const char *key, const char *value, - struct lxc_conf *lxc_conf, void *data) - { -- char buf[16] = { 0 }; -- - if (value == NULL) { - return -1; - } -- (void)strlcpy(buf, value, 16); -- return set_config_string_item(&lxc_conf->console.log_syslog_tag, buf); -+ return set_config_string_item(&lxc_conf->console.log_syslog_tag, value); - } - - static int parse_facility(const char *facility) --- -2.25.1 - diff --git a/0024-log-adjust-log-level-from-error-to-warn.patch b/0024-log-adjust-log-level-from-error-to-warn.patch deleted file mode 100644 index 5a07fcb..0000000 --- a/0024-log-adjust-log-level-from-error-to-warn.patch +++ /dev/null @@ -1,68 +0,0 @@ -From 7e829529bfd45dfdb26f43d50c1296de3456695f Mon Sep 17 00:00:00 2001 -From: Li Feng -Date: Thu, 13 May 2021 14:57:20 +0800 -Subject: [PATCH] log: adjust log level from error to warn - -Signed-off-by: Li Feng ---- - src/lxc/attach.c | 2 +- - src/lxc/commands.c | 2 +- - src/lxc/terminal.c | 2 +- - src/lxc/tools/lxc_attach.c | 2 +- - 4 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/lxc/attach.c b/src/lxc/attach.c -index 68f414875..5225e9982 100644 ---- a/src/lxc/attach.c -+++ b/src/lxc/attach.c -@@ -1222,7 +1222,7 @@ static int attach_signal_handler(int fd, uint32_t events, void *data, - info.si_pid = 0; - ret = waitid(P_PID, *pid, &info, WEXITED | WNOWAIT | WNOHANG); - if (ret == 0 && info.si_pid == *pid) { -- return log_error(LXC_MAINLOOP_CLOSE, "Container attach init process %d exited", *pid); -+ return log_warn(LXC_MAINLOOP_CLOSE, "Container attach init process %d exited", *pid); - } - - 
return LXC_MAINLOOP_CONTINUE; -diff --git a/src/lxc/commands.c b/src/lxc/commands.c -index 70c56579e..b954453c0 100644 ---- a/src/lxc/commands.c -+++ b/src/lxc/commands.c -@@ -1204,7 +1204,7 @@ int lxc_cmd_serve_state_clients(const char *name, const char *lxcpath, - - ret = lxc_cmd(name, &cmd, &stopped, lxcpath, NULL); - if (ret < 0) -- return log_error_errno(-1, errno, "Failed to serve state clients"); -+ return log_warn_errno(-1, errno, "Failed to serve state clients"); - - return 0; - } -diff --git a/src/lxc/terminal.c b/src/lxc/terminal.c -index 1e467f5a6..5d836916e 100644 ---- a/src/lxc/terminal.c -+++ b/src/lxc/terminal.c -@@ -804,7 +804,7 @@ int lxc_terminal_io_cb(int fd, uint32_t events, void *data, - terminal->pipes[0][1] = -EBADF; - return LXC_MAINLOOP_CONTINUE; - } else { -- ERROR("Handler received unexpected file descriptor"); -+ WARN("Handler received unexpected file descriptor"); - } - close(fd); - return LXC_MAINLOOP_CLOSE; -diff --git a/src/lxc/tools/lxc_attach.c b/src/lxc/tools/lxc_attach.c -index 3bfbe6a19..4d69e9448 100644 ---- a/src/lxc/tools/lxc_attach.c -+++ b/src/lxc/tools/lxc_attach.c -@@ -428,7 +428,7 @@ static int do_attach_foreground(struct lxc_container *c, lxc_attach_command_t *c - wexit = EXIT_SIGNAL_OFFSET + signal; - } - -- ERROR("Execd pid %d exit with %d", pid, wexit); -+ WARN("Execd pid %d exit with %d", pid, wexit); - - out: - if (c->lxc_conf->errmsg) { --- -2.25.1 - diff --git a/0025-get-cgroup-data-len-first-and-malloc-read-buff-by-le.patch b/0025-get-cgroup-data-len-first-and-malloc-read-buff-by-le.patch deleted file mode 100644 index 38e1812..0000000 --- a/0025-get-cgroup-data-len-first-and-malloc-read-buff-by-le.patch +++ /dev/null @@ -1,160 +0,0 @@ -From 35b321354e3c5216b3fa6aed408e985273e0575e Mon Sep 17 00:00:00 2001 -From: zhangxiaoyu -Date: Mon, 31 May 2021 20:31:26 +0800 -Subject: [PATCH 25/25] get cgroup data len first, and malloc read buff by len - -Signed-off-by: zhangxiaoyu ---- - src/lxc/lxccontainer.c | 56 
++++++++++++++++++++++++++++-------------- - 1 file changed, 38 insertions(+), 18 deletions(-) - -diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c -index 5769b251..01e6cbb6 100644 ---- a/src/lxc/lxccontainer.c -+++ b/src/lxc/lxccontainer.c -@@ -5757,11 +5757,11 @@ WRAP_API_1(bool, lxcapi_set_start_timeout, unsigned int) - - static uint64_t metrics_get_ull(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item) - { -- char buf[80] = {0}; -+ char buf[81] = {0}; - int len = 0; - uint64_t val = 0; - -- len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf), c->name, c->config_path); -+ len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf) - 1, c->name, c->config_path); - if (len <= 0) { - DEBUG("unable to read cgroup item %s", item); - return 0; -@@ -5773,11 +5773,11 @@ static uint64_t metrics_get_ull(struct lxc_container *c, struct cgroup_ops *cgro - - static uint64_t metrics_get_ull_with_max(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item) - { -- char buf[80] = {0}; -+ char buf[81] = {0}; - int len = 0; - uint64_t val = 0; - -- len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf), c->name, c->config_path); -+ len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf) - 1, c->name, c->config_path); - if (len <= 0) { - DEBUG("unable to read cgroup item %s", item); - return 0; -@@ -5808,22 +5808,29 @@ static inline bool is_blk_metrics_total(const char *value) - - static void metrics_get_blk_stats(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item, struct lxc_blkio_metrics *stats) - { --#define BUFSIZE 4096 -- char buf[BUFSIZE] = {0}; -+ char *buf = NULL; - int i = 0; - int len = 0; -+ int ret = 0; - char **lines = NULL; - char **cols = NULL; - -- len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf), c->name, c->config_path); -+ len = cgroup_ops->get(cgroup_ops, item, NULL, 0, c->name, c->config_path); - if (len <= 0) { - DEBUG("unable to read cgroup item %s", item); - return; - } - 
-+ buf = malloc(len + 1); -+ (void)memset(buf, 0, len + 1); -+ ret = cgroup_ops->get(cgroup_ops, item, buf, len, c->name, c->config_path); -+ if (ret != len) { -+ DEBUG("get cgroup item %s len %d has changed to %d", item, len, ret); -+ } -+ - lines = lxc_string_split_and_trim(buf, '\n'); - if (lines == NULL) { -- return; -+ goto out; - } - - (void)memset(stats, 0, sizeof(struct lxc_blkio_metrics)); -@@ -5833,12 +5840,14 @@ static void metrics_get_blk_stats(struct lxc_container *c, struct cgroup_ops *cg - if (cols == NULL) { - goto err_out; - } -- if (is_blk_metrics_read(cols[1])) { -- stats->read += strtoull(cols[2], NULL, 0); -- } else if (is_blk_metrics_write(cols[1])) { -- stats->write += strtoull(cols[2], NULL, 0); -+ if (lxc_array_len((void **)cols) == 3) { -+ if (is_blk_metrics_read(cols[1])) { -+ stats->read += strtoull(cols[2], NULL, 0); -+ } else if (is_blk_metrics_write(cols[1])) { -+ stats->write += strtoull(cols[2], NULL, 0); -+ } - } -- if (is_blk_metrics_total(cols[0])) { -+ if (lxc_array_len((void **)cols) == 2 && is_blk_metrics_total(cols[0])) { - stats->total = strtoull(cols[1], NULL, 0); - } - -@@ -5846,29 +5855,38 @@ static void metrics_get_blk_stats(struct lxc_container *c, struct cgroup_ops *cg - } - err_out: - lxc_free_array((void **)lines, free); -+out: -+ free(buf); - return; - } - - static void metrics_get_io_stats_v2(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item, struct lxc_blkio_metrics *stats, func_is_io_stat_read is_io_stat_read, func_is_io_stat_write is_io_stat_write) - { --#define BUFSIZE 4096 -- char buf[BUFSIZE] = {0}; -+ char *buf = NULL; - int i = 0; - int j = 0; - int len = 0; -+ int ret = 0; - char **lines = NULL; - char **cols = NULL; - char **kv = NULL; - -- len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf), c->name, c->config_path); -+ len = cgroup_ops->get(cgroup_ops, item, NULL, 0, c->name, c->config_path); - if (len <= 0) { - DEBUG("unable to read cgroup item %s", item); - return; - } - -+ 
buf = malloc(len + 1); -+ (void)memset(buf, 0, len + 1); -+ ret = cgroup_ops->get(cgroup_ops, item, buf, len, c->name, c->config_path); -+ if (ret != len) { -+ DEBUG("get cgroup item %s len %d change to %d", item, len, ret); -+ } -+ - lines = lxc_string_split_and_trim(buf, '\n'); - if (lines == NULL) { -- return; -+ goto out; - } - - (void)memset(stats, 0, sizeof(struct lxc_blkio_metrics)); -@@ -5900,6 +5918,8 @@ static void metrics_get_io_stats_v2(struct lxc_container *c, struct cgroup_ops * - - err_out: - lxc_free_array((void **)lines, free); -+out: -+ free(buf); - return; - } - -@@ -5915,7 +5935,7 @@ static uint64_t metrics_match_get_ull(struct lxc_container *c, struct cgroup_ops - char **cols = NULL; - size_t matchlen = 0; - -- len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf), c->name, c->config_path); -+ len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf) - 1, c->name, c->config_path); - if (len <= 0) { - DEBUG("unable to read cgroup item %s", item); - goto err_out; --- -2.25.1 - diff --git a/0026-coredump-fix-coredump-when-cgroup-get-return-error.patch b/0026-coredump-fix-coredump-when-cgroup-get-return-error.patch deleted file mode 100644 index fba43b2..0000000 --- a/0026-coredump-fix-coredump-when-cgroup-get-return-error.patch +++ /dev/null @@ -1,57 +0,0 @@ -From aeb038c9f17ba6a82bb881ff6e84f0ac4c980723 Mon Sep 17 00:00:00 2001 -From: LiFeng -Date: Sat, 12 Jun 2021 13:54:25 +0800 -Subject: [PATCH] coredump: fix coredump when cgroup get return error - -Signed-off-by: LiFeng ---- - src/lxc/lxccontainer.c | 10 ++++++---- - src/lxc/string_utils.c | 4 ++++ - 2 files changed, 10 insertions(+), 4 deletions(-) - -diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c -index 01e6cbb69..2d581911a 100644 ---- a/src/lxc/lxccontainer.c -+++ b/src/lxc/lxccontainer.c -@@ -5824,8 +5824,9 @@ static void metrics_get_blk_stats(struct lxc_container *c, struct cgroup_ops *cg - buf = malloc(len + 1); - (void)memset(buf, 0, len + 1); - ret = 
cgroup_ops->get(cgroup_ops, item, buf, len, c->name, c->config_path); -- if (ret != len) { -- DEBUG("get cgroup item %s len %d has changed to %d", item, len, ret); -+ if (ret <= 0) { -+ DEBUG("unable to read cgroup item %s", item); -+ goto out; - } - - lines = lxc_string_split_and_trim(buf, '\n'); -@@ -5880,8 +5881,9 @@ static void metrics_get_io_stats_v2(struct lxc_container *c, struct cgroup_ops * - buf = malloc(len + 1); - (void)memset(buf, 0, len + 1); - ret = cgroup_ops->get(cgroup_ops, item, buf, len, c->name, c->config_path); -- if (ret != len) { -- DEBUG("get cgroup item %s len %d change to %d", item, len, ret); -+ if (ret <= 0) { -+ DEBUG("unable to read cgroup item %s", item); -+ goto out; - } - - lines = lxc_string_split_and_trim(buf, '\n'); -diff --git a/src/lxc/string_utils.c b/src/lxc/string_utils.c -index 9118add02..d3c60897c 100644 ---- a/src/lxc/string_utils.c -+++ b/src/lxc/string_utils.c -@@ -473,6 +473,10 @@ char **lxc_string_split_and_trim(const char *string, char _sep) - result_count++; - } - -+ if (result == NULL) { -+ return calloc(1, sizeof(char *)); -+ } -+ - /* if we allocated too much, reduce it */ - return realloc(result, (result_count + 1) * sizeof(char *)); - --- -2.25.1 - diff --git a/0027-add-help-for-new-arguments.patch b/0027-add-help-for-new-arguments.patch deleted file mode 100644 index 086c2eb..0000000 --- a/0027-add-help-for-new-arguments.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 3d673da7da97058f6e4a200d924dbbdcfeb63678 Mon Sep 17 00:00:00 2001 -From: haozi007 -Date: Thu, 26 Aug 2021 13:50:41 +0100 -Subject: [PATCH] add help for new arguments - -Signed-off-by: haozi007 ---- - src/lxc/tools/lxc_attach.c | 10 ++++++++-- - src/lxc/tools/lxc_start.c | 16 ++++++++++++++-- - 2 files changed, 22 insertions(+), 4 deletions(-) - -diff --git a/src/lxc/tools/lxc_attach.c b/src/lxc/tools/lxc_attach.c -index 4d69e944..9931b39f 100644 ---- a/src/lxc/tools/lxc_attach.c -+++ b/src/lxc/tools/lxc_attach.c -@@ -144,9 +144,15 @@ Options :\n\ - " 
- #else - "\ -- --user User ID (format: UID[:GID])\n\ - -w, --workdir Working directory inside the container.\n\ -- --timeout Timeout in seconds (default: 0)\n\ -+ -u, --user User ID (format: UID[:GID])\n\ -+ --in-fifo Stdin fifo path\n\ -+ --out-fifo Stdout fifo path\n\ -+ --err-fifo Stderr fifo path\n\ -+ --suffi ID for mutli-attach on one container\n\ -+ --timeout Timeout in seconds (default: 0)\n\ -+ --disable-pty Disable pty for attach\n\ -+ --open-stdin Open stdin for attach\n\ - " - #endif - , -diff --git a/src/lxc/tools/lxc_start.c b/src/lxc/tools/lxc_start.c -index 4f2c8afa..3ef59610 100644 ---- a/src/lxc/tools/lxc_start.c -+++ b/src/lxc/tools/lxc_start.c -@@ -62,7 +62,6 @@ static const struct option my_longopts[] = { - {"start-timeout", required_argument, 0, OPT_START_TIMEOUT}, - {"disable-pty", no_argument, 0, OPT_DISABLE_PTY}, - {"open-stdin", no_argument, 0, OPT_OPEN_STDIN}, -- {"start-timeout", required_argument, 0, OPT_START_TIMEOUT}, - #endif - LXC_COMMON_OPTIONS - }; -@@ -86,7 +85,20 @@ Options :\n\ - Note: --daemon implies --close-all-fds\n\ - -s, --define KEY=VAL Assign VAL to configuration variable KEY\n\ - --share-[net|ipc|uts|pid]=NAME Share a namespace with another container or pid\n\ --", -+" -+#ifdef HAVE_ISULAD -+"\ -+ --in-fifo Stdin fifo path\n\ -+ --out-fifo Stdout fifo path\n\ -+ --err-fifo Stderr fifo path\n\ -+ --container-pidfile File path for container pid\n\ -+ --exit-fifo Fifo path to save exit code\n\ -+ --start-timeout Timeout for start container\n\ -+ --disable-pty Disable pty for attach\n\ -+ --open-stdin Open stdin for attach\n\ -+" -+#endif -+, - .options = my_longopts, - .parser = my_parser, - .checker = NULL, --- -2.20.1 - diff --git a/0028-seccomp-init-and-destroy-notifier.cookie.patch b/0028-seccomp-init-and-destroy-notifier.cookie.patch deleted file mode 100644 index bc983eb..0000000 --- a/0028-seccomp-init-and-destroy-notifier.cookie.patch +++ /dev/null @@ -1,37 +0,0 @@ -From aca2dde947317d4e3c1a75ec7fdebf2ae70878a2 
Mon Sep 17 00:00:00 2001 -From: Evgeny Vereshchagin -Date: Thu, 15 Apr 2021 07:09:10 +0000 -Subject: [PATCH] seccomp: init and destroy notifier.cookie - -It's a follow-up to 84cf6d259b24e4ad48e - -Closes https://github.com/lxc/lxc/issues/3806 - -Signed-off-by: Evgeny Vereshchagin ---- - src/lxc/seccomp.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c -index 4b9d23c55..ebbba80f7 100644 ---- a/src/lxc/seccomp.c -+++ b/src/lxc/seccomp.c -@@ -1867,6 +1867,7 @@ void lxc_seccomp_free(struct lxc_seccomp *seccomp) - seccomp_notify_free(seccomp->notifier.req_buf, seccomp->notifier.rsp_buf); - seccomp->notifier.req_buf = NULL; - seccomp->notifier.rsp_buf = NULL; -+ free_disarm(seccomp->notifier.cookie); - #endif - } - -@@ -2076,6 +2077,7 @@ void seccomp_conf_init(struct lxc_conf *conf) - sizeof(conf->seccomp.notifier.proxy_addr)); - conf->seccomp.notifier.req_buf = NULL; - conf->seccomp.notifier.rsp_buf = NULL; -+ conf->seccomp.notifier.cookie = NULL; - #endif - } - --- -2.25.1 - diff --git a/0029-just-use-origin-loop-if-do-not-have-io.patch b/0029-just-use-origin-loop-if-do-not-have-io.patch deleted file mode 100644 index 1b38a3c..0000000 --- a/0029-just-use-origin-loop-if-do-not-have-io.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 21aba903a2e0d744eb54a7737bc11585c2be9aa3 Mon Sep 17 00:00:00 2001 -From: haozi007 -Date: Fri, 17 Sep 2021 08:18:14 +0100 -Subject: [PATCH] just use origin loop if do not have io - -Signed-off-by: haozi007 ---- - src/lxc/start.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lxc/start.c b/src/lxc/start.c -index e6e2170..52ea561 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -590,7 +590,7 @@ int lxc_poll(const char *name, struct lxc_handler *handler) - } - #endif - -- ret = isulad_safe_mainloop(&descr, -1); -+ ret = lxc_mainloop(&descr, -1); - close_prot_errno_disarm(descr.epfd); - if (ret < 0 || !handler->init_died) - goto out_mainloop_console; --- -2.20.1 - diff 
--git a/0030-conf-fix-a-memory-leak.patch b/0030-conf-fix-a-memory-leak.patch deleted file mode 100644 index ddd0ae5..0000000 --- a/0030-conf-fix-a-memory-leak.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 1ce660ae03e85574b1fb8f899b78f13ab14faf46 Mon Sep 17 00:00:00 2001 -From: Evgeny Vereshchagin -Date: Thu, 25 Mar 2021 09:03:21 +0000 -Subject: [PATCH 1/2] conf: fix a memory leak - -It was triggered by passing "lxc.selinux.context.keyring=xroot" to the -fuzz target introduced in https://github.com/google/oss-fuzz/pull/5498 -``` -================================================================= -==22==ERROR: LeakSanitizer: detected memory leaks - -Direct leak of 6 byte(s) in 1 object(s) allocated from: - #0 0x538ca4 in __strdup /src/llvm-project/compiler-rt/lib/asan/asan_interceptors.cpp:468:3 - #1 0x5c40e8 in set_config_string_item /src/lxc/src/lxc/confile_utils.c:635:14 - #2 0x44394e in set_config_selinux_context_keyring /src/lxc/src/lxc/confile.c:1596:9 - #3 0x5af955 in parse_line /src/lxc/src/lxc/confile.c:2953:9 - #4 0x4475cd in lxc_file_for_each_line_mmap /src/lxc/src/lxc/parse.c:125:9 - #5 0x5af24f in lxc_config_read /src/lxc/src/lxc/confile.c:3024:9 - #6 0x580b04 in LLVMFuzzerTestOneInput /src/fuzz-lxc-config-read.c:36:2 - #7 0x483643 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:599:15 - #8 0x46d4a2 in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:323:6 - #9 0x4732ea in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:856:9 - #10 0x49f022 in main /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10 - #11 0x7f16d09b883f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2083f) -``` - -This is a follow-up to https://github.com/lxc/lxc/commit/4fef78bc332a2d186dca6f - -Signed-off-by: Evgeny Vereshchagin 
---- - src/lxc/conf.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 19e193dda..8b1d2d43d 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -5678,6 +5678,7 @@ void lxc_conf_free(struct lxc_conf *conf) - free(conf->lsm_aa_profile); - free(conf->lsm_aa_profile_computed); - free(conf->lsm_se_context); -+ free(conf->lsm_se_keyring_context); - lxc_seccomp_free(&conf->seccomp); - lxc_clear_config_caps(conf); - lxc_clear_config_keepcaps(conf); --- -2.25.1 - diff --git a/0031-fix-lsm_se_mount_context-memory-leak.patch b/0031-fix-lsm_se_mount_context-memory-leak.patch deleted file mode 100644 index 759584e..0000000 --- a/0031-fix-lsm_se_mount_context-memory-leak.patch +++ /dev/null @@ -1,25 +0,0 @@ -From ecb05bff284f2f5364bcfd2196e7cb5e1d4fa512 Mon Sep 17 00:00:00 2001 -From: zhangxiaoyu -Date: Fri, 17 Sep 2021 19:45:30 +0800 -Subject: [PATCH 2/2] fix lsm_se_mount_context memory leak - -Signed-off-by: zhangxiaoyu ---- - src/lxc/conf.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 8b1d2d43d..ce550e264 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -5718,6 +5718,7 @@ void lxc_conf_free(struct lxc_conf *conf) - if (conf->ocihooks) { - free_oci_runtime_spec_hooks(conf->ocihooks); - } -+ free(conf->lsm_se_mount_context); - #endif - free(conf); - } --- -2.25.1 - diff --git a/0032-disable-lxc_keep-with-oci-image.patch b/0032-disable-lxc_keep-with-oci-image.patch deleted file mode 100644 index 0e0c6e0..0000000 --- a/0032-disable-lxc_keep-with-oci-image.patch +++ /dev/null @@ -1,169 +0,0 @@ -From dc08143bcaf8722492a70848124b8d48dfa099f7 Mon Sep 17 00:00:00 2001 -From: gaohuatao -Date: Fri, 22 Oct 2021 14:43:13 +0800 -Subject: [PATCH] disable lxc_keep with oci image - -Signed-off-by: gaohuatao ---- - src/lxc/confile.c | 6 ++++++ - src/lxc/lxccontainer.c | 24 ++++++++++++++++++++++++ - src/lxc/lxccontainer.h | 18 ++++++++++++++++++ - src/lxc/start.c | 5 +++++ - src/lxc/start.h | 
1 + - 5 files changed, 54 insertions(+) - -diff --git a/src/lxc/confile.c b/src/lxc/confile.c -index e898e23..e298ce9 100644 ---- a/src/lxc/confile.c -+++ b/src/lxc/confile.c -@@ -3018,6 +3018,12 @@ bool lxc_config_define_load(struct lxc_list *defines, struct lxc_container *c) - - lxc_list_for_each(it, defines) { - struct new_config_item *new_item = it->elem; -+#ifdef HAVE_ISULAD -+ if (strcmp(new_item->key, LXC_IMAGE_OCI_KEY) == 0) { -+ c->set_oci_type(c, true); -+ continue; -+ } -+#endif - bret = c->set_config_item(c, new_item->key, new_item->val); - if (!bret) - break; -diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c -index 2d58191..69706a5 100644 ---- a/src/lxc/lxccontainer.c -+++ b/src/lxc/lxccontainer.c -@@ -1061,6 +1061,10 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a - if (!argv) { - argv = init_cmd = use_init_args(conf->init_argv, conf->init_argc); - } -+ -+ if (c->image_type_oci) { -+ handler->image_type_oci = true; -+ } - #endif - - /* ... otherwise use default_args. */ -@@ -5755,6 +5759,22 @@ static bool do_lxcapi_set_start_timeout(struct lxc_container *c, unsigned int s - - WRAP_API_1(bool, lxcapi_set_start_timeout, unsigned int) - -+/* isulad add set image type */ -+static bool do_lxcapi_set_oci_type(struct lxc_container *c, bool image_type_oci) -+{ -+ if (!c || !c->lxc_conf) -+ return false; -+ if (container_mem_lock(c)) { -+ ERROR("Error getting mem lock"); -+ return false; -+ } -+ c->image_type_oci = image_type_oci; -+ container_mem_unlock(c); -+ return true; -+} -+ -+WRAP_API_1(bool, lxcapi_set_oci_type, bool) -+ - static uint64_t metrics_get_ull(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item) - { - char buf[81] = {0}; -@@ -6177,6 +6197,9 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath - - c->daemonize = true; - c->pidfile = NULL; -+#ifdef HAVE_ISULAD -+ c->image_type_oci = false; -+#endif - - /* Assign the member functions. 
*/ - c->is_defined = lxcapi_is_defined; -@@ -6249,6 +6272,7 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath - c->clean_container_resource = lxcapi_clean_container_resource; - c->get_container_pids = lxcapi_get_container_pids; - c->set_start_timeout = lxcapi_set_start_timeout; -+ c->set_oci_type = lxcapi_set_oci_type; - c->get_container_metrics = lxcapi_get_container_metrics; - #endif - return c; -diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h -index 9abbd09..a5be3f8 100644 ---- a/src/lxc/lxccontainer.h -+++ b/src/lxc/lxccontainer.h -@@ -26,6 +26,8 @@ extern "C" { - #define LXC_CREATE_MAXFLAGS (1 << 1) /*!< Number of \c LXC_CREATE* flags */ - #define LXC_MOUNT_API_V1 1 - -+#define LXC_IMAGE_OCI_KEY "lxc.imagetype.oci" -+ - struct bdev_specs; - - struct lxc_snapshot; -@@ -164,6 +166,12 @@ struct lxc_container { - */ - unsigned int start_timeout; - -+ /*! isulad: -+ * \private -+ * image_type_oci -+ */ -+ bool image_type_oci; -+ - /*! - * \brief Determine if \c /var/lib/lxc/$name/config exists. - * -@@ -1010,6 +1018,16 @@ struct lxc_container { - */ - bool (*set_start_timeout)(struct lxc_container *c, unsigned int start_timeout); - -+ /*! isulad add -+ * \brief An API call to set oci type -+ * -+ * \param c Container. -+ * \param image_type_oci image oci type. -+ * -+ * \return \c true on success, else \c false. -+ */ -+ bool (*set_oci_type)(struct lxc_container *c, bool image_type_oci); -+ - /*! isulad add - * \brief An API call to set start timeout - * -diff --git a/src/lxc/start.c b/src/lxc/start.c -index 52ea561..c1563e0 100644 ---- a/src/lxc/start.c -+++ b/src/lxc/start.c -@@ -694,6 +694,7 @@ struct lxc_handler *lxc_init_handler(const char *name, struct lxc_conf *conf, - - #ifdef HAVE_ISULAD - handler->exit_code = -1; /* isulad: record exit code of container */ -+ handler->image_type_oci = false; - #endif - - if (daemonize) -@@ -2080,7 +2081,11 @@ static int lxc_spawn(struct lxc_handler *handler) - * it readonly. 
- * If the container is unprivileged then skip rootfs pinning. - */ -+#ifdef HAVE_ISULAD -+ if (!wants_to_map_ids && !handler->image_type_oci) { -+#else - if (!wants_to_map_ids) { -+#endif - handler->pinfd = pin_rootfs(conf->rootfs.path); - if (handler->pinfd == -EBADF) - INFO("Failed to pin the rootfs for container \"%s\"", handler->name); -diff --git a/src/lxc/start.h b/src/lxc/start.h -index ebeeb72..98de103 100644 ---- a/src/lxc/start.h -+++ b/src/lxc/start.h -@@ -129,6 +129,7 @@ struct lxc_handler { - bool disable_pty; - /* Indicates whether should we keep stdin active. */ - bool open_stdin; -+ bool image_type_oci; - #endif - - }; --- -2.20.1 - diff --git a/0033-conf-ensure-that-the-idmap-pointer-itself-is-freed.patch b/0033-conf-ensure-that-the-idmap-pointer-itself-is-freed.patch deleted file mode 100644 index a217af7..0000000 --- a/0033-conf-ensure-that-the-idmap-pointer-itself-is-freed.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 8156691b97ac48763cf42c03aa3b92cfa37f1488 Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Tue, 4 Aug 2020 00:05:05 +0200 -Subject: [PATCH] conf: ensure that the idmap pointer itself is freed - -Signed-off-by: Christian Brauner - -(cherry-picked from https://github.com/lxc/lxc/pull/3504) -Conflicts: - src/lxc/conf.c - -Signed-off-by: WangFengTu ---- - src/lxc/conf.c | 20 +++++++++++++------- - 1 file changed, 13 insertions(+), 7 deletions(-) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 2a6e27aa4..7464e4a15 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -5362,7 +5362,15 @@ static int lxc_free_idmap(struct lxc_list *id_map) - - return 0; - } --define_cleanup_function(struct lxc_list *, lxc_free_idmap); -+ -+static int __lxc_free_idmap(struct lxc_list *id_map) -+{ -+ lxc_free_idmap(id_map); -+ free(id_map); -+ return 0; -+} -+ -+define_cleanup_function(struct lxc_list *, __lxc_free_idmap); - - int lxc_clear_idmaps(struct lxc_conf *c) - { -@@ -5939,7 +5947,7 @@ static struct lxc_list *get_minimal_idmap(const 
struct lxc_conf *conf, - int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data, - const char *fn_name) - { -- call_cleaner(lxc_free_idmap) struct lxc_list *idmap = NULL; -+ call_cleaner(__lxc_free_idmap) struct lxc_list *idmap = NULL; - int ret = -1, status = -1; - char c = '1'; - pid_t pid; -@@ -6015,7 +6023,7 @@ int userns_exec_minimal(const struct lxc_conf *conf, - int (*fn_parent)(void *), void *fn_parent_data, - int (*fn_child)(void *), void *fn_child_data) - { -- call_cleaner(lxc_free_idmap) struct lxc_list *idmap = NULL; -+ call_cleaner(__lxc_free_idmap) struct lxc_list *idmap = NULL; - uid_t resuid = LXC_INVALID_UID; - gid_t resgid = LXC_INVALID_GID; - char c = '1'; -@@ -6306,10 +6314,8 @@ on_error: - if (pid > 0) - ret = wait_for_pid(pid); - -- if (idmap) { -- lxc_free_idmap(idmap); -- free(idmap); -- } -+ if (idmap) -+ __lxc_free_idmap(idmap); - - if (host_uid_map && (host_uid_map != container_root_uid)) - free(host_uid_map); --- -2.25.1 - diff --git a/0034-cgfsng-fix-cgroup-attach-cgroup-creation.patch b/0034-cgfsng-fix-cgroup-attach-cgroup-creation.patch deleted file mode 100644 index 6273923..0000000 --- a/0034-cgfsng-fix-cgroup-attach-cgroup-creation.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 2ab5069d8a04c12a28b523323cb51055b02c815c Mon Sep 17 00:00:00 2001 -From: Christian Brauner -Date: Thu, 25 Nov 2021 15:44:32 +0800 -Subject: [PATCH] cgfsng: fix cgroup attach cgroup creation - -Signed-off-by: Christian Brauner - -(cherry-picked from https://github.com/lxc/lxc/pull/3526) -Signed-off-by: WangFengTu - -Conflicts: - src/lxc/cgroups/isulad_cgfsng.c ---- - src/lxc/cgroups/cgfsng.c | 10 +++++++++- - src/lxc/cgroups/isulad_cgfsng.c | 10 +++++++++- - 2 files changed, 18 insertions(+), 2 deletions(-) - -diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c -index 3f81f5c..28ddf55 100644 ---- a/src/lxc/cgroups/cgfsng.c -+++ b/src/lxc/cgroups/cgfsng.c -@@ -2090,13 +2090,21 @@ static int cgroup_attach_leaf(const struct 
lxc_conf *conf, int unified_fd, pid_t - - do { - bool rm = false; -- char attach_cgroup[STRLITERALLEN(".lxc-1000/cgroup.procs") + 1]; -+ char attach_cgroup[STRLITERALLEN(".lxc-/cgroup.procs") + INTTYPE_TO_STRLEN(int) + 1]; - char *slash; - - ret = snprintf(attach_cgroup, sizeof(attach_cgroup), ".lxc-%d/cgroup.procs", idx); - if (ret < 0 || (size_t)ret >= sizeof(attach_cgroup)) - return ret_errno(EIO); - -+ /* -+ * This shouldn't really happen but the compiler might complain -+ * that a short write would cause a buffer overrun. So be on -+ * the safe side. -+ */ -+ if (ret < STRLITERALLEN(".lxc-/cgroup.procs")) -+ return log_error_errno(-EINVAL, EINVAL, "Unexpected short write would cause buffer-overrun"); -+ - slash = &attach_cgroup[ret] - STRLITERALLEN("/cgroup.procs"); - *slash = '\0'; - -diff --git a/src/lxc/cgroups/isulad_cgfsng.c b/src/lxc/cgroups/isulad_cgfsng.c -index c80527d..576b424 100644 ---- a/src/lxc/cgroups/isulad_cgfsng.c -+++ b/src/lxc/cgroups/isulad_cgfsng.c -@@ -1766,13 +1766,21 @@ static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t - - do { - bool rm = false; -- char attach_cgroup[STRLITERALLEN(".lxc-1000/cgroup.procs") + 1]; -+ char attach_cgroup[STRLITERALLEN(".lxc-/cgroup.procs") + INTTYPE_TO_STRLEN(int) + 1]; - char *slash; - - ret = snprintf(attach_cgroup, sizeof(attach_cgroup), ".lxc-%d/cgroup.procs", idx); - if (ret < 0 || (size_t)ret >= sizeof(attach_cgroup)) - return ret_errno(EIO); - -+ /* -+ * This shouldn't really happen but the compiler might complain -+ * that a short write would cause a buffer overrun. So be on -+ * the safe side. 
-+ */ -+ if (ret < STRLITERALLEN(".lxc-/cgroup.procs")) -+ return log_error_errno(-EINVAL, EINVAL, "Unexpected short write would cause buffer-overrun"); -+ - slash = &attach_cgroup[ret] - STRLITERALLEN("/cgroup.procs"); - *slash = '\0'; - --- -2.25.1 - diff --git a/0035-adapt-upstream-compiler-settings.patch b/0035-adapt-upstream-compiler-settings.patch deleted file mode 100644 index fce34bd..0000000 --- a/0035-adapt-upstream-compiler-settings.patch +++ /dev/null @@ -1,29 +0,0 @@ -From d65cefcee3dce74a970239d38dcb4e491cb38b70 Mon Sep 17 00:00:00 2001 -From: haozi007 -Date: Mon, 27 Dec 2021 09:23:44 +0000 -Subject: [PATCH] adapt upstream compiler settings - -Signed-off-by: haozi007 ---- - configure.ac | 5 +++++ - 1 file changed, 5 insertions(+) - -diff --git a/configure.ac b/configure.ac -index 9eb6dcb..d1d793b 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -761,6 +761,11 @@ AX_CHECK_COMPILE_FLAG([-Wnested-externs], [CFLAGS="$CFLAGS -Wnested-externs"],,[ - AX_CHECK_COMPILE_FLAG([-fasynchronous-unwind-tables], [CFLAGS="$CFLAGS -fasynchronous-unwind-tables"],,[-Werror]) - AX_CHECK_COMPILE_FLAG([-pipe], [CFLAGS="$CFLAGS -pipe"],,[-Werror]) - AX_CHECK_COMPILE_FLAG([-fexceptions], [CFLAGS="$CFLAGS -fexceptions"],,[-Werror]) -+AX_CHECK_COMPILE_FLAG([-g], [CFLAGS="$CFLAGS -g"],,[-Werror]) -+AX_CHECK_COMPILE_FLAG([-Warray-bounds], [CFLAGS="$CFLAGS -Warray-bounds"],,[-Werror]) -+AX_CHECK_COMPILE_FLAG([-Wrestrict], [CFLAGS="$CFLAGS -Wrestrict"],,[-Werror]) -+AX_CHECK_COMPILE_FLAG([-Wreturn-local-addr], [CFLAGS="$CFLAGS -Wreturn-local-addr"],,[-Werror]) -+AX_CHECK_COMPILE_FLAG([-Wstringop-overflow], [CFLAGS="$CFLAGS -Wstringop-overflow"],,[-Werror]) - - AX_CHECK_LINK_FLAG([-z relro], [LDFLAGS="$LDFLAGS -z relro"],,[]) - AX_CHECK_LINK_FLAG([-z now], [LDFLAGS="$LDFLAGS -z now"],,[]) --- -2.20.1 - diff --git a/0036-compile-in-android-env.patch b/0036-compile-in-android-env.patch deleted file mode 100644 index e1f63ab..0000000 --- a/0036-compile-in-android-env.patch +++ 
/dev/null @@ -1,200 +0,0 @@ -From 2de0b4dddb98fa70874eb96a4a9dc33c12037db4 Mon Sep 17 00:00:00 2001 -From: chegJH -Date: Tue, 15 Feb 2022 16:13:56 +0800 -Subject: [PATCH] changes for compile in android env - -Signed-off-by: chegJH ---- - configure.ac | 3 ++- - src/lxc/Makefile.am | 3 ++- - src/lxc/commands_utils.c | 8 +++++++- - src/lxc/confile.c | 6 +++--- - src/lxc/json/read-file.c | 2 +- - src/lxc/log.c | 2 +- - src/lxc/lxclock.c | 1 + - src/lxc/syscall_wrappers.h | 2 +- - src/lxc/utils.c | 21 +++++++-------------- - 9 files changed, 25 insertions(+), 23 deletions(-) - -diff --git a/configure.ac b/configure.ac -index d1d793b..7766638 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -771,7 +771,8 @@ AX_CHECK_LINK_FLAG([-z relro], [LDFLAGS="$LDFLAGS -z relro"],,[]) - AX_CHECK_LINK_FLAG([-z now], [LDFLAGS="$LDFLAGS -z now"],,[]) - AX_CHECK_LINK_FLAG([-z noexecstack], [LDFLAGS="$LDFLAGS -z noexecstack"],,[]) - --CFLAGS="$CFLAGS -Wvla -std=gnu11 -D_FORTIFY_SOURCE=2 -Wall -fPIC -fPIE -pie" -+CFLAGS="$CFLAGS -Wvla -std=gnu11 -D_FORTIFY_SOURCE=2 -Wall -fPIC -fPIE" -+LDFLAGS="$LDFLAGS -pie" - if test "x$enable_werror" = "xyes"; then - CFLAGS="$CFLAGS -Werror" - fi -diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am -index dc49c7e..2686e24 100644 ---- a/src/lxc/Makefile.am -+++ b/src/lxc/Makefile.am -@@ -361,7 +361,8 @@ LDADD = liblxc.la \ - @OPENSSL_LIBS@ \ - @SECCOMP_LIBS@ \ - @SELINUX_LIBS@ \ -- @DLOG_LIBS@ -+ @DLOG_LIBS@ \ -+ @YAJL_LIBS@ - - if ENABLE_TOOLS - lxc_attach_SOURCES = tools/lxc_attach.c \ -diff --git a/src/lxc/commands_utils.c b/src/lxc/commands_utils.c -index 7dfefa5..54ba26e 100644 ---- a/src/lxc/commands_utils.c -+++ b/src/lxc/commands_utils.c -@@ -141,9 +141,15 @@ int lxc_make_abstract_socket_name(char *path, size_t pathlen, - char *generate_named_unix_sock_dir(const char *name) - { - __do_free char *exec_sock_dir = NULL; -+ __do_free char *rundir = NULL; - -- if (asprintf(&exec_sock_dir, "/var/run/lxc/%s", name) < 0) -+ rundir = get_rundir(); -+ 
if (!rundir) -+ rundir = strdup("/var/run"); -+ -+ if (asprintf(&exec_sock_dir, "%s/lxc/%s", rundir, name) < 0) { - return log_error_errno(NULL, errno, "Failed to allocate memory"); -+ } - - return move_ptr(exec_sock_dir); - } -diff --git a/src/lxc/confile.c b/src/lxc/confile.c -index e298ce9..cc53148 100644 ---- a/src/lxc/confile.c -+++ b/src/lxc/confile.c -@@ -6239,21 +6239,21 @@ static int set_config_init_args(const char *key, const char *value, - struct lxc_conf *lxc_conf, void *data) - { - int ret = 0; -- char *tmp = NULL; -+ char **tmp = NULL; - char *new_value = NULL; - - ret = set_config_string_item(&new_value, value); - if (ret || !new_value) - return ret; - -- tmp = realloc(lxc_conf->init_argv, (lxc_conf->init_argc + 1) * sizeof(char *)); -+ tmp = (char **)realloc(lxc_conf->init_argv, (lxc_conf->init_argc + 1) * sizeof(char *)); - if (!tmp) { - ERROR("Out of memory"); - free(new_value); - return -1; - } - -- lxc_conf->init_argv = (char **)tmp; -+ lxc_conf->init_argv = tmp; - - lxc_conf->init_argv[lxc_conf->init_argc] = new_value; - lxc_conf->init_argc++; -diff --git a/src/lxc/json/read-file.c b/src/lxc/json/read-file.c -index 70e73e5..34ebeed 100644 ---- a/src/lxc/json/read-file.c -+++ b/src/lxc/json/read-file.c -@@ -76,7 +76,7 @@ char *read_file(const char *path, size_t *length) - return NULL; - } - -- fd = open(rpath, O_RDONLY | O_CLOEXEC, 0640); -+ fd = open(rpath, O_RDONLY | O_CLOEXEC); - if (fd < 0) { - return NULL; - } -diff --git a/src/lxc/log.c b/src/lxc/log.c -index 79caa2c..a04f78e 100644 ---- a/src/lxc/log.c -+++ b/src/lxc/log.c -@@ -71,7 +71,7 @@ static int isulad_open_fifo(const char *file_path) - #define LOG_FIFO_SIZE (1024 * 1024) - int fd; - -- fd = lxc_unpriv(open(file_path, O_RDWR | O_NONBLOCK | O_CLOEXEC, 0640)); -+ fd = lxc_unpriv(open(file_path, O_RDWR | O_NONBLOCK | O_CLOEXEC)); - if (fd == -1) { - fprintf(stderr, "Open fifo %s failed: %s\n", file_path, strerror(errno)); - return -1; -diff --git a/src/lxc/lxclock.c 
b/src/lxc/lxclock.c -index bb0dca0..d65c614 100644 ---- a/src/lxc/lxclock.c -+++ b/src/lxc/lxclock.c -@@ -179,6 +179,7 @@ struct lxc_lock *lxc_newlock(const char *lxcpath, const char *name) - l->u.f.fd = -1; - - on_error: -+ fprintf(stderr, "Failed to create lock for %s, path %s\n", name, lxcpath); - return l; - } - -diff --git a/src/lxc/syscall_wrappers.h b/src/lxc/syscall_wrappers.h -index 1cef215..1c8e652 100644 ---- a/src/lxc/syscall_wrappers.h -+++ b/src/lxc/syscall_wrappers.h -@@ -62,7 +62,7 @@ extern int memfd_create(const char *name, unsigned int flags); - #endif - - #ifndef HAVE_PIVOT_ROOT --static int pivot_root(const char *new_root, const char *put_old) -+static inline int pivot_root(const char *new_root, const char *put_old) - { - return syscall(__NR_pivot_root, new_root, put_old); - } -diff --git a/src/lxc/utils.c b/src/lxc/utils.c -index 95c00cf..b39b6a8 100644 ---- a/src/lxc/utils.c -+++ b/src/lxc/utils.c -@@ -2081,7 +2081,10 @@ void lxc_write_error_message(int errfd, const char *format, ...) 
- return; - - va_start(argp, format); -+#pragma GCC diagnostic push -+#pragma GCC diagnostic ignored "-Wformat-nonliteral" - ret = vsnprintf(errbuf, BUFSIZ, format, argp); -+#pragma GCC diagnostic pop - va_end(argp); - if (ret < 0 || ret >= BUFSIZ) - SYSERROR("Failed to call vsnprintf"); -@@ -2210,30 +2213,20 @@ out: - // isulad: set env home in container - int lxc_setup_env_home(uid_t uid) - { --#define __PASSWD_FILE__ "/etc/passwd" - char *homedir = "/"; // default home dir is / -- FILE *stream = NULL; - struct passwd pw, *pwbufp = NULL; - char buf[BUFSIZ]; -+ int ret; - -- stream = fopen_cloexec(__PASSWD_FILE__, "r"); -- if (stream == NULL) { -- SYSWARN("Failed to open %s", __PASSWD_FILE__); -+ ret = getpwuid_r(uid, &pw, buf, sizeof(buf), &pwbufp); -+ if ((ret == 0) && (pwbufp != NULL) && (pwbufp->pw_uid == uid)) { -+ homedir = pwbufp->pw_dir; - goto set_env; - } - -- while (fgetpwent_r(stream, &pw, buf, sizeof(buf), &pwbufp) == 0 && pwbufp != NULL) { -- if (pwbufp->pw_uid == uid) { -- homedir = pwbufp->pw_dir; -- goto set_env; -- } -- } - WARN("User invalid, can not find user '%u'", uid); - - set_env: -- if (stream) -- fclose(stream); -- - // if we didn't configure HOME, set it based on uid - if (setenv("HOME", homedir, 0) < 0) { - SYSERROR("Unable to set env 'HOME'"); --- -2.32.0 (Apple Git-132) - diff --git a/0037-fix-always-print-and-temp-len.patch b/0037-fix-always-print-and-temp-len.patch deleted file mode 100644 index b9ccdf5..0000000 --- a/0037-fix-always-print-and-temp-len.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 178d09524a346cab9ca1f9eb939e35945b7bfbb0 Mon Sep 17 00:00:00 2001 -From: chegJH -Date: Mon, 21 Feb 2022 19:14:56 +0800 -Subject: [PATCH] fix always print and temp len - -Signed-off-by: chegJH ---- - src/lxc/confile_utils.h | 2 +- - src/lxc/lxclock.c | 1 + - 2 files changed, 2 insertions(+), 1 deletion(-) - -diff --git a/src/lxc/confile_utils.h b/src/lxc/confile_utils.h -index 62990e9..3655ec4 100644 ---- a/src/lxc/confile_utils.h -+++ 
b/src/lxc/confile_utils.h -@@ -13,7 +13,7 @@ - if (str) \ - len = snprintf(str, inlen, ##__VA_ARGS__); \ - else \ -- len = snprintf((char *){""}, 0, ##__VA_ARGS__); \ -+ len = 0; \ - if (len < 0) { \ - SYSERROR("failed to create string"); \ - return -1; \ -diff --git a/src/lxc/lxclock.c b/src/lxc/lxclock.c -index d65c614..9c9b57c 100644 ---- a/src/lxc/lxclock.c -+++ b/src/lxc/lxclock.c -@@ -177,6 +177,7 @@ struct lxc_lock *lxc_newlock(const char *lxcpath, const char *name) - } - - l->u.f.fd = -1; -+ return l; - - on_error: - fprintf(stderr, "Failed to create lock for %s, path %s\n", name, lxcpath); --- -2.32.0 (Apple Git-132) - diff --git a/0038-just-print-error-when-new-lock-failed.patch b/0038-just-print-error-when-new-lock-failed.patch deleted file mode 100644 index d6d9037..0000000 --- a/0038-just-print-error-when-new-lock-failed.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 456d154a6e0a34ac8e4474408ea02f2e0ec6e194 Mon Sep 17 00:00:00 2001 -From: haozi007 -Date: Thu, 17 Mar 2022 02:39:46 +0000 -Subject: [PATCH] just print error when new lock failed - -Signed-off-by: haozi007 ---- - src/lxc/lxclock.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/lxc/lxclock.c b/src/lxc/lxclock.c -index 9c9b57c..7114fc5 100644 ---- a/src/lxc/lxclock.c -+++ b/src/lxc/lxclock.c -@@ -177,10 +177,10 @@ struct lxc_lock *lxc_newlock(const char *lxcpath, const char *name) - } - - l->u.f.fd = -1; -- return l; - - on_error: -- fprintf(stderr, "Failed to create lock for %s, path %s\n", name, lxcpath); -+ if (l == NULL) -+ fprintf(stderr, "Failed to create lock for %s, path %s\n", name, lxcpath); - return l; - } - --- -2.20.1 - diff --git a/0039-fix-bug-of-memory-free.patch b/0039-fix-bug-of-memory-free.patch deleted file mode 100644 index c38bbfb..0000000 --- a/0039-fix-bug-of-memory-free.patch +++ /dev/null @@ -1,98 +0,0 @@ -From b235b7526f452dab2db7f9de71ea27b3dfacde1a Mon Sep 17 00:00:00 2001 -From: wujing -Date: Sat, 9 Apr 2022 15:15:02 +0800 -Subject: 
[PATCH] fix bug of memory free - -Signed-off-by: wujing ---- - src/lxc/conf.c | 27 ++++++++++----------------- - 1 file changed, 10 insertions(+), 17 deletions(-) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 19e193dd..4ef154e6 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -2604,70 +2604,63 @@ static int check_mount_destination(const char *rootfs, const char *dest, const c - const char **invalid = NULL; - - for(valid = valid_destinations; *valid != NULL; valid++) { -- char *fullpath = NULL; -- char *relpath = NULL; -+ __do_free char *fullpath = NULL; -+ __do_free char *relpath = NULL; - const char *parts[3] = { - rootfs, - *valid, - NULL - }; - fullpath = lxc_string_join("/", parts, false); -- if (!fullpath) { -+ if (fullpath == NULL) { - ERROR("Out of memory"); - return -1; - } - relpath = path_relative(fullpath, dest); -- free(fullpath); -- if (!relpath) -+ if (relpath == NULL) { -+ ERROR("Failed to get relpath for %s related to %s", dest, fullpath); - return -1; -+ } - if (!strcmp(relpath, ".")) { -- free(relpath); - return 0; - } -- free(relpath); - } - - for(invalid = invalid_destinations; *invalid != NULL; invalid++) { -- char *fullpath = NULL; -- char *relpath = NULL; -+ __do_free char *fullpath = NULL; -+ __do_free char *relpath = NULL; - const char *parts[3] = { - rootfs, - *invalid, - NULL - }; - fullpath = lxc_string_join("/", parts, false); -- if (!fullpath) { -+ if (fullpath == NULL) { - ERROR("Out of memory"); - return -1; - } - relpath = path_relative(fullpath, dest); - DEBUG("dst path %s get relative path %s with full path %s,src:%s", dest, relpath, fullpath, src); -- free(fullpath); -- if (!relpath) { -+ if (relpath == NULL) { - ERROR("Failed to get relpath for %s related to %s", dest, fullpath); - return -1; - } - // pass if the mount path is outside of invalid proc - if (strncmp(relpath, "..", 2) == 0) { -- free(relpath); - continue; - } - if (strcmp(relpath, ".") == 0) { - if (src == NULL) { -- free(relpath); - continue; - } - 
// pass if the mount on top of /proc and the source of the mount is a proc filesystem - if (has_fs_type(src, PROC_SUPER_MAGIC)) { - WARN("src %s is proc allow mount on-top of %s", src, *invalid); -- free(relpath); - continue; - } - ERROR("%s cannot be mounted because it is located inside %s", dest, *invalid); -- free(relpath); - return -1; - } -- free(relpath); - } - - return 0; --- -2.35.1 - diff --git a/0040-refactor-the-way-to-convert-selinux-label-to-shared.path b/0040-refactor-the-way-to-convert-selinux-label-to-shared.path deleted file mode 100644 index 476ed81..0000000 --- a/0040-refactor-the-way-to-convert-selinux-label-to-shared.path +++ /dev/null @@ -1,107 +0,0 @@ -From 70e7dd0da58071557c897fbce2f48c8169633a54 Mon Sep 17 00:00:00 2001 -From: wujing -Date: Fri, 15 Apr 2022 11:11:38 +0800 -Subject: [PATCH] Refactor the way to convert selinux label to shared mode - -Signed-off-by: wujing ---- - src/lxc/lsm/selinux.c | 58 ++++++++++++++++++++++++++++++++++--------- - 1 file changed, 46 insertions(+), 12 deletions(-) - -diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c -index 79697c5..0a1e205 100644 ---- a/src/lxc/lsm/selinux.c -+++ b/src/lxc/lsm/selinux.c -@@ -230,15 +230,11 @@ static int selinux_chcon(const char *fpath, const char *label, bool recurse) - { - struct stat s_buf; - -- if (fpath == NULL) { -- ERROR("Empty file path"); -+ if (fpath == NULL || label == NULL) { -+ ERROR("Invalid parameters!"); - return -1; - } - -- if (label == NULL) { -- return 0; -- } -- - if (bad_prefix(fpath) != 0) { - return -1; - } -@@ -257,6 +253,42 @@ static int selinux_chcon(const char *fpath, const char *label, bool recurse) - return 0; - } - -+/* -+ * convert_context_to_share_mode: set sensitivity to s0 and remove categories -+ * user:role:type:sensitivity[:categories] => user:role:type:s0 -+ * -+ * @label : label string -+ * -+ * Returns label with share mode on success, NULL on failure -+ */ -+static char *convert_context_to_share_mode(const char *label) { -+ 
__do_free char *converted_label = strdup(label); -+ char *s = converted_label; -+ const char *shared_level = "s0"; -+ int cnt = 0; -+ -+ // selinux label format: user:role:type:sensitivity[:categories] -+ // locates the ":" position in front of the sensitivity -+ while (cnt++ < 3 && (s = strchr(s, ':')) != NULL) { -+ s++; -+ } -+ -+ // make sure sensitivity can set s0 value -+ if (s == NULL || strlen(s) < strlen(shared_level)) { -+ ERROR("Invalid selinux file context: %s", label); -+ return NULL; -+ } -+ -+ if (strcmp(s, shared_level) == 0) { -+ return move_ptr(converted_label); -+ } -+ -+ *s = '\0'; -+ strcat(converted_label, shared_level); -+ -+ return move_ptr(converted_label); -+} -+ - /* - * selinux_relabel: Relabel changes the label of path to the filelabel string. - * It changes the MCS label to s0 if shared is true. -@@ -280,20 +312,22 @@ static int selinux_relabel(const char *path, const char *label, bool shared) - return 0; - } - -- tmp_file_label = strdup(label); - if (is_exclude_relabel_path(path)) { - ERROR("SELinux relabeling of %s is not allowed", path); - return -1; - } - - if (shared) { -- context_t c = context_new(label); -- context_range_set(c, "s0"); -- free(tmp_file_label); -- tmp_file_label = strdup(context_str(c)); -- context_free(c); -+ tmp_file_label = convert_context_to_share_mode(label); -+ if (tmp_file_label == NULL) { -+ ERROR("Failed to convert context to share mode: %s", label); -+ return -1; -+ } -+ } else { -+ tmp_file_label = strdup(label); - } - -+ - if (selinux_chcon(path, tmp_file_label, true) != 0) { - ERROR("Failed to modify %s's selinux context: %s", path, tmp_file_label); - return -1; --- -2.35.1 - diff --git a/0041-do-not-free-the-pointer-returned-by-dirname.patch b/0041-do-not-free-the-pointer-returned-by-dirname.patch deleted file mode 100644 index 28e1da5..0000000 --- a/0041-do-not-free-the-pointer-returned-by-dirname.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 11621ec06e911395c9bb3b5ae5d8f47cfc02ce3e Mon Sep 17 00:00:00 
2001 -From: WangFengTu -Date: Thu, 21 Apr 2022 15:59:11 +0800 -Subject: [PATCH] do not free the pointer returned by dirname - -Signed-off-by: WangFengTu ---- - src/lxc/conf.c | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/src/lxc/conf.c b/src/lxc/conf.c -index 7c2619c..cd9e818 100644 ---- a/src/lxc/conf.c -+++ b/src/lxc/conf.c -@@ -4258,6 +4258,7 @@ static int setup_populate_devs(const struct lxc_rootfs *rootfs, struct lxc_list - INFO("Populating devices into container"); - cur_mask = umask(0000); - lxc_list_for_each(it, devs) { -+ __do_free char *tmp_path = NULL; - ret = 0; - dev_elem = it->elem; - -@@ -4268,10 +4269,9 @@ static int setup_populate_devs(const struct lxc_rootfs *rootfs, struct lxc_list - } - - /* create any missing directories */ -- pathdirname = safe_strdup(path); -- pathdirname = dirname(pathdirname); -+ tmp_path = safe_strdup(path); -+ pathdirname = dirname(tmp_path); - ret = mkdir_p(pathdirname, 0755); -- free(pathdirname); - if (ret < 0) { - WARN("Failed to create target directory"); - ret = -1; --- -2.25.1 - diff --git a/0042-add-x-permission-when-create-directory.patch b/0042-add-x-permission-when-create-directory.patch deleted file mode 100644 index 2ba3593..0000000 --- a/0042-add-x-permission-when-create-directory.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 98d47f6ab07bbf28c6a053658628b47ef7a430ab Mon Sep 17 00:00:00 2001 -From: WangFengTu -Date: Sat, 21 May 2022 16:21:38 +0800 -Subject: [PATCH] add x permission when create directory - -Signed-off-by: WangFengTu ---- - src/lxc/commands.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/lxc/commands.c b/src/lxc/commands.c -index b954453..b79fc3d 100644 ---- a/src/lxc/commands.c -+++ b/src/lxc/commands.c -@@ -1703,7 +1703,7 @@ int lxc_cmd_init(const char *name, const char *lxcpath, const char *suffix) - if (runtime_sock_dir == NULL) - return -1; - -- if (mkdir_p(runtime_sock_dir, 0600) < 0) -+ if (mkdir_p(runtime_sock_dir, 0700) < 0) - 
return log_error_errno(-1, errno, "Failed to create container runtime unix sock directory %s", path); - - if (generate_named_unix_sock_path(name, suffix, path, sizeof(path)) != 0) --- -2.25.1 - diff --git a/0043-do-not-operate-playload-and-attach-cgroup-if-no-cont.patch b/0043-do-not-operate-playload-and-attach-cgroup-if-no-cont.patch deleted file mode 100644 index 102b524..0000000 --- a/0043-do-not-operate-playload-and-attach-cgroup-if-no-cont.patch +++ /dev/null @@ -1,128 +0,0 @@ -From c080da6dda7a47de8ccb5cc3eabec6e5b2e4c649 Mon Sep 17 00:00:00 2001 -From: WangFengTu -Date: Mon, 23 May 2022 19:00:28 +0800 -Subject: [PATCH] do not operate playload and attach cgroup if no controller - -Signed-off-by: WangFengTu ---- - src/lxc/cgroups/cgroup.h | 1 + - src/lxc/cgroups/isulad_cgfsng.c | 48 ++++++++++++++++++++++++++++++++- - 2 files changed, 48 insertions(+), 1 deletion(-) - -diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h -index a9048c4..8b18c1e 100644 ---- a/src/lxc/cgroups/cgroup.h -+++ b/src/lxc/cgroups/cgroup.h -@@ -104,6 +104,7 @@ struct cgroup_ops { - - #ifdef HAVE_ISULAD - int errfd; -+ bool no_controller; - #endif - - /* @hierarchies -diff --git a/src/lxc/cgroups/isulad_cgfsng.c b/src/lxc/cgroups/isulad_cgfsng.c -index 576b424..8a9656a 100644 ---- a/src/lxc/cgroups/isulad_cgfsng.c -+++ b/src/lxc/cgroups/isulad_cgfsng.c -@@ -677,6 +677,13 @@ __cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops, - return false; - } - -+#ifdef HAVE_ISULAD -+ if (ops->no_controller) { -+ DEBUG("no controller found, isgnore isulad_cgfsng_payload_destroy"); -+ return true; -+ } -+#endif -+ - if (!ops->hierarchies) { - return false; - } -@@ -934,6 +941,13 @@ __cgfsng_ops static inline bool isulad_cgfsng_payload_create(struct cgroup_ops * - int i; - char *container_cgroup = ops->container_cgroup; - -+#ifdef HAVE_ISULAD -+ if (ops->no_controller) { -+ DEBUG("no controller found, isgnore isulad_cgfsng_payload_create"); -+ return true; -+ } 
-+#endif -+ - if (!container_cgroup) { - ERROR("cgfsng_create container_cgroup is invalid"); - return false; -@@ -964,6 +978,13 @@ __cgfsng_ops static bool isulad_cgfsng_payload_enter(struct cgroup_ops *ops, - if (!ops) - return ret_set_errno(false, ENOENT); - -+#ifdef HAVE_ISULAD -+ if (ops->no_controller) { -+ DEBUG("no controller found, isgnore isulad_cgfsng_payload_enter"); -+ return true; -+ } -+#endif -+ - if (!ops->hierarchies) - return true; - -@@ -1121,6 +1142,13 @@ __cgfsng_ops void isulad_cgfsng_payload_finalize(struct cgroup_ops *ops) - if (!ops) - return; - -+#ifdef HAVE_ISULAD -+ if (ops->no_controller) { -+ DEBUG("no controller found, isgnore isulad_cgfsng_payload_finalize"); -+ return; -+ } -+#endif -+ - if (!ops->hierarchies) - return; - -@@ -2010,6 +2038,13 @@ __cgfsng_ops static bool isulad_cgfsng_attach(struct cgroup_ops *ops, - if (!ops) - return ret_set_errno(false, ENOENT); - -+#ifdef HAVE_ISULAD -+ if (ops->no_controller) { -+ DEBUG("no controller found, isgnore isulad_cgfsng_attach"); -+ return true; -+ } -+#endif -+ - if (!ops->hierarchies) - return true; - -@@ -2781,6 +2816,13 @@ __cgfsng_ops bool isulad_cgfsng_payload_delegate_controllers(struct cgroup_ops * - if (!ops) - return ret_set_errno(false, ENOENT); - -+#ifdef HAVE_ISULAD -+ if (ops->no_controller) { -+ DEBUG("no controller found, isgnore isulad_cgfsng_payload_delegate_controllers"); -+ return true; -+ } -+#endif -+ - return __cgfsng_delegate_controllers(ops, ops->container_cgroup); - } - -@@ -3041,8 +3083,12 @@ static int cg_unified_init(struct cgroup_ops *ops, bool relative, - delegatable = cg_unified_get_controllers(subtree_path); - if (!delegatable) - delegatable = cg_unified_make_empty_controller(); -- if (!delegatable[0]) -+ if (!delegatable[0]) { - TRACE("No controllers are enabled for delegation"); -+#ifdef HAVE_ISULAD -+ ops->no_controller = true; -+#endif -+ } - - /* TODO: If the user requested specific controllers via lxc.cgroup.use - * we should verify here. 
The reason I'm not doing it right is that I'm --- -2.25.1 - diff --git a/lxc.spec b/lxc.spec index ed79a7d..f2c7992 100644 --- a/lxc.spec +++ b/lxc.spec @@ -1,4 +1,4 @@ -%global _release 2022052501 +%global _release 2022071501 Name: lxc Version: 4.0.3 @@ -8,49 +8,7 @@ License: LGPLv2+ and GPLv2 and GPLv3 URL: https://github.com/lxc/lxc Source0: https://linuxcontainers.org/downloads/lxc/lxc-4.0.3.tar.gz -Patch0001: 0001-huawei-adapt-to-huawei-4.0.3.patch -Patch0002: 0002-add-mount-label-for-rootfs.patch -Patch0003: 0003-format-code-and-verify-mount-mode.patch -Patch0004: 0004-Removes-the-definition-of-the-thread-attributes-obje.patch -Patch0005: 0005-solve-coredump-bug-caused-by-fstype-being-NULL-durin.patch -Patch0006: 0006-SIGTERM-do-not-catch-signal-SIGTERM-in-lxc-monitor.patch -Patch0007: 0007-Using-string-type-instead-of-security_context_t-beca.patch -Patch0008: 0008-hook-pass-correct-mount-dir-as-root-to-hook.patch -Patch0009: 0009-cgroup-refact-cgroup-manager-to-single-file.patch -Patch0010: 0010-cgfsng-adjust-log-level-from-error-to-warn.patch -Patch0011: 0011-rootfs-add-make-private-for-root.path-parent.patch -Patch0012: 0012-mount-make-possible-to-bind-mount-proc-and-sys-fs.patch -Patch0013: 0013-use-path-based-unix-domain-sockets-instead-of-abstra.patch -Patch0014: 0014-api-add-get-container-metrics-api.patch -Patch0015: 0015-Streaming-IO-solution-optimization-and-enhancement.patch -Patch0016: 0016-avoid-using-void-pointers-in-caclulation.patch -Patch0017: 0017-fix-compilation-errors-without-libcap.patch -Patch0018: 0018-IO-fix-io-data-miss-when-exec-with-pipes.patch -Patch0019: 0019-metrics-add-total_inactive_file-metric-for-memory.patch -Patch0020: 0020-support-cgroup-v2.patch -Patch0021: 0021-support-isula-exec-workdir.patch -Patch0022: 0022-print-error-message-if-process-workdir-failed.patch -Patch0023: 0023-log-support-long-syslog-tag.patch -Patch0024: 0024-log-adjust-log-level-from-error-to-warn.patch -Patch0025: 
0025-get-cgroup-data-len-first-and-malloc-read-buff-by-le.patch -Patch0026: 0026-coredump-fix-coredump-when-cgroup-get-return-error.patch -Patch0027: 0027-add-help-for-new-arguments.patch -Patch0028: 0028-seccomp-init-and-destroy-notifier.cookie.patch -Patch0029: 0029-just-use-origin-loop-if-do-not-have-io.patch -Patch0030: 0030-conf-fix-a-memory-leak.patch -Patch0031: 0031-fix-lsm_se_mount_context-memory-leak.patch -Patch0032: 0032-disable-lxc_keep-with-oci-image.patch -Patch0033: 0033-conf-ensure-that-the-idmap-pointer-itself-is-freed.patch -Patch0034: 0034-cgfsng-fix-cgroup-attach-cgroup-creation.patch -Patch0035: 0035-adapt-upstream-compiler-settings.patch -Patch0036: 0036-compile-in-android-env.patch -Patch0037: 0037-fix-always-print-and-temp-len.patch -Patch0038: 0038-just-print-error-when-new-lock-failed.patch -Patch0039: 0039-fix-bug-of-memory-free.patch -Patch0040: 0040-refactor-the-way-to-convert-selinux-label-to-shared.path -Patch0041: 0041-do-not-free-the-pointer-returned-by-dirname.patch -Patch0042: 0042-add-x-permission-when-create-directory.patch -Patch0043: 0043-do-not-operate-playload-and-attach-cgroup-if-no-cont.patch +Patch0001: 0001-refactor-patch-code-of-utils-commands-and-so-on.patch BuildRequires: systemd-units git libtool graphviz docbook2X doxygen chrpath BuildRequires: pkgconfig(libseccomp) @@ -222,6 +180,12 @@ make check %{_mandir}/*/man7/%{name}* %changelog +* Fri Jul 15 2022 zhangxiaoyu - 4.0.3-2022071501 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC: refactor patch code of utils commands and so on + * Wed May 25 2022 hejunjie - 4.0.3-2022052501 - Type:bugfix - ID:NA -- Gitee