diff --git a/0004-iSulad-adapt-confile-lxccontainer-and-start.patch b/0004-iSulad-adapt-confile-lxccontainer-and-start.patch
new file mode 100644
index 0000000000000000000000000000000000000000..92508980e8200ea326c5d1bf1d3bee7da514e323
--- /dev/null
+++ b/0004-iSulad-adapt-confile-lxccontainer-and-start.patch
@@ -0,0 +1,3310 @@
+From 3e7fb35a35cff34be2bb7ace0b239d540fe0657f Mon Sep 17 00:00:00 2001
+From: zhangxiaoyu <zhangxiaoyu58@huawei.com>
+Date: Wed, 26 Jul 2023 14:57:33 +0800
+Subject: [PATCH] [iSulad] adapt confile lxccontainer and start
+
+Signed-off-by: zhangxiaoyu <zhangxiaoyu58@huawei.com>
+---
+ src/lxc/conf.c         |  11 -
+ src/lxc/conf.h         |   4 -
+ src/lxc/confile.c      | 558 +++++++++++++++++++++++++
+ src/lxc/lxccontainer.c | 899 +++++++++++++++++++++++++++++++++++++++-
+ src/lxc/lxccontainer.h | 197 +++++++++
+ src/lxc/start.c        | 902 +++++++++++++++++++++++++++++++++++++++++
+ src/lxc/start.h        |  18 +
+ 7 files changed, 2573 insertions(+), 16 deletions(-)
+
+diff --git a/src/lxc/conf.c b/src/lxc/conf.c
+index a0e0375..187e60e 100644
+--- a/src/lxc/conf.c
++++ b/src/lxc/conf.c
+@@ -5242,7 +5242,6 @@ void lxc_conf_free(struct lxc_conf *conf)
+ 	}
+ 	free(conf->systemd);
+ 	lxc_clear_init_args(conf);
+-	lxc_clear_init_groups(conf);
+ 	lxc_clear_populate_devices(conf);
+ 	lxc_clear_rootfs_masked_paths(conf);
+ 	lxc_clear_rootfs_ro_paths(conf);
+@@ -7427,16 +7426,6 @@ int lxc_clear_init_args(struct lxc_conf *lxc_conf)
+ 	return 0;
+ }
+ 
+-/*isulad clear init groups*/
+-int lxc_clear_init_groups(struct lxc_conf *lxc_conf)
+-{
+-	free(lxc_conf->init_groups);
+-	lxc_conf->init_groups = NULL;
+-	lxc_conf->init_groups_len = 0;
+-
+-	return 0;
+-}
+-
+ /*isulad: clear populate devices*/
+ int lxc_clear_populate_devices(struct lxc_conf *c)
+ {
+diff --git a/src/lxc/conf.h b/src/lxc/conf.h
+index 683b8ba..108e05b 100644
+--- a/src/lxc/conf.h
++++ b/src/lxc/conf.h
+@@ -622,9 +622,6 @@ struct lxc_conf {
+ 	char **init_argv;
+ 	size_t init_argc;
+ 
+-	gid_t *init_groups;
+-	size_t init_groups_len;
+-
+ 	/* populate devices */
+ 	struct lxc_list populate_devs;
+ 	mode_t umask;  // umask value
+@@ -794,7 +791,6 @@ __hidden extern int parse_cap(const char *cap_name, __u32 *cap);
+ #ifdef HAVE_ISULAD
+ // isulad add
+ __hidden int lxc_clear_init_args(struct lxc_conf *lxc_conf);
+-__hidden int lxc_clear_init_groups(struct lxc_conf *lxc_conf);
+ __hidden int lxc_clear_populate_devices(struct lxc_conf *c);
+ __hidden int lxc_clear_rootfs_masked_paths(struct lxc_conf *c);
+ __hidden int lxc_clear_rootfs_ro_paths(struct lxc_conf *c);
+diff --git a/src/lxc/confile.c b/src/lxc/confile.c
+index 7966d32..1492776 100644
+--- a/src/lxc/confile.c
++++ b/src/lxc/confile.c
+@@ -157,6 +157,18 @@ lxc_config_define(uts_name);
+ lxc_config_define(sysctl);
+ lxc_config_define(proc);
+ lxc_config_define(sched_core);
++#ifdef HAVE_ISULAD
++lxc_config_define(init_args);
++lxc_config_define(populate_device);
++lxc_config_define(umask);
++lxc_config_define(rootfs_masked_paths);
++lxc_config_define(rootfs_ro_paths);
++lxc_config_define(systemd);
++lxc_config_define(console_log_driver);
++lxc_config_define(console_syslog_tag);
++lxc_config_define(console_syslog_facility);
++lxc_config_define(selinux_mount_context);
++#endif
+ 
+ static int set_config_unsupported_key(const char *key, const char *value,
+ 				      struct lxc_conf *lxc_conf, void *data)
+@@ -274,6 +286,18 @@ static struct lxc_config_t config_jump_table[] = {
+ 	{ "lxc.uts.name",                   true,  set_config_uts_name,                   get_config_uts_name,                   clr_config_uts_name,                   },
+ 	{ "lxc.sysctl",                     false, set_config_sysctl,                     get_config_sysctl,                     clr_config_sysctl,                     },
+ 	{ "lxc.proc",                       false, set_config_proc,                       get_config_proc,                       clr_config_proc,                       },
++#ifdef HAVE_ISULAD /* isulad keys; bool column as in the entries above (true: full key name) */
++	{ "lxc.isulad.init.args",           true,  set_config_init_args,                  get_config_init_args,                  clr_config_init_args,                  },
++	{ "lxc.isulad.populate.device",     true,  set_config_populate_device,            get_config_populate_device,            clr_config_populate_device,            },
++	{ "lxc.isulad.umask",               true,  set_config_umask,                      get_config_umask,                      clr_config_umask,                      },
++	{ "lxc.isulad.rootfs.maskedpaths",  true,  set_config_rootfs_masked_paths,        get_config_rootfs_masked_paths,        clr_config_rootfs_masked_paths,        },
++	{ "lxc.isulad.rootfs.ropaths",      true,  set_config_rootfs_ro_paths,            get_config_rootfs_ro_paths,            clr_config_rootfs_ro_paths,            },
++	{ "lxc.isulad.systemd",             true,  set_config_systemd,                    get_config_systemd,                    clr_config_systemd,                    },
++	{ "lxc.console.logdriver",          true,  set_config_console_log_driver,         get_config_console_log_driver,         clr_config_console_log_driver,         },
++	{ "lxc.console.syslog_tag",         true,  set_config_console_syslog_tag,         get_config_console_syslog_tag,         clr_config_console_syslog_tag,         },
++	{ "lxc.console.syslog_facility",    true,  set_config_console_syslog_facility,    get_config_console_syslog_facility,    clr_config_console_syslog_facility,    },
++	{ "lxc.selinux.mount_context",      true,  set_config_selinux_mount_context,      get_config_selinux_mount_context,      clr_config_selinux_mount_context,      },
++#endif
+ };
+ 
+ static struct lxc_config_t unsupported_config_key = {
+@@ -1588,7 +1612,12 @@ static int set_config_environment(const char *key, const char *value,
+ 	if (!new_env)
+ 		return ret_errno(ENOMEM);
+ 
++#ifdef HAVE_ISULAD
++	/* isulad: recover space replaced by SPACE_MAGIC_STR */
++	dup = lxc_string_replace(SPACE_MAGIC_STR, " ", value);
++#else
+ 	dup = strdup(value);
++#endif
+ 	if (!dup)
+ 		return ret_errno(ENOMEM);
+ 
+@@ -2558,8 +2587,11 @@ static int set_config_console_rotate(const char *key, const char *value,
+ 	if (ret)
+ 		return ret_errno(EINVAL);
+ 
++#ifndef HAVE_ISULAD
++	/* isulad: support rotating multiple files */
+ 	if (lxc_conf->console.log_rotate > 1)
+ 		return log_error_errno(-EINVAL, EINVAL, "The \"lxc.console.rotate\" config key can only be set to 0 or 1");
++#endif
+ 
+ 	return 0;
+ }
+@@ -3049,6 +3081,54 @@ struct parse_line_conf {
+ 	bool from_include;
+ };
+ 
++#ifdef HAVE_ISULAD
++// escape_string_decode returns a newly allocated copy of src in which the
++// two-character sequences \r \n \f \b \t and \\ are collapsed into the single
++// character they denote. Any other backslash sequence is copied unchanged.
++// Returns NULL when src is NULL or empty, or when allocation fails; the
++// returned buffer comes from calloc().
++static char *escape_string_decode(const char *src)
++{
++	size_t in = 0;
++	size_t out = 0;
++	size_t total = 0;
++	char *decoded = NULL;
++
++	if (src == NULL)
++		return NULL;
++
++	total = strlen(src);
++	if (total == 0)
++		return NULL;
++
++	/* decoding never grows the string, so total + 1 bytes always suffice */
++	decoded = calloc(1, total + 1);
++	if (decoded == NULL) {
++		ERROR("Out of memory");
++		return NULL;
++	}
++
++	for (; in < total; in++, out++) {
++		if (src[in] != '\\') {
++			decoded[out] = src[in];
++			continue;
++		}
++		switch (src[++in]) {
++		case 'r': decoded[out] = '\r'; break;
++		case 'n': decoded[out] = '\n'; break;
++		case 'f': decoded[out] = '\f'; break;
++		case 'b': decoded[out] = '\b'; break;
++		case 't': decoded[out] = '\t'; break;
++		case '\\': decoded[out] = '\\'; break;
++		/* unknown escape: keep the backslash and the character as-is */
++		default: decoded[out++] = '\\'; decoded[out] = src[in]; break;
++		}
++	}
++
++	return decoded;
++}
++#endif
++
+ static int parse_line(char *buffer, void *data)
+ {
+ 	__do_free char *linep = NULL;
+@@ -3058,6 +3138,9 @@ static int parse_line(char *buffer, void *data)
+ 	int ret;
+ 	char *dup = buffer;
+ 	struct parse_line_conf *plc = data;
++#ifdef HAVE_ISULAD
++	__do_free char *value_decode = NULL;
++#endif
+ 
+ 	if (!plc->conf)
+ 		return syserror_set(-EINVAL, "Missing config");
+@@ -3118,7 +3201,15 @@ static int parse_line(char *buffer, void *data)
+ 	}
+ 
+ 	config = lxc_get_config(key);
++#ifdef HAVE_ISULAD
++	/* isulad: decode escape sequences; use the raw value if decoding yields NULL */
++	value_decode = escape_string_decode(value);
++	if (value_decode == NULL)
++		ERROR("Value %s decode failed", value);
++	return config->set(key, value_decode ? value_decode : value, plc->conf, NULL);
++#else
+ 	return config->set(key, value, plc->conf, NULL);
++#endif
+ }
+ 
+ static struct new_config_item *parse_new_conf_line(char *buffer)
+@@ -3222,6 +3313,12 @@ bool lxc_config_define_load(struct lxc_list *defines, struct lxc_container *c)
+ 
+ 	lxc_list_for_each(it, defines) {
+ 		struct new_config_item *new_item = it->elem;
++#ifdef HAVE_ISULAD
++		if (strcmp(new_item->key, LXC_IMAGE_OCI_KEY) == 0) {
++			c->set_oci_type(c, true);
++			continue;
++		}
++#endif
+ 		bret = c->set_config_item(c, new_item->key, new_item->val);
+ 		if (!bret)
+ 			break;
+@@ -6764,3 +6861,464 @@ static int clr_config_sched_core(const char *key, struct lxc_conf *c, void *data
+ 	c->sched_core = false;
+ 	return 0;
+ }
++
++
++#ifdef HAVE_ISULAD
++/* isulad: set config for init args. Appends one argument to init_argv; if the
++ * string helper fails or yields no string, its return code is passed through. */
++static int set_config_init_args(const char *key, const char *value,
++				 struct lxc_conf *lxc_conf, void *data)
++{
++	char **grown = NULL;
++	char *arg = NULL;
++	int rc;
++
++	rc = set_config_string_item(&arg, value);
++	if (rc || !arg)
++		return rc;
++
++	/* grow via a temporary so the old vector survives a failed realloc */
++	grown = (char **)realloc(lxc_conf->init_argv, (lxc_conf->init_argc + 1) * sizeof(char *));
++	if (!grown) {
++		ERROR("Out of memory");
++		free(arg);
++		return -1;
++	}
++
++	grown[lxc_conf->init_argc++] = arg;
++	lxc_conf->init_argv = grown;
++
++	return 0;
++}
++
++/* isulad: get config init args; passes each init_argv entry to strprint and returns fulllen */
++static int get_config_init_args(const char *key, char *retv, int inlen,
++				  struct lxc_conf *c, void *data)
++{
++	int i, len, fulllen = 0;
++	/* NOTE(review): len/fulllen are presumably updated inside the strprint macro - confirm */
++	if (!retv)
++		inlen = 0;
++	else
++		memset(retv, 0, inlen);
++	/* the format is "%s" only, i.e. no separator is placed between entries */
++	for (i = 0; i < c->init_argc; i++) {
++           strprint(retv, inlen, "%s", c->init_argv[i]);
++    }
++
++	return fulllen;
++}
++
++/* isulad: clr config init args*/
++static inline int clr_config_init_args(const char *key, struct lxc_conf *c,
++				   void *data)
++{
++	return lxc_clear_init_args(c);
++}
++
++/* isulad: set config for populate device.
++ * Parses one device description and either updates the entry that already
++ * carries the same path or appends a new entry to lxc_conf->populate_devs.
++ * An empty value clears the list. Returns 0 on success, -1 on failure. */
++static int set_config_populate_device(const char *key, const char *value,
++                                      struct lxc_conf *lxc_conf, void *data)
++{
++	int ret = 0, major = 0, minor = 0;
++	int filemode = 0; /* scanned with %i, so it has to be a plain int */
++	uid_t uid = (uid_t)-1;
++	gid_t gid = (gid_t)-1;
++	char name[4096] = {0}; /* MAX dev path name */
++	char type[3] = {0};
++	struct lxc_list *iter = NULL;
++	struct lxc_list *dev_list = NULL;
++	struct lxc_populate_devs *dev_elem = NULL;
++
++	if (lxc_config_value_empty(value))
++		return lxc_clear_populate_devices(lxc_conf);
++
++	/* lxc.populate.device = PATH_IN_CONTAINER:DEVICETYPE:MAJOR:MINOR:MODE:UID:GID
++	 * For e.g. lxc.populate.device = /dev/sda:b:8:0:0666:0:0
++	 */
++	ret = sscanf(value, "%4095[^:]:%2[^:]:%i:%i:%i:%u:%u", name, type, &major, &minor, &filemode, &uid, &gid);
++	if (ret != 7)
++		return -1;
++
++	/* Update in place when the path is already known. The cursor is kept
++	 * separate from dev_elem so that on_error can never free a live entry. */
++	lxc_list_for_each(iter, &lxc_conf->populate_devs) {
++		struct lxc_populate_devs *cur = iter->elem;
++		char *replace_value = NULL;
++
++		if (strcmp(name, cur->name) != 0)
++			continue;
++
++		replace_value = safe_strdup(type);
++		free(cur->type);
++		cur->type = replace_value;
++		cur->file_mode = filemode;
++		cur->maj = major;
++		cur->min = minor;
++		cur->uid = (uid_t)uid;
++		cur->gid = (gid_t)gid;
++		return 0;
++	}
++
++	/* allocate list element */
++	dev_list = malloc(sizeof(*dev_list));
++	if (dev_list == NULL)
++		goto on_error;
++
++	lxc_list_init(dev_list);
++
++	dev_elem = calloc(1, sizeof(*dev_elem));
++	if (dev_elem == NULL)
++		goto on_error;
++
++	dev_elem->name = safe_strdup(name);
++	dev_elem->type = safe_strdup(type);
++
++	dev_elem->file_mode = filemode;
++	dev_elem->maj = major;
++	dev_elem->min = minor;
++	dev_elem->uid = (uid_t)uid;
++	dev_elem->gid = (gid_t)gid;
++
++	lxc_list_add_elem(dev_list, dev_elem);
++	lxc_list_add_tail(&lxc_conf->populate_devs, dev_list);
++
++	return 0;
++
++on_error:
++	free(dev_list);
++	if (dev_elem) {
++		free(dev_elem->name);
++		free(dev_elem->type);
++		free(dev_elem);
++	}
++	return -1;
++}
++
++/* isulad: get config populate device
++ * If you ask for 'lxc.populate.device', then all populate device
++ * entries will be printed, in 'lxc.populate.device = path_in_container:type:major:minor:mode:uid:gid' format.
++ * For e.g. lxc.populate.device = /dev/sda:b:8:0:0666:0:0
++ */
++static int get_config_populate_device(const char *key, char *retv, int inlen,
++                                      struct lxc_conf *c, void *data)
++{
++	int len;
++	struct lxc_list *it = NULL;
++	int fulllen = 0;
++
++	if (!retv)
++		inlen = 0;
++	else
++		memset(retv, 0, inlen);
++
++	lxc_list_for_each(it, &c->populate_devs) {
++		struct lxc_populate_devs *elem = it->elem;
++		strprint(retv, inlen, "lxc.populate.device = %s:%s:%d:%d:%o:%u:%u\n",
++		         elem->name, elem->type, elem->maj,
++		         elem->min, elem->file_mode, elem->uid, elem->gid);
++	}
++
++	return fulllen;
++}
++
++/* isulad: clr config populate devices*/
++static inline int clr_config_populate_device(const char *key, struct lxc_conf *c,
++                void *data)
++{
++	return lxc_clear_populate_devices(c);
++}
++
++/* isulad: set config for umask.
++ * Accepts "normal" (0022) or "secure" (0027); anything else returns -1. */
++static int set_config_umask(const char *key, const char *value,
++			      struct lxc_conf *lxc_conf, void *data)
++{
++	if (lxc_config_value_empty(value)) {
++		ERROR("Empty umask");
++		return -1;
++	}
++
++	if (strcmp(value, "normal") == 0) {
++		lxc_conf->umask = 0022;
++	} else if (strcmp(value, "secure") == 0) {
++		lxc_conf->umask = 0027;
++	} else {
++		ERROR("Invalid native umask: %s", value);
++		return -1;
++	}
++	return 0;
++}
++
++/* isulad add: get umask value*/
++static int get_config_umask(const char *key, char *retv, int inlen,
++			      struct lxc_conf *c, void *data)
++{
++	return lxc_get_conf_size_t(c, retv, inlen, c->umask);
++}
++
++/* isulad add: clear umask value */
++static inline int clr_config_umask(const char *key, struct lxc_conf *c,
++				     void *data)
++{
++	c->umask = 0027;
++	return 0;
++}
++
++/* isulad: set config for rootfs masked paths.
++ * An empty value clears rootfs.maskedpaths (the clear helper's result is
++ * returned); any other value is stored via safe_strdup() in a new tail node.
++ * Returns 0 on success and -1 when the list node cannot be allocated. */
++static int set_config_rootfs_masked_paths(const char *key, const char *value,
++                struct lxc_conf *lxc_conf, void *data)
++{
++	struct lxc_list *node = NULL;
++
++	if (lxc_config_value_empty(value))
++		return lxc_clear_rootfs_masked_paths(lxc_conf);
++
++	node = malloc(sizeof(*node));
++	if (node == NULL) {
++		/* nothing was allocated, so there is nothing to release */
++		return -1;
++	}
++
++	/* the node's elem is whatever safe_strdup() returns for value */
++	node->elem = safe_strdup(value);
++	lxc_list_add_tail(&lxc_conf->rootfs.maskedpaths, node);
++
++	return 0;
++}
++
++// isulad: get config rootfs masked paths
++static int get_config_rootfs_masked_paths(const char *key, char *retv, int inlen,
++                struct lxc_conf *c, void *data)
++{
++	int len, fulllen = 0;
++	struct lxc_list *it = NULL;
++
++	if (!retv)
++		inlen = 0;
++	else
++		memset(retv, 0, inlen);
++
++	lxc_list_for_each(it, &c->rootfs.maskedpaths) {
++		strprint(retv, inlen, "%s\n", (char *)it->elem);
++	}
++
++	return fulllen;
++}
++
++/* isulad: set config for rootfs ro paths.
++ * An empty value clears rootfs.ropaths (the clear helper's result is
++ * returned); any other value is stored via safe_strdup() in a new tail node.
++ * Returns 0 on success and -1 when the list node cannot be allocated. */
++static int set_config_rootfs_ro_paths(const char *key, const char *value,
++                                      struct lxc_conf *lxc_conf, void *data)
++{
++	struct lxc_list *node = NULL;
++
++	if (lxc_config_value_empty(value))
++		return lxc_clear_rootfs_ro_paths(lxc_conf);
++
++	node = malloc(sizeof(*node));
++	if (node == NULL) {
++		/* nothing was allocated, so there is nothing to release */
++		return -1;
++	}
++
++	/* the node's elem is whatever safe_strdup() returns for value */
++	node->elem = safe_strdup(value);
++	lxc_list_add_tail(&lxc_conf->rootfs.ropaths, node);
++
++	return 0;
++}
++
++// isulad: get config rootfs ro paths
++static int get_config_rootfs_ro_paths(const char *key, char *retv, int inlen,
++                                      struct lxc_conf *c, void *data)
++{
++	int len, fulllen = 0;
++	struct lxc_list *it = NULL;
++
++	if (!retv)
++		inlen = 0;
++	else
++		memset(retv, 0, inlen);
++
++	lxc_list_for_each(it, &c->rootfs.ropaths) {
++		strprint(retv, inlen, "%s\n", (char *)it->elem);
++	}
++
++	return fulllen;
++}
++
++/* isulad: clr config rootfs masked paths */
++static inline int clr_config_rootfs_masked_paths(const char *key, struct lxc_conf *c,
++                void *data)
++{
++	return lxc_clear_rootfs_masked_paths(c);
++}
++
++/* isulad: clr config rootfs ro paths */
++static inline int clr_config_rootfs_ro_paths(const char *key, struct lxc_conf *c,
++                void *data)
++{
++	return lxc_clear_rootfs_ro_paths(c);
++}
++
++/* isulad: set config for systemd; an empty value is rejected with -1 */
++static int set_config_systemd(const char *key, const char *value,
++			      struct lxc_conf *lxc_conf, void *data)
++{
++	if (lxc_config_value_empty(value)) {
++		ERROR("Empty systemd");
++		return -1;
++	}
++	/* shared string setter, as used by the other string keys in this file */
++	return set_config_string_item(&lxc_conf->systemd, value);
++}
++
++/* isulad add: get systemd value*/
++static int get_config_systemd(const char *key, char *retv, int inlen,
++			      struct lxc_conf *c, void *data)
++{
++	return lxc_get_conf_str(retv, inlen, c->systemd);
++}
++
++/* isulad add: clear systemd value */
++static inline int clr_config_systemd(const char *key, struct lxc_conf *c,
++				     void *data)
++{
++	free(c->systemd);
++	c->systemd = NULL;
++	return 0;
++}
++
++static int set_config_console_log_driver(const char *key, const char *value,
++        struct lxc_conf *lxc_conf, void *data)
++{
++	return set_config_string_item(&lxc_conf->console.log_driver, value);
++}
++
++static int set_config_console_syslog_tag(const char *key, const char *value,
++        struct lxc_conf *lxc_conf, void *data)
++{
++	if (value == NULL) {
++		return -1;
++	}
++	return set_config_string_item(&lxc_conf->console.log_syslog_tag, value);
++}
++
++/* Map a syslog facility name ("kern", "daemon", "local0", ...) to its LOG_*
++ * constant. Returns -1 when facility is NULL or not one of the known names. */
++static int parse_facility(const char *facility)
++{
++#define FACILITIES_LEN 20
++	static const struct {
++		const char *name;
++		int code;
++	} known[FACILITIES_LEN] = {
++		{ "kern", LOG_KERN }, { "user", LOG_USER }, { "mail", LOG_MAIL },
++		{ "daemon", LOG_DAEMON }, { "auth", LOG_AUTH }, { "syslog", LOG_SYSLOG },
++		{ "lpr", LOG_LPR }, { "news", LOG_NEWS }, { "uucp", LOG_UUCP },
++		{ "cron", LOG_CRON }, { "authpriv", LOG_AUTHPRIV }, { "ftp", LOG_FTP },
++		{ "local0", LOG_LOCAL0 }, { "local1", LOG_LOCAL1 }, { "local2", LOG_LOCAL2 },
++		{ "local3", LOG_LOCAL3 }, { "local4", LOG_LOCAL4 }, { "local5", LOG_LOCAL5 },
++		{ "local6", LOG_LOCAL6 }, { "local7", LOG_LOCAL7 },
++	};
++	int idx;
++
++	if (facility == NULL)
++		return -1;
++
++	for (idx = 0; idx < FACILITIES_LEN; idx++) {
++		if (strcmp(facility, known[idx].name) == 0)
++			return known[idx].code;
++	}
++	return -1;
++}
++
++static int set_config_console_syslog_facility(const char *key, const char *value,
++        struct lxc_conf *lxc_conf, void *data)
++{
++	int facility;
++
++	facility = parse_facility(value);
++	if (facility < 0) {
++		NOTICE("Invalid facility: %s", value);
++		facility = LOG_DAEMON;
++	}
++
++	lxc_conf->console.log_syslog_facility = facility;
++	return 0;
++}
++
++static int set_config_selinux_mount_context(const char *key, const char *value,
++    struct lxc_conf *lxc_conf, void *data)
++{
++	if (value != NULL && strcmp(value, "unconfined_t") == 0) {
++		return set_config_string_item(&lxc_conf->lsm_se_mount_context, NULL);
++	}
++
++	return set_config_string_item(&lxc_conf->lsm_se_mount_context, value);
++}
++
++static int get_config_console_log_driver(const char *key, char *retv, int inlen,
++        struct lxc_conf *c, void *data)
++{
++	return lxc_get_conf_str(retv, inlen, c->console.log_driver);
++}
++
++static int get_config_console_syslog_tag(const char *key, char *retv, int inlen,
++        struct lxc_conf *c, void *data)
++{
++	return lxc_get_conf_str(retv, inlen, c->console.log_syslog_tag);
++}
++
++static int get_config_console_syslog_facility(const char *key, char *retv, int inlen,
++        struct lxc_conf *c, void *data)
++{
++	return lxc_get_conf_int(c, retv, inlen, c->console.log_syslog_facility);
++}
++
++static int get_config_selinux_mount_context(const char *key, char *retv, int inlen,
++    struct lxc_conf *c, void *data)
++{
++	return lxc_get_conf_str(retv, inlen, c->lsm_se_mount_context);
++}
++
++static inline int clr_config_console_log_driver(const char *key,
++                struct lxc_conf *c, void *data)
++{
++	free(c->console.log_driver);
++	c->console.log_driver = NULL;
++	return 0;
++}
++
++static inline int clr_config_console_syslog_tag(const char *key,
++                struct lxc_conf *c, void *data)
++{
++	free(c->console.log_syslog_tag);
++	c->console.log_syslog_tag= NULL;
++	return 0;
++}
++
++static inline int clr_config_console_syslog_facility(const char *key,
++                struct lxc_conf *c, void *data)
++{
++	c->console.log_syslog_facility = LOG_DAEMON;
++	return 0;
++}
++
++static inline int clr_config_selinux_mount_context(const char *key,
++    struct lxc_conf *c, void *data)
++{
++	free(c->lsm_se_mount_context);
++	c->lsm_se_mount_context = NULL;
++	return 0;
++}
++#endif
+diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
+index 8df6059..d4495f7 100644
+--- a/src/lxc/lxccontainer.c
++++ b/src/lxc/lxccontainer.c
+@@ -62,6 +62,10 @@
+ #include "utils.h"
+ #include "version.h"
+ 
++#ifdef HAVE_ISULAD
++#include "exec_commands.h"
++#endif
++
+ #if HAVE_OPENSSL
+ #include <openssl/evp.h>
+ #endif
+@@ -83,6 +87,11 @@
+ 
+ lxc_log_define(lxccontainer, lxc);
+ 
++#ifdef HAVE_ISULAD
++typedef bool (*func_is_io_stat_read)(const char *value);
++typedef bool (*func_is_io_stat_write)(const char *value);
++#endif
++
+ static bool do_lxcapi_destroy(struct lxc_container *c);
+ static const char *lxcapi_get_config_path(struct lxc_container *c);
+ #define do_lxcapi_get_config_path(c) lxcapi_get_config_path(c)
+@@ -272,6 +281,13 @@ static void lxc_container_free(struct lxc_container *c)
+ 	free(c->config_path);
+ 	c->config_path = NULL;
+ 
++#ifdef HAVE_ISULAD
++	free(c->exit_fifo);
++	c->exit_fifo = NULL;
++	free(c->ocihookfile);
++	c->ocihookfile = NULL;
++#endif
++
+ 	free(c);
+ }
+ 
+@@ -652,6 +668,66 @@ static bool load_config_locked(struct lxc_container *c, const char *fname)
+ 	return true;
+ }
+ 
++#ifdef HAVE_ISULAD
++/* isulad: parse c->ocihookfile into c->lxc_conf->ocihooks. Only a missing
++ * lxc_conf returns false; a parse failure is printed to stderr and returns true. */
++static bool load_ocihooks_locked(struct lxc_container *c)
++{
++	parser_error parse_err = NULL;
++	oci_runtime_spec_hooks *parsed = NULL;
++
++	if (!c->lxc_conf) {
++		c->lxc_conf = lxc_conf_init();
++		if (!c->lxc_conf)
++			return false;
++	}
++
++	parsed = oci_runtime_spec_hooks_parse_file(c->ocihookfile, NULL, &parse_err);
++	if (parsed)
++		c->lxc_conf->ocihooks = parsed;
++	else
++		fprintf(stderr, "parse oci hooks config failed: %s\n", parse_err);
++
++	/* free(NULL) is a no-op, so both outcomes share the same cleanup */
++	free(parse_err);
++	return true;
++}
++
++/* isulad: set oci hook file path to "<config_path>/<name>/ocihooks.json".
++ * Returns false if config_path is unset, on length overflow, on OOM, or if the
++ * path cannot be formatted; on success the old c->ocihookfile is freed. */
++static bool set_oci_hook_config_filename(struct lxc_container *c)
++{
++#define OCI_HOOK_JSON_FILE_NAME "ocihooks.json"
++	char *newpath = NULL;
++	size_t len = 0; /* size_t, to match the SIZE_MAX guard below */
++	int ret;
++
++	if (!c->config_path)
++		return false;
++
++	/* $lxc_path + "/" + c->name + "/" + "ocihooks.json" + '\0' */
++	if (strlen(c->config_path) + strlen(c->name) > SIZE_MAX - strlen(OCI_HOOK_JSON_FILE_NAME) - 3)
++		return false;
++	len = strlen(c->config_path) + strlen(c->name) + strlen(OCI_HOOK_JSON_FILE_NAME) + 3;
++
++	newpath = malloc(len);
++	if (newpath == NULL)
++		return false;
++
++	ret = snprintf(newpath, len, "%s/%s/%s", c->config_path, c->name, OCI_HOOK_JSON_FILE_NAME);
++	if (ret < 0 || (size_t)ret >= len) {
++		fprintf(stderr, "Error printing out config file name\n");
++		free(newpath);
++		return false;
++	}
++
++	free(c->ocihookfile);
++	c->ocihookfile = newpath;
++	return true;
++}
++#endif
++
+ static bool do_lxcapi_load_config(struct lxc_container *c, const char *alt_file)
+ {
+ 	int lret;
+@@ -685,6 +761,11 @@ static bool do_lxcapi_load_config(struct lxc_container *c, const char *alt_file)
+ 
+ 	ret = load_config_locked(c, fname);
+ 
++#ifdef HAVE_ISULAD
++	if (ret && file_exists(c->ocihookfile))
++		ret = load_ocihooks_locked(c);
++#endif
++
+ 	if (need_disklock)
+ 		container_disk_unlock(c);
+ 	else
+@@ -884,6 +965,33 @@ static bool wait_on_daemonized_start(struct lxc_handler *handler, int pid)
+ 	return true;
+ }
+ 
++#ifdef HAVE_ISULAD
++/* isulad: use init argv as init cmd: pushes the first init_args entries of
++ * init_argv via push_arg(); NULL if init_argv is NULL or nothing was pushed. */
++static char **use_init_args(char **init_argv, size_t init_args)
++{
++	char **cmd = NULL;
++	int pushed = 0;
++	size_t idx;
++
++	if (!init_argv)
++		return NULL;
++
++	/* retry until the one-slot seed vector can be allocated */
++	while (!(cmd = malloc(sizeof(char *))))
++		;
++	cmd[0] = NULL;
++
++	for (idx = 0; idx < init_args; idx++)
++		push_arg(&cmd, init_argv[idx], &pushed);
++
++	if (pushed != 0)
++		return cmd;
++	free(cmd);
++	return NULL;
++}
++#endif
++
+ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const argv[])
+ {
+ 	int ret;
+@@ -894,6 +1002,11 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
+ 		NULL,
+ 	};
+ 	char **init_cmd = NULL;
++#ifdef HAVE_ISULAD
++	int keepfds[] = {-1, -1, -1, -1, -1};
++	ssize_t size_read;
++	char errbuf[BUFSIZ + 1] = {0};
++#endif
+ 
+ 	/* container does exist */
+ 	if (!c)
+@@ -940,6 +1053,30 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
+ 			argv = init_cmd = split_init_cmd(conf->init_cmd);
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	if (!argv) {
++		argv = init_cmd = use_init_args(conf->init_argv, conf->init_argc);
++	}
++
++	// do not allow using default rootfs path when isulad
++	if (conf->rootfs.mount == NULL) {
++		ERROR("Empty rootfs path detected");
++		lxc_put_handler(handler);
++		return false;
++	}
++
++	// do not allow using default args when isulad
++	if (!argv) {
++		ERROR("Empty args detected");
++		lxc_put_handler(handler);
++		return false;
++	}
++
++	if (c->image_type_oci) {
++		handler->image_type_oci = true;
++	}
++#endif
++
+ 	/* ... otherwise use default_args. */
+ 	if (!argv) {
+ 		if (useinit) {
+@@ -959,10 +1096,23 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
+ 		char title[2048];
+ 		pid_t pid_first, pid_second;
+ 
++#ifdef HAVE_ISULAD
++		// isulad: pipe fds used to get the error message of the child or grandchild process.
++		if (pipe2(conf->errpipe, O_CLOEXEC) != 0) {
++			SYSERROR("Failed to init errpipe");
++			free_init_cmd(init_cmd);
++			lxc_put_handler(handler);
++			return false;
++		}
++#endif
++
+ 		pid_first = fork();
+ 		if (pid_first < 0) {
+ 			free_init_cmd(init_cmd);
+ 			lxc_put_handler(handler);
++#ifdef HAVE_ISULAD
++			lxc_close_error_pipe(conf->errpipe);
++#endif
+ 			return false;
+ 		}
+ 
+@@ -972,11 +1122,25 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
+ 			 * the PID file, child will do the free and unlink.
+ 			 */
+ 			c->pidfile = NULL;
++#ifdef HAVE_ISULAD
++			close(conf->errpipe[1]);
++			conf->errpipe[1] = -1;
++#endif
+ 
+ 			/* Wait for container to tell us whether it started
+ 			 * successfully.
+ 			 */
+ 			started = wait_on_daemonized_start(handler, pid_first);
++#ifdef HAVE_ISULAD
++			if (!started) {
++				size_read = read(conf->errpipe[0], errbuf, BUFSIZ);
++				if (size_read > 0) {
++					conf->errmsg = safe_strdup(errbuf);
++				}
++			}
++			close(conf->errpipe[0]);
++			conf->errpipe[0] = -1;
++#endif
+ 
+ 			free_init_cmd(init_cmd);
+ 			lxc_put_handler(handler);
+@@ -1012,6 +1176,9 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
+ 		if (pid_second != 0) {
+ 			free_init_cmd(init_cmd);
+ 			lxc_put_handler(handler);
++#ifdef HAVE_ISULAD
++			lxc_close_error_pipe(conf->errpipe);
++#endif
+ 			_exit(EXIT_SUCCESS);
+ 		}
+ 
+@@ -1024,7 +1191,18 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
+ 			_exit(EXIT_FAILURE);
+ 		}
+ 
++#ifdef HAVE_ISULAD
++		keepfds[0] = handler->conf->maincmd_fd;
++		keepfds[1] = handler->state_socket_pair[0];
++		keepfds[2] = handler->state_socket_pair[1];
++		keepfds[4] = conf->errpipe[1];
++		close(conf->errpipe[0]);
++		conf->errpipe[0] = -1;
++		ret = lxc_check_inherited(conf, true, keepfds,
++					  sizeof(keepfds) / sizeof(keepfds[0]));
++#else
+ 		ret = inherit_fds(handler, true);
++#endif
+ 		if (ret < 0)
+ 			_exit(EXIT_FAILURE);
+ 
+@@ -1057,6 +1235,9 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
+ 		if (w < 0) {
+ 			free_init_cmd(init_cmd);
+ 			lxc_put_handler(handler);
++#ifdef HAVE_ISULAD
++			lxc_close_error_pipe(conf->errpipe);
++#endif
+ 
+ 			SYSERROR("Failed to write monitor pid to \"%s\"", c->pidfile);
+ 
+@@ -1070,6 +1251,9 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
+ 		if (ret < 0) {
+ 			free_init_cmd(init_cmd);
+ 			lxc_put_handler(handler);
++#ifdef HAVE_ISULAD
++			lxc_close_error_pipe(conf->errpipe);
++#endif
+ 
+ 			SYSERROR("Failed to write monitor pid to \"%s\"", c->pidfile);
+ 
+@@ -1080,6 +1264,19 @@ static bool do_lxcapi_start(struct lxc_container *c, int useinit, char * const a
+ 		}
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	/* isulad: open exit fifo */
++	if (c->exit_fifo) {
++		conf->exit_fd = lxc_open(c->exit_fifo, O_WRONLY | O_NONBLOCK | O_CLOEXEC, 0);
++		if (conf->exit_fd < 0) {
++			ERROR("Failed to open exit fifo %s: %s.", c->exit_fifo, strerror(errno));
++			lxc_put_handler(handler);
++			ret = 1;
++			goto on_error;
++		}
++	}
++#endif
++
+ 	conf->reboot = REBOOT_NONE;
+ 
+ 	/* Unshare the mount namespace if requested */
+@@ -1111,19 +1308,53 @@ reboot:
+ 		}
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	keepfds[0] = handler->conf->maincmd_fd;
++	keepfds[1] = handler->state_socket_pair[0];
++	keepfds[2] = handler->state_socket_pair[1];
++
++	/* keep exit fifo fd */
++	if (conf->exit_fd >= 0) {
++		keepfds[3] = conf->exit_fd;
++	}
++	/* isulad: keep errpipe fd */
++	if (c->daemonize)
++		keepfds[4] = conf->errpipe[1];
++
++	ret = lxc_check_inherited(conf, c->daemonize, keepfds,
++				  sizeof(keepfds) / sizeof(keepfds[0]));
++	if (ret < 0) {
++		lxc_put_handler(handler);
++		ret = 1;
++		goto on_error;
++	}
++#else
+ 	ret = inherit_fds(handler, c->daemonize);
+ 	if (ret < 0) {
+ 		lxc_put_handler(handler);
+ 		ret = 1;
+ 		goto on_error;
+ 	}
++#endif
+ 
++#ifndef HAVE_ISULAD
+ 	if (useinit)
+ 		ret = lxc_execute(c->name, argv, 1, handler, c->config_path,
+ 				  c->daemonize, &c->error_num);
+ 	else
+ 		ret = lxc_start(argv, handler, c->config_path, c->daemonize,
+ 				&c->error_num);
++#else
++	if (useinit) {
++		ret = lxc_execute(c->name, argv, 1, handler, c->config_path,
++				  c->daemonize, &c->error_num, c->start_timeout);
++	} else {
++		handler->disable_pty = c->disable_pty;
++		handler->open_stdin = c->open_stdin;
++		ret = lxc_start(argv, handler, c->config_path, c->daemonize,
++				&c->error_num, c->start_timeout);
++	}
++#endif
+ 
+ 	if (conf->reboot == REBOOT_REQ) {
+ 		INFO("Container requested reboot");
+@@ -2065,7 +2296,12 @@ WRAP_API_1(bool, lxcapi_reboot2, int)
+ static bool do_lxcapi_shutdown(struct lxc_container *c, int timeout)
+ {
+ 	__do_close int pidfd = -EBADF, state_client_fd = -EBADF;
++#ifdef HAVE_ISULAD
++	// isulad: keep default signal the same as docker
++	int haltsignal = SIGTERM;
++#else
+ 	int haltsignal = SIGPWR;
++#endif
+ 	pid_t pid = -1;
+ 	lxc_state_t states[MAX_STATE] = {0};
+ 	int killret, ret;
+@@ -2084,9 +2320,10 @@ static bool do_lxcapi_shutdown(struct lxc_container *c, int timeout)
+ 	/* Detect whether we should send SIGRTMIN + 3 (e.g. systemd). */
+ 	if (c->lxc_conf && c->lxc_conf->haltsignal)
+ 		haltsignal = c->lxc_conf->haltsignal;
++#ifndef HAVE_ISULAD
+ 	else if (task_blocks_signal(pid, (SIGRTMIN + 3)))
+ 		haltsignal = (SIGRTMIN + 3);
+-
++#endif
+ 
+ 	/*
+ 	 * Add a new state client before sending the shutdown signal so
+@@ -2939,6 +3176,21 @@ static int lxc_unlink_exec_wrapper(void *data)
+ 	return unlink(arg);
+ }
+ 
++#ifdef HAVE_ISULAD
++static void container_sock_dir_delete(const char *name)
++{
++	/* Path produced by generate_named_unix_sock_dir() for this container name. */
++	__do_free char *dir = generate_named_unix_sock_dir(name);
++
++	if (!dir) {
++		ERROR("Failed to generate exec unix sock dir");
++		return;
++	}
++	/* Removal is best effort: the result is deliberately discarded. */
++	(void)lxc_rmdir_onedev(dir, NULL);
++}
++#endif
++
+ static bool container_destroy(struct lxc_container *c,
+ 			      struct lxc_storage *storage)
+ {
+@@ -2949,8 +3201,19 @@ static bool container_destroy(struct lxc_container *c,
+ 	bool bret = false;
+ 	int ret = 0;
+ 
++#ifdef HAVE_ISULAD
++	if (!c)
++		return false;
++	// isulad: if container is not defined, we need to remove disk lock file
++	// which is created in lxc_container_new.
++	if (!do_lxcapi_is_defined(c)) {
++		container_disk_removelock(c);
++		return false;
++	}
++#else
+ 	if (!c || !do_lxcapi_is_defined(c))
+ 		return false;
++#endif
+ 
+ 	conf = c->lxc_conf;
+ 	if (container_disk_lock(c))
+@@ -3070,8 +3333,20 @@ static bool container_destroy(struct lxc_container *c,
+ 	if (ret < 0) {
+ 		ERROR("Failed to destroy directory \"%s\" for \"%s\"", path,
+ 		      c->name);
++#ifdef HAVE_ISULAD
++		char msg[BUFSIZ] = { 0 };
++		ret = snprintf(msg, BUFSIZ, "Failed to destroy directory \"%s\": %s", path, errno ? strerror(errno) : "error");
++		if (ret < 0 || ret >= BUFSIZ) {
++			ERROR("Sprintf failed");
++			goto out;
++		}
++		c->error_string = safe_strdup(msg);
++#endif
+ 		goto out;
+ 	}
++#ifdef HAVE_ISULAD
++	container_sock_dir_delete(c->name);
++#endif
+ 	INFO("Destroyed directory \"%s\" for \"%s\"", path, c->name);
+ 
+ on_success:
+@@ -3082,6 +3357,11 @@ out:
+ 		free(path);
+ 
+ 	container_disk_unlock(c);
++#ifdef HAVE_ISULAD
++	if (bret && container_disk_removelock(c)) {
++		bret = false;
++	}
++#endif
+ 	return bret;
+ }
+ 
+@@ -4042,8 +4322,13 @@ static int lxcapi_attach(struct lxc_container *c,
+ 
+ 	current_config = c->lxc_conf;
+ 
++#ifdef HAVE_ISULAD
++	ret = lxc_attach(c, exec_function, exec_payload, options,
++			 attached_process, &c->lxc_conf->errmsg);
++#else
+ 	ret = lxc_attach(c, exec_function, exec_payload, options,
+ 			 attached_process);
++#endif
+ 	current_config = NULL;
+ 	return ret;
+ }
+@@ -4063,7 +4348,11 @@ static int do_lxcapi_attach_run_wait(struct lxc_container *c,
+ 	command.program = (char *)program;
+ 	command.argv = (char **)argv;
+ 
++#ifdef HAVE_ISULAD
++	ret = lxc_attach(c, lxc_attach_run_command, &command, options, &pid, NULL);
++#else
+ 	ret = lxc_attach(c, lxc_attach_run_command, &command, options, &pid);
++#endif
+ 	if (ret < 0)
+ 		return ret;
+ 
+@@ -5257,6 +5546,560 @@ static int do_lxcapi_seccomp_notify_fd_active(struct lxc_container *c)
+ 
+ WRAP_API(int, lxcapi_seccomp_notify_fd_active)
+ 
++#ifdef HAVE_ISULAD
++/*
++ * isulad: set the default console fifos.
++ *
++ * Stores copies of the given paths in c->lxc_conf->console.init_fifo[]: slot 0
++ * takes "in", slot 1 "out" and slot 2 "err". A NULL argument leaves its slot
++ * untouched; a non-NULL one frees and replaces the previous value.
++ * Returns false if the container or its config is missing or the memory lock
++ * cannot be taken, true otherwise.
++ */
++static bool do_lxcapi_set_terminal_default_fifos(struct lxc_container *c, const char *in, const char *out, const char *err)
++{
++	const char *paths[] = { in, out, err };
++	struct lxc_conf *conf = NULL;
++	size_t i;
++
++	if (!c || !c->lxc_conf)
++		return false;
++	if (container_mem_lock(c)) {
++		ERROR("Error getting mem lock");
++		return false;
++	}
++
++	conf = c->lxc_conf;
++	for (i = 0; i < sizeof(paths) / sizeof(paths[0]); i++) {
++		if (!paths[i])
++			continue;
++		free(conf->console.init_fifo[i]);
++		conf->console.init_fifo[i] = safe_strdup(paths[i]);
++	}
++	container_mem_unlock(c);
++	return true;
++}
++
++WRAP_API_3(bool, lxcapi_set_terminal_default_fifos, const char *, const char *, const char *)
++
++/*
++ * isulad: store a copy of info_file as the container info file path, replacing
++ * any previous value. Returns false on a missing argument or lock failure.
++ */
++static bool do_lxcapi_set_container_info_file(struct lxc_container *c, const char *info_file)
++{
++	char **slot = NULL;
++
++	if (!info_file || !c || !c->lxc_conf)
++		return false;
++	if (container_mem_lock(c)) {
++		ERROR("Error getting mem lock");
++		return false;
++	}
++	slot = &c->lxc_conf->container_info_file;
++	free(*slot);
++	*slot = safe_strdup(info_file);
++	container_mem_unlock(c);
++	return true;
++}
++
++WRAP_API_1(bool, lxcapi_set_container_info_file, const char *)
++
++/* isulad: set the container's disable_pty flag under the memory lock. */
++static bool do_lxcapi_want_disable_pty(struct lxc_container *c, bool state)
++{
++	bool usable = c && c->lxc_conf;
++
++	/* Nothing is modified unless the container is usable and the lock is held. */
++	if (!usable || container_mem_lock(c))
++		return false;
++
++	c->disable_pty = state;
++	container_mem_unlock(c);
++
++	return true;
++}
++
++WRAP_API_1(bool, lxcapi_want_disable_pty, bool)
++
++/* isulad: set the container's open_stdin flag under the memory lock. */
++static bool do_lxcapi_want_open_stdin(struct lxc_container *c, bool state)
++{
++	bool usable = c && c->lxc_conf;
++
++	/* Nothing is modified unless the container is usable and the lock is held. */
++	if (!usable || container_mem_lock(c))
++		return false;
++
++	c->open_stdin = state;
++	container_mem_unlock(c);
++
++	return true;
++}
++
++WRAP_API_1(bool, lxcapi_want_open_stdin, bool)
++
++/*
++ * isulad: forward a set of terminal fifos to lxc_cmd_set_terminal_fifos() for
++ * this container. Returns true only if that call reports success (zero).
++ */
++static bool do_lxcapi_add_terminal_fifo(struct lxc_container *c, const char *in_fifo, const char *out_fifo, const char *err_fifo)
++{
++	bool failed;
++
++	if (!c || !c->lxc_conf)
++		return false;
++	if (container_mem_lock(c)) {
++		ERROR("Error getting mem lock");
++		return false;
++	}
++	failed = lxc_cmd_set_terminal_fifos(c->name, c->config_path, in_fifo, out_fifo, err_fifo) != 0;
++	if (failed)
++		ERROR("Error set console fifos");
++	container_mem_unlock(c);
++	return !failed;
++}
++
++WRAP_API_3(bool, lxcapi_add_terminal_fifo, const char *, const char *, const char *)
++
++/* isulad: pass height and width to lxc_cmd_set_terminal_winch(); true when it returns zero. */
++static bool do_lxcapi_set_terminal_winch(struct lxc_container *c, unsigned int height, unsigned int width)
++{
++	bool failed;
++
++	if (!c || !c->lxc_conf)
++		return false;
++	if (container_mem_lock(c)) {
++		ERROR("Error getting mem lock");
++		return false;
++	}
++
++	failed = lxc_cmd_set_terminal_winch(c->name, c->config_path, height, width) != 0;
++	if (failed)
++		ERROR("Error set terminal winch");
++
++	container_mem_unlock(c);
++	return !failed;
++}
++
++WRAP_API_2(bool, lxcapi_set_terminal_winch, unsigned int, unsigned int)
++
++/* isulad: pass suffix, height and width to lxc_exec_cmd_set_terminal_winch(); true when it returns zero. */
++static bool do_lxcapi_set_exec_terminal_winch(struct lxc_container *c, const char *suffix, unsigned int height, unsigned int width)
++{
++	bool failed;
++
++	if (!c || !c->lxc_conf)
++		return false;
++	if (container_mem_lock(c)) {
++		ERROR("Error getting mem lock");
++		return false;
++	}
++
++	failed = lxc_exec_cmd_set_terminal_winch(c->name, c->config_path, suffix, height, width) != 0;
++	if (failed)
++		ERROR("Error set terminal winch");
++
++	container_mem_unlock(c);
++	return !failed;
++}
++
++WRAP_API_3(bool, lxcapi_set_exec_terminal_winch, const char *, unsigned int, unsigned int)
++
++/*
++ * isulad: clean the resources of container c by delegating to
++ * do_lxcapi_clean_resource(). Returns true when that call returns zero.
++ */
++static bool do_lxcapi_clean_container_resource(struct lxc_container *c, pid_t pid)
++{
++	if (!c)
++		return false;
++
++	if (do_lxcapi_clean_resource(c->name, c->config_path, c->lxc_conf, pid) == 0)
++		return true;
++	ERROR("Failed to clean container %s resource", c->name);
++	return false;
++}
++
++WRAP_API_1(bool, lxcapi_clean_container_resource, pid_t)
++
++/*
++ * isulad: get container pids by delegating to do_lxcapi_get_pids(), which
++ * receives pids and pids_len unchanged. Returns true when that call returns zero.
++ */
++static bool do_lxcapi_get_container_pids(struct lxc_container *c, pid_t **pids, size_t *pids_len)
++{
++	if (!c)
++		return false;
++
++	if (do_lxcapi_get_pids(c->name, c->config_path, c->lxc_conf, pids, pids_len) == 0)
++		return true;
++	ERROR("Failed to get container %s pids", c->name);
++	return false;
++}
++
++WRAP_API_2(bool, lxcapi_get_container_pids, pid_t **,size_t *)
++
++/* isulad: store start_timeout in the container while holding the memory lock */
++static bool do_lxcapi_set_start_timeout(struct lxc_container *c, unsigned int start_timeout)
++{
++	if (c == NULL || c->lxc_conf == NULL)
++		return false;
++	if (container_mem_lock(c) == 0) {
++		c->start_timeout = start_timeout;
++		container_mem_unlock(c);
++		return true;
++	}
++	ERROR("Error getting mem lock");
++	return false;
++}
++
++WRAP_API_1(bool, lxcapi_set_start_timeout, unsigned int)
++
++/* isulad: store the image_type_oci flag in the container while holding the memory lock */
++static bool do_lxcapi_set_oci_type(struct lxc_container *c, bool image_type_oci)
++{
++	if (c == NULL || c->lxc_conf == NULL)
++		return false;
++	if (container_mem_lock(c) == 0) {
++		c->image_type_oci = image_type_oci;
++		container_mem_unlock(c);
++		return true;
++	}
++	ERROR("Error getting mem lock");
++	return false;
++}
++
++WRAP_API_1(bool, lxcapi_set_oci_type, bool)
++
++/*
++ * Read cgroup item "item" of container c and parse it as an unsigned integer
++ * (strtoull, base auto-detected). Returns 0 when the item cannot be read.
++ */
++static uint64_t metrics_get_ull(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item)
++{
++    char text[81] = {0};
++    int nread = cgroup_ops->get(cgroup_ops, item, text, sizeof(text) - 1, c->name, c->config_path);
++
++    if (nread > 0)
++        return strtoull(text, NULL, 0);
++
++    DEBUG("unable to read cgroup item %s", item);
++    return 0;
++}
++
++/*
++ * Like metrics_get_ull(), but the cgroup v2 keyword "max" is reported as
++ * UINT64_MAX. Returns 0 when the item cannot be read.
++ */
++static uint64_t metrics_get_ull_with_max(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item)
++{
++    char buf[81] = {0};
++    int len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf) - 1, c->name, c->config_path);
++
++    if (len <= 0) {
++        DEBUG("unable to read cgroup item %s", item);
++        return 0;
++    }
++
++    /* Cut at the first newline in case the value arrives with its line terminator. */
++    buf[strcspn(buf, "\n")] = '\0';
++    /* The result is 64 bits wide; ULONG_MAX would be only 32 bits on ILP32 targets. */
++    return strcmp(buf, "max") == 0 ? UINT64_MAX : strtoull(buf, NULL, 0);
++}
++
++static inline bool is_blk_metrics_read(const char *value)
++{
++    return !strcmp(value, "Read"); /* exact, case-sensitive match */
++}
++
++static inline bool is_blk_metrics_write(const char *value)
++{
++    return !strcmp(value, "Write"); /* exact, case-sensitive match */
++}
++
++static inline bool is_blk_metrics_total(const char *value)
++{
++    return !strcmp(value, "Total"); /* exact, case-sensitive match */
++}
++
++/*
++ * Parse a blkio cgroup item into *stats: three-column rows whose second
++ * column is "Read"/"Write" are summed, a two-column "Total <n>" row sets total.
++ * *stats is zeroed first so it is well defined even if the item is unreadable.
++ */
++static void metrics_get_blk_stats(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item, struct lxc_blkio_metrics *stats)
++{
++    char *buf = NULL;
++    int i = 0;
++    int len = 0;
++    int ret = 0;
++    char **lines = NULL;
++    char **cols = NULL;
++
++    (void)memset(stats, 0, sizeof(struct lxc_blkio_metrics));
++    /* The first call passes no buffer; its return value is used as the buffer size. */
++    len = cgroup_ops->get(cgroup_ops, item, NULL, 0, c->name, c->config_path);
++    if (len <= 0) {
++        DEBUG("unable to read cgroup item %s", item);
++        return;
++    }
++
++    buf = calloc(1, (size_t)len + 1);
++    if (buf == NULL) {
++        ERROR("Out of memory");
++        return;
++    }
++    ret = cgroup_ops->get(cgroup_ops, item, buf, len, c->name, c->config_path);
++    if (ret <= 0) {
++        DEBUG("unable to read cgroup item %s", item);
++        goto out;
++    }
++    lines = lxc_string_split_and_trim(buf, '\n');
++    if (lines == NULL)
++        goto out;
++
++    for (i = 0; lines[i]; i++) {
++        cols = lxc_string_split_and_trim(lines[i], ' ');
++        if (cols == NULL)
++            break;
++        if (lxc_array_len((void **)cols) == 3) {
++            if (is_blk_metrics_read(cols[1]))
++                stats->read += strtoull(cols[2], NULL, 0);
++            else if (is_blk_metrics_write(cols[1]))
++                stats->write += strtoull(cols[2], NULL, 0);
++        }
++        if (lxc_array_len((void **)cols) == 2 && is_blk_metrics_total(cols[0]))
++            stats->total = strtoull(cols[1], NULL, 0);
++        lxc_free_array((void **)cols, free);
++    }
++    lxc_free_array((void **)lines, free);
++out:
++    free(buf);
++}
++
++/*
++ * Parse an io.stat-style item into *stats. Each line looks like
++ *   259:0 rbytes=0 wbytes=12288 rios=0 wios=4 dbytes=0 dios=0
++ * and every key=value pair accepted by is_io_stat_read/is_io_stat_write is added
++ * to stats->read/stats->write; total = read + write. *stats is zeroed first.
++ */
++static void metrics_get_io_stats_v2(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item, struct lxc_blkio_metrics *stats, func_is_io_stat_read is_io_stat_read, func_is_io_stat_write is_io_stat_write)
++{
++    char *buf = NULL;
++    int i = 0;
++    int j = 0;
++    int len = 0;
++    int ret = 0;
++    char **lines = NULL;
++    char **cols = NULL;
++    char **kv = NULL;
++
++    (void)memset(stats, 0, sizeof(struct lxc_blkio_metrics));
++    len = cgroup_ops->get(cgroup_ops, item, NULL, 0, c->name, c->config_path);
++    if (len <= 0) {
++        DEBUG("unable to read cgroup item %s", item);
++        return;
++    }
++
++    buf = calloc(1, (size_t)len + 1);
++    if (buf == NULL) {
++        ERROR("Out of memory");
++        return;
++    }
++    ret = cgroup_ops->get(cgroup_ops, item, buf, len, c->name, c->config_path);
++    if (ret <= 0) {
++        DEBUG("unable to read cgroup item %s", item);
++        goto out;
++    }
++    lines = lxc_string_split_and_trim(buf, '\n');
++    if (lines == NULL)
++        goto out;
++
++    for (i = 0; lines[i]; i++) {
++        cols = lxc_string_split_and_trim(lines[i], ' ');
++        if (cols == NULL || lxc_array_len((void **)cols) < 2) {
++            /* Release the columns of a too-short line before giving up on the rest. */
++            lxc_free_array((void **)cols, free);
++            break;
++        }
++        len = lxc_array_len((void **)cols);
++        for (j = 1; j < len; j++) {
++            kv = lxc_string_split(cols[j], '=');
++            if (kv != NULL && lxc_array_len((void **)kv) == 2) {
++                if (is_io_stat_read(kv[0]))
++                    stats->read += strtoull(kv[1], NULL, 0);
++                else if (is_io_stat_write(kv[0]))
++                    stats->write += strtoull(kv[1], NULL, 0);
++            }
++            lxc_free_array((void **)kv, free);
++        }
++        lxc_free_array((void **)cols, free);
++    }
++    stats->total = stats->read + stats->write;
++    lxc_free_array((void **)lines, free);
++out:
++    free(buf);
++}
++
++/*
++ * Find the first line of cgroup item "item" whose key is exactly "match" and
++ * return its space-separated column number "column" (0 is the key itself)
++ * parsed with strtoull. Returns 0 if the item cannot be read, no line matches
++ * or the line has too few columns.
++ */
++static uint64_t metrics_match_get_ull(struct lxc_container *c, struct cgroup_ops *cgroup_ops, const char *item, const char *match, int column)
++{
++#define BUFSIZE 4096
++    char buf[BUFSIZE] = {0};
++    int i = 0;
++    int len = 0;
++    uint64_t val = 0;
++    char **lines = NULL;
++    char **cols = NULL;
++    size_t matchlen = strlen(match);
++
++    len = cgroup_ops->get(cgroup_ops, item, buf, sizeof(buf) - 1, c->name, c->config_path);
++    if (len <= 0) {
++        DEBUG("unable to read cgroup item %s", item);
++        return 0;
++    }
++
++    lines = lxc_string_split_and_trim(buf, '\n');
++    if (lines == NULL)
++        return 0;
++
++    for (i = 0; lines[i]; i++) {
++        /*
++         * A bare prefix test would let a key select a longer key that merely
++         * starts with it; require the separator or end of line after the key.
++         */
++        if (strncmp(lines[i], match, matchlen) != 0 ||
++            (lines[i][matchlen] != ' ' && lines[i][matchlen] != '\0'))
++            continue;
++
++        cols = lxc_string_split_and_trim(lines[i], ' ');
++        if (cols == NULL)
++            break;
++        if (column >= 0 && (size_t)column < lxc_array_len((void **)cols))
++            val = strtoull(cols[column], NULL, 0);
++        lxc_free_array((void **)cols, free);
++        break;
++    }
++    lxc_free_array((void **)lines, free);
++    return val;
++}
++
++static bool is_io_stat_rbytes(const char *value)
++{
++    return !strcmp(value, "rbytes"); /* exact key match */
++}
++
++static bool is_io_stat_wbytes(const char *value)
++{
++    return !strcmp(value, "wbytes"); /* exact key match */
++}
++
++static bool is_io_stat_rios(const char *value)
++{
++    return !strcmp(value, "rios"); /* exact key match */
++}
++
++static bool is_io_stat_wios(const char *value)
++{
++    return !strcmp(value, "wios"); /* exact key match */
++}
++
++/* Fill *metrics from a unified (cgroup v2) hierarchy; usec CPU times become nsec. Always true. */
++static bool unified_metrics_get(struct lxc_container *c, struct cgroup_ops *cgroup_ops, struct lxc_container_metrics *metrics)
++{
++	// cpu
++	metrics->cpu_use_nanos = metrics_match_get_ull(c, cgroup_ops, "cpu.stat", "usage_usec", 1) * 1000;
++	metrics->cpu_use_user = metrics_match_get_ull(c, cgroup_ops, "cpu.stat", "user_usec", 1) * 1000;
++	metrics->cpu_use_sys = metrics_match_get_ull(c, cgroup_ops, "cpu.stat", "system_usec", 1) * 1000;
++	// io
++	metrics_get_io_stats_v2(c, cgroup_ops, "io.stat", &metrics->io_service_bytes, is_io_stat_rbytes, is_io_stat_wbytes);
++	metrics_get_io_stats_v2(c, cgroup_ops, "io.stat", &metrics->io_serviced, is_io_stat_rios, is_io_stat_wios);
++	// memory
++	metrics->mem_used = metrics_get_ull(c, cgroup_ops, "memory.current");
++	metrics->mem_limit = metrics_get_ull_with_max(c, cgroup_ops, "memory.max");
++	metrics->inactive_file_total = metrics_match_get_ull(c, cgroup_ops, "memory.stat", "inactive_file", 1);
++	metrics->cache = metrics_match_get_ull(c, cgroup_ops, "memory.stat", "file", 1);
++	metrics->cache_total = metrics->cache;
++	// not collected on this path; zeroed so the caller never reads stale values
++	metrics->rss_bytes = 0;
++	metrics->page_faults = 0;
++	metrics->major_page_faults = 0;
++	// cgroup v2 does not support kernel memory
++	metrics->kmem_used = 0;
++	metrics->kmem_limit = 0;
++	// pids
++	metrics->pids_current = metrics_get_ull(c, cgroup_ops, "pids.current");
++	return true;
++}
++
++/* isulad: fill *metrics with the state, init pid and cgroup statistics of container c */
++static bool do_lxcapi_get_container_metrics(struct lxc_container *c,  struct lxc_container_metrics *metrics)
++{
++	call_cleaner(cgroup_exit) struct cgroup_ops *cgroup_ops = NULL;
++	const char *state = NULL;
++	if (c == NULL || c->lxc_conf == NULL || metrics == NULL) {
++		return false;
++	}
++
++	state = c->state(c);
++	metrics->state = state;
++	/* init is -1 for a stopped container, otherwise whatever c->init_pid() reports */
++	if (!is_stopped(c)) {
++		metrics->init = c->init_pid(c);
++	} else {
++		metrics->init = -1;
++	}
++
++	cgroup_ops = cgroup_init(c->lxc_conf);
++	if (cgroup_ops == NULL) {
++		return false;
++	}
++	/* A unified layout is handled entirely by unified_metrics_get(). */
++	if (cgroup_ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
++		return unified_metrics_get(c, cgroup_ops, metrics);
++	}
++	/* Otherwise the per-controller items (cpuacct.*, pids.*, memory.*, blkio.*) are read. */
++	metrics->cpu_use_nanos = metrics_get_ull(c, cgroup_ops, "cpuacct.usage");
++	metrics->pids_current = metrics_get_ull(c, cgroup_ops, "pids.current");
++
++	metrics->rss_bytes = metrics_match_get_ull(c,cgroup_ops, "memory.stat", "rss", 1);
++	metrics->page_faults = metrics_match_get_ull(c,cgroup_ops, "memory.stat", "pgfault", 1);
++	metrics->major_page_faults = metrics_match_get_ull(c,cgroup_ops, "memory.stat", "pgmajfault", 1);
++
++	metrics->cpu_use_user = metrics_match_get_ull(c, cgroup_ops, "cpuacct.stat", "user", 1);
++	metrics->cpu_use_sys = metrics_match_get_ull(c, cgroup_ops, "cpuacct.stat", "system", 1);
++
++	// Try to read CFQ stats available on all CFQ enabled kernels first
++	metrics_get_blk_stats(c, cgroup_ops, "blkio.io_serviced_recursive", &metrics->io_serviced);
++	if (metrics->io_serviced.read == 0 && metrics->io_serviced.write == 0 && metrics->io_serviced.total == 0) {
++		metrics_get_blk_stats(c, cgroup_ops, "blkio.throttle.io_service_bytes", &metrics->io_service_bytes);
++		metrics_get_blk_stats(c, cgroup_ops, "blkio.throttle.io_serviced", &metrics->io_serviced);
++	} else {
++		metrics_get_blk_stats(c, cgroup_ops, "blkio.io_service_bytes_recursive", &metrics->io_service_bytes);
++	}
++	/* An all-zero first read above selects the blkio.throttle.* items instead. */
++	metrics->mem_used = metrics_get_ull(c, cgroup_ops, "memory.usage_in_bytes");
++	metrics->mem_limit = metrics_get_ull(c, cgroup_ops, "memory.limit_in_bytes");
++	metrics->kmem_used = metrics_get_ull(c, cgroup_ops, "memory.kmem.usage_in_bytes");
++	metrics->kmem_limit = metrics_get_ull(c, cgroup_ops, "memory.kmem.limit_in_bytes");
++
++	metrics->cache = metrics_match_get_ull(c, cgroup_ops, "memory.stat", "cache", 1);
++	metrics->cache_total = metrics_match_get_ull(c, cgroup_ops, "memory.stat", "total_cache", 1);
++	metrics->inactive_file_total = metrics_match_get_ull(c, cgroup_ops, "memory.stat", "total_inactive_file", 1);
++
++	return true;
++}
++
++WRAP_API_1(bool, lxcapi_get_container_metrics, struct lxc_container_metrics *)
++
++#endif
++
+ struct lxc_container *lxc_container_new(const char *name, const char *configpath)
+ {
+ 	struct lxc_container *c;
+@@ -5310,10 +6153,24 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
+ 		goto err;
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	if (!set_oci_hook_config_filename(c)) {
++		fprintf(stderr, "Error allocating oci hooks file pathname\n");
++		goto err;
++	}
++
++	if (load_config && file_exists(c->configfile)) {
++		if (!lxcapi_load_config(c, NULL)) {
++			fprintf(stderr, "Failed to load config for %s\n", name);
++			goto err;
++		}
++	}
++#else
+ 	if (file_exists(c->configfile) && !lxcapi_load_config(c, NULL)) {
+ 		fprintf(stderr, "Failed to load config for %s\n", name);
+ 		goto err;
+ 	}
++#endif
+ 
+ 	rc = ongoing_create(c);
+ 	switch (rc) {
+@@ -5337,6 +6194,9 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
+ 
+ 	c->daemonize			= true;
+ 	c->pidfile			= NULL;
++#ifdef HAVE_ISULAD
++	c->image_type_oci = false;
++#endif
+ 
+ 	/* Assign the member functions. */
+ 	c->is_defined			= lxcapi_is_defined;
+@@ -5400,6 +6260,20 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath
+ 	c->umount			= lxcapi_umount;
+ 	c->seccomp_notify_fd		= lxcapi_seccomp_notify_fd;
+ 	c->seccomp_notify_fd_active	= lxcapi_seccomp_notify_fd_active;
++#ifdef HAVE_ISULAD
++	c->set_container_info_file = lxcapi_set_container_info_file;
++	c->set_terminal_init_fifos = lxcapi_set_terminal_default_fifos;
++	c->add_terminal_fifos = lxcapi_add_terminal_fifo;
++	c->set_terminal_winch = lxcapi_set_terminal_winch;
++	c->set_exec_terminal_winch = lxcapi_set_exec_terminal_winch;
++	c->want_disable_pty = lxcapi_want_disable_pty;
++	c->want_open_stdin = lxcapi_want_open_stdin;
++	c->clean_container_resource = lxcapi_clean_container_resource;
++	c->get_container_pids = lxcapi_get_container_pids;
++	c->set_start_timeout = lxcapi_set_start_timeout;
++	c->set_oci_type = lxcapi_set_oci_type;
++	c->get_container_metrics = lxcapi_get_container_metrics;
++#endif
+ 
+ 	return c;
+ 
+@@ -5408,6 +6282,19 @@ err:
+ 	return NULL;
+ }
+ 
++#ifdef HAVE_ISULAD
++// isulad: new container via do_lxc_container_new() with false as last argument, i.e. without loading the config, to save time
++struct lxc_container *lxc_container_without_config_new(const char *name, const char *configpath)
++{
++	return do_lxc_container_new(name, configpath, false);
++}
++
++struct lxc_container *lxc_container_new(const char *name, const char *configpath)
++{
++	return do_lxc_container_new(name, configpath, true); /* presumably true = load the config file - confirm */
++}
++#endif
++
+ int lxc_get_wait_states(const char **states)
+ {
+ 	int i;
+@@ -5578,11 +6465,21 @@ int list_active_containers(const char *lxcpath, char ***nret,
+ 				continue;
+ 		}
+ 
++#ifdef HAVE_ISULAD
++		if (ct_name && ct_name_cnt) {
++			if (array_contains(&ct_name, p, ct_name_cnt)) {
++				if (is_hashed)
++					free(p);
++				continue;
++			}
++		}
++#else
+ 		if (array_contains(&ct_name, p, ct_name_cnt)) {
+ 			if (is_hashed)
+ 				free(p);
+ 			continue;
+ 		}
++#endif
+ 
+ 		if (!add_to_array(&ct_name, p, ct_name_cnt)) {
+ 			if (is_hashed)
+diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h
+index 3386bff..06e8f0b 100644
+--- a/src/lxc/lxccontainer.h
++++ b/src/lxc/lxccontainer.h
+@@ -26,6 +26,10 @@ extern "C" {
+ #define LXC_CREATE_MAXFLAGS       (1 << 1) /*!< Number of \c LXC_CREATE* flags */
+ #define LXC_MOUNT_API_V1		   1
+ 
++#ifdef HAVE_ISULAD
++#define LXC_IMAGE_OCI_KEY "lxc.imagetype.oci"
++#endif
++
+ struct bdev_specs;
+ 
+ struct lxc_snapshot;
+@@ -40,6 +44,44 @@ struct lxc_mount {
+ 	int version;
+ };
+ 
++#ifdef HAVE_ISULAD
++struct lxc_blkio_metrics {
++	uint64_t read;  /* sum of the values classified as read operations */
++	uint64_t write; /* sum of the values classified as write operations */
++	uint64_t total; /* value of the "Total" row, or read + write for io.stat */
++};
++
++struct lxc_container_metrics {
++    /* State of container, as returned by the container's state() callback */
++    const char *state;
++    /* The process ID of the container's init process; -1 when the container is stopped */
++    pid_t init;
++    /* Current pids (cgroup item "pids.current") */
++    uint64_t pids_current;
++    /* CPU usage; on a unified layout all three are usec values multiplied by 1000 */
++    uint64_t cpu_use_nanos;
++    uint64_t cpu_use_user;
++    uint64_t cpu_use_sys;
++    /* BlkIO usage: byte counts and operation counts respectively */
++    struct lxc_blkio_metrics io_service_bytes;
++    struct lxc_blkio_metrics io_serviced;
++    /* Memory usage */
++    uint64_t mem_used;
++    uint64_t mem_limit;
++    uint64_t rss_bytes;
++    uint64_t page_faults;
++    uint64_t major_page_faults;
++    /* Kernel Memory usage (set to 0 on a unified cgroup layout) */
++    uint64_t kmem_used;
++    uint64_t kmem_limit;
++    /* Cache usage (both fields hold the same value on a unified cgroup layout) */
++    uint64_t cache;
++    uint64_t cache_total;
++    /* total inactive file */
++    uint64_t inactive_file_total;
++};
++#endif
++
+ /*!
+  * An LXC container.
+  *
+@@ -107,6 +149,38 @@ struct lxc_container {
+ 	/*! Full path to configuration file */
+ 	char *config_path;
+ 
++#ifdef HAVE_ISULAD
++	/*! isulad:
++	 * \private
++	 * Exit FIFO file that is opened to monitor the state of the lxc monitor process.
++	 */
++	char *exit_fifo;
++	/*! Whether container wishes to create pty or pipes for console log */
++	bool disable_pty;
++
++	/*! Whether container wishes to keep stdin active */
++	bool open_stdin;
++
++	/*!
++	 * \private
++	 * isulad: support oci hook from json file
++	 * full path of json file
++	 * */
++	char *ocihookfile;
++
++	/*! isulad:
++	 * \private
++	 * start_timeout.
++	 */
++	unsigned int start_timeout;
++
++	/*! isulad:
++	 * \private
++	 *  image_type_oci
++	 */
++	bool image_type_oci;
++#endif
++
+ 	/*!
+ 	 * \brief Determine if \c /var/lib/lxc/$name/config exists.
+ 	 *
+@@ -884,6 +958,115 @@ struct lxc_container {
+ 	 * \return Mount fd of the container's devpts instance.
+ 	 */
+ 	int (*devpts_fd)(struct lxc_container *c);
++
++#ifdef HAVE_ISULAD
++	/*! isulad add
++	 * \brief An API call to set the path of info file
++	 *
++	 * \param c Container.
++	 * \param info_file Value of the path of info file.
++	 *
++	 * \return \c true on success, else \c false.
++	 */
++	bool (*set_container_info_file) (struct lxc_container *c, const char *info_file);
++
++	/*! isulad add
++	 * \brief An API call to change the path of the console default fifos
++	 *
++	 * \param c Container.
++	 * \param in,out,err Paths of the in/out/err console fifos; a NULL path leaves that fifo unchanged.
++	 *
++	 * \return \c true on success, else \c false.
++	 */
++	bool (*set_terminal_init_fifos)(struct lxc_container *c, const char *in, const char *out, const char *err);
++
++	/*! isulad add
++	 * \brief An API call to add the path of terminal fifos
++	 *
++	 * \param c Container.
++	 * \param in,out,err Paths of the in/out/err terminal fifos to add.
++	 *
++	 * \return \c true on success, else \c false.
++	 */
++	bool (*add_terminal_fifos)(struct lxc_container *c, const char *in, const char *out, const char *err);
++
++	bool (*set_terminal_winch)(struct lxc_container *c, unsigned int height, unsigned int width);
++
++	bool (*set_exec_terminal_winch)(struct lxc_container *c, const char *suffix, unsigned int height, unsigned int width);
++
++	/*!
++	 * \brief Change whether the container wants to create pty or pipes
++	 * from the console log.
++	 *
++	 * \param c Container.
++	 * \param state Value for the disable pty bit (0 or 1).
++	 *
++	 * \return \c true on success, else \c false.
++	 */
++	bool (*want_disable_pty)(struct lxc_container *c, bool state);
++
++	/*!
++	 * \brief Change whether the container wants to keep stdin active
++	 * for parent process of container
++	 *
++	 * \param c Container.
++	 * \param state Value for the open_stdin bit (0 or 1).
++	 *
++	 * \return \c true on success, else \c false.
++	 */
++	bool (*want_open_stdin)(struct lxc_container *c, bool state);
++
++	/*! isulad add
++	 * \brief An API call to clean resources of container
++	 *
++	 * \param c Container.
++	 * \param pid Value of container process.
++	 *
++	 * \return \c true on success, else \c false.
++	 */
++	bool (*clean_container_resource) (struct lxc_container *c, pid_t pid);
++
++	/*! isulad add
++	 * \brief An API call to get container pids
++	 *
++	 * \param c Container.
++	 * \param pids Value of container pids.
++	 * \param pids_len Value of container pids len.
++	 *
++	 * \return \c true on success, else \c false.
++	 */
++	bool (*get_container_pids)(struct lxc_container *c,pid_t **pids,size_t *pids_len);
++
++	/*! isulad add
++	 * \brief An API call to set start timeout
++	 *
++	 * \param c Container.
++	 * \param start_timeout Value of start timeout.
++	 *
++	 * \return \c true on success, else \c false.
++	 */
++	bool (*set_start_timeout)(struct lxc_container *c, unsigned int start_timeout);
++
++	/*! isulad add
++	 * \brief An API call to set oci type
++	 *
++	 * \param c Container.
++	 * \param image_type_oci image oci type.
++	 *
++	 * \return \c true on success, else \c false.
++	 */
++	bool (*set_oci_type)(struct lxc_container *c, bool image_type_oci);
++	
++	/*! isulad add
++	 * \brief An API call to get container metrics
++	 *
++	 * \param c Container.
++	 * \param metrics Structure that receives the collected metrics.
++	 *
++	 * \return \c true on success, else \c false.
++	 */
++	bool (*get_container_metrics)(struct lxc_container *c, struct lxc_container_metrics *metrics);
++#endif
+ };
+ 
+ /*!
+@@ -1017,6 +1200,20 @@ struct lxc_console_log {
+  */
+ struct lxc_container *lxc_container_new(const char *name, const char *configpath);
+ 
++#ifdef HAVE_ISULAD
++/*!
++ * \brief Create a new container without loading config.
++ *
++ * \param name Name to use for container.
++ * \param configpath Full path to configuration file to use.
++ *
++ * \return Newly-allocated container, or \c NULL on error.
++ *
++ * \note This function can only be used for listing containers.
++ */
++struct lxc_container *lxc_container_without_config_new(const char *name, const char *configpath);
++#endif
++
+ /*!
+  * \brief Add a reference to the specified container.
+  *
+diff --git a/src/lxc/start.c b/src/lxc/start.c
+index 9f68304..70af128 100644
+--- a/src/lxc/start.c
++++ b/src/lxc/start.c
+@@ -344,7 +344,11 @@ static int setup_signal_fd(sigset_t *oldmask)
+ {
+ 	int ret;
+ 	sigset_t mask;
++#ifdef HAVE_ISULAD
++	const int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH, SIGTERM};
++#else
+ 	const int signals[] = {SIGBUS, SIGILL, SIGSEGV, SIGWINCH};
++#endif
+ 
+ 	/* Block everything except serious error signals. */
+ 	ret = sigfillset(&mask);
+@@ -625,6 +629,16 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
+ 
+ 	TRACE("Mainloop is ready");
+ 
++#ifdef HAVE_ISULAD
++	// iSulad: close stdin pipe if we do not want open_stdin with container stdin
++	if (!handler->conf->console.open_stdin) {
++		if (handler->conf->console.pipes[0][1] > 0) {
++			close(handler->conf->console.pipes[0][1]);
++			handler->conf->console.pipes[0][1] = -1;
++		}
++	}
++#endif
++
+ 	ret = lxc_mainloop(&descr, -1);
+ 	if (descr.type == LXC_MAINLOOP_EPOLL)
+ 		close_prot_errno_disarm(descr.epfd);
+@@ -634,7 +648,11 @@ int lxc_poll(const char *name, struct lxc_handler *handler)
+ 	if (console) {
+ 		ret = lxc_terminal_mainloop_add(&descr_console, console);
+ 		if (ret == 0)
++#ifdef HAVE_ISULAD
++			ret = isulad_safe_mainloop(&descr_console, 100);
++#else
+ 			ret = lxc_mainloop(&descr_console, 0);
++#endif
+ 	}
+ 
+ out_mainloop_console:
+@@ -718,6 +736,12 @@ struct lxc_handler *lxc_init_handler(struct lxc_handler *old,
+ 	}
+ 
+ 	handler->name = name;
++
++#ifdef HAVE_ISULAD
++	handler->exit_code = -1; /* isulad: record exit code of container */
++	handler->image_type_oci = false;
++#endif
++
+ 	if (daemonize)
+ 		handler->transient_pid = lxc_raw_getpid();
+ 	else
+@@ -768,6 +792,10 @@ int lxc_init(const char *name, struct lxc_handler *handler)
+ 	int ret;
+ 	const char *loglevel;
+ 	struct lxc_conf *conf = handler->conf;
++#ifdef HAVE_ISULAD
++	conf->console.disable_pty = handler->disable_pty;
++	conf->console.open_stdin = handler->open_stdin;
++#endif
+ 
+ 	handler->monitor_pid = lxc_raw_getpid();
+ 	status_fd = open("/proc/self/status", O_RDONLY | O_CLOEXEC);
+@@ -908,6 +936,186 @@ void lxc_expose_namespace_environment(const struct lxc_handler *handler)
+ 	}
+ }
+ 
++
++#ifdef HAVE_ISULAD
++/* isulad: start timeout thread */
++typedef enum {
++	START_INIT,    /* value global_timeout_state starts out with */
++	START_TIMEOUT, /* presumably set once the start timeout has expired - TODO confirm */
++	START_MAX,     /* presumably an end marker rather than a real state - TODO confirm */
++} start_timeout_t;
++
++static start_timeout_t global_timeout_state = START_INIT;
++static sem_t global_timeout_sem;
++
++struct start_timeout_conf {
++	unsigned int timeout; /* presumably the start timeout to wait for; unit not visible here - TODO confirm */
++	int errfd;            /* presumably the fd a timeout error is reported on - TODO confirm */
++};
++
++void trim_line(char *s)
++{
++	size_t n = strlen(s);
++
++	/* Strip trailing newlines, but never shorten the string below one character. */
++	for (; n > 1 && s[n - 1] == '\n'; n--)
++		s[n - 1] = '\0';
++}
++
++/*
++ * Append every pid listed in "path" (one per line) to *pids, whose element count
++ * is *len. Returns -1 if the file cannot be opened or memory runs out (*pids is
++ * then freed and *len reset). NOTE(review): lines are not validated (atoll).
++ */
++static int _read_procs_file(const char *path, pid_t **pids, size_t *len)
++{
++	FILE *f;
++	char *line = NULL;
++	size_t sz = 0;
++	pid_t *tmp_pids = NULL;
++	int ret = 0;
++
++	f = fopen_cloexec(path, "r");
++	if (!f)
++		return -1;
++
++	while (getline(&line, &sz, f) != -1) {
++		trim_line(line);
++		if (lxc_mem_realloc((void **)&tmp_pids, sizeof(pid_t) * (*len + 1), *pids, sizeof(pid_t) * (*len)) != 0) {
++			ERROR("out of memory");
++			free(*pids);
++			*pids = NULL;
++			*len = 0; /* keep the count in step with the now empty array */
++			ret = -1;
++			break;
++		}
++		*pids = tmp_pids;
++		(*pids)[(*len)++] = (pid_t)atoll(line);
++	}
++	free(line);
++	fclose(f);
++	return ret;
++}
++
++static int _recursive_read_cgroup_procs(const char *dirpath, pid_t **pids, size_t *len)
++{
++	struct dirent *direntp = NULL;
++	DIR *dir = NULL;
++	int ret, failed = 0;
++	char pathname[PATH_MAX];
++	/* Walk dirpath recursively and feed every "cgroup.procs" file to _read_procs_file(); -1 if any step failed, else 0. */
++	dir = opendir(dirpath);
++	if (dir == NULL) {
++		WARN("Failed to open \"%s\"", dirpath);
++		return 0;	/* an unopenable directory is only warned about, not counted as a failure */
++	}
++
++	while ((direntp = readdir(dir))) {
++		struct stat mystat;
++		int rc;
++
++		if (!strcmp(direntp->d_name, ".") ||
++		                !strcmp(direntp->d_name, ".."))
++			continue;
++
++		rc = snprintf(pathname, PATH_MAX, "%s/%s", dirpath, direntp->d_name);
++		if (rc < 0 || rc >= PATH_MAX) {	/* formatting error or truncated path */
++			failed = 1;
++			continue;
++		}
++
++		if (strcmp(direntp->d_name, "cgroup.procs") == 0) {
++			if (_read_procs_file(pathname, pids, len)) {
++				failed = 1;	/* remember the failure but keep scanning the remaining entries */
++
++			}
++			continue;
++		}
++
++		ret = lstat(pathname, &mystat);	/* lstat(): a symlink is reported as a link, so it fails the S_ISDIR test below */
++		if (ret) {
++			failed = 1;
++			continue;
++		}
++
++		if (S_ISDIR(mystat.st_mode)) {
++			if (_recursive_read_cgroup_procs(pathname, pids, len) < 0)
++				failed = 1;
++		}
++	}
++
++	ret = closedir(dir);
++	if (ret) {
++		WARN("Failed to close directory \"%s\"", dirpath);
++		failed = 1;
++	}
++
++	return failed ? -1 : 0;
++}
++
++int get_all_pids(struct cgroup_ops *cg_ops, pid_t **pids, size_t *len)
++{
++	const char *devices_path = NULL;
++	/* Collect the PIDs found below the "devices" cgroup path reported by cg_ops into *pids / *len. */
++	devices_path = cg_ops->get_cgroup_full_path(cg_ops, "devices");
++	if (!file_exists(devices_path)) {
++		return 0;	/* missing path: success, *pids and *len are left untouched */
++	}
++
++	return _recursive_read_cgroup_procs(devices_path, pids, len);
++}
++
++static int set_cgroup_freezer(struct cgroup_ops *cg_ops, const char *value)
++{
++	char *fullpath;
++	int ret;
++	/* Write value to "freezer.state" under the path cg_ops reports for "freezer"; returns lxc_write_to_file()'s result. */
++	fullpath = must_make_path(cg_ops->get_cgroup_full_path(cg_ops, "freezer"), "freezer.state", NULL);
++	ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
++	free(fullpath);
++	return ret;
++}
++
++/* isulad: write FROZEN to the freezer cgroup, SIGKILL every PID found in the container cgroup, write THAWED, then wait on each PID */
++static void signal_all_processes(struct lxc_handler *handler)
++{
++	int ret;
++	struct cgroup_ops *cg_ops = handler->cgroup_ops;
++	pid_t *pids = NULL;
++	size_t len = 0, i;
++
++	ret = set_cgroup_freezer(cg_ops, "FROZEN");
++	if (ret < 0 && errno != ENOENT) {	/* ENOENT is tolerated without a warning */
++		WARN("cgroup_set frozen failed");
++	}
++
++	ret = get_all_pids(cg_ops, &pids, &len);
++	if (ret < 0) {	/* best effort: only logged, the loops below still run over pids/len */
++		WARN("failed to get all pids");
++	}
++
++	for (i = 0; i < len; i++) {
++		ret = kill(pids[i], SIGKILL);
++		if (ret < 0 && errno != ESRCH) {	/* ESRCH (no such process) is not reported */
++			WARN("Can not kill process (pid=%d) with SIGKILL for container %s", pids[i], handler->name);
++		}
++	}
++
++	ret = set_cgroup_freezer(cg_ops, "THAWED");
++	if (ret < 0 && errno != ENOENT) {
++		WARN("cgroup_set thawed failed");
++	}
++
++	for (i = 0; i < len; i++) {
++		ret = lxc_wait_for_pid_status(pids[i]);
++		if (ret < 0 && errno != ECHILD) {	/* ECHILD is not reported */
++			WARN("Failed to wait pid %d for container %s: %s", pids[i], handler->name, strerror(errno));
++		}
++	}
++
++	free(pids);
++}
++#endif
++
+ void lxc_end(struct lxc_handler *handler)
+ {
+ 	int ret;
+@@ -945,14 +1153,44 @@ void lxc_end(struct lxc_handler *handler)
+ 
+ 	handler->lsm_ops->cleanup(handler->lsm_ops, handler->conf, handler->lxcpath);
+ 
++
++#ifdef HAVE_ISULAD
++	// close maincmd fd before destroy cgroup for isulad
++	if (handler->conf->reboot == REBOOT_NONE) {
++		/* For all new state clients simply close the command socket.
++		 * This will inform all state clients that the container is
++		 * STOPPED and also prevents a race between a open()/close() on
++		 * the command socket causing a new process to get ECONNREFUSED
++		 * because we haven't yet closed the command socket.
++		 */
++		close_prot_errno_disarm(handler->conf->maincmd_fd);
++		TRACE("Closed command socket");
++	}
++	int retry_count = 0;
++	int max_retry = 10;
++retry:
++	if (cgroup_ops != NULL && !cgroup_ops->payload_destroy(cgroup_ops, handler)) {
++	        TRACE("Trying to kill all subprocess");
++	        signal_all_processes(handler);
++	        TRACE("Finished kill all subprocess");
++	        if (retry_count < max_retry) {
++				usleep(100 * 1000); /* 100 millisecond */
++				retry_count++;
++				goto retry;
++		}
++		SYSERROR("Failed to destroy cgroup path for container: \"%s\"", handler->name);
++	}
++#else
+ 	if (cgroup_ops) {
+ 		cgroup_ops->payload_destroy(cgroup_ops, handler);
+ 		cgroup_ops->monitor_destroy(cgroup_ops, handler);
+ 	}
++#endif
+ 
+ 	put_lxc_rootfs(&handler->conf->rootfs, true);
+ 
+ 	if (handler->conf->reboot == REBOOT_NONE) {
++#ifndef HAVE_ISULAD
+ 		/* For all new state clients simply close the command socket.
+ 		 * This will inform all state clients that the container is
+ 		 * STOPPED and also prevents a race between a open()/close() on
+@@ -961,12 +1199,23 @@ void lxc_end(struct lxc_handler *handler)
+ 		 */
+ 		close_prot_errno_disarm(handler->conf->maincmd_fd);
+ 		TRACE("Closed command socket");
++#endif
+ 
+ 		/* This function will try to connect to the legacy lxc-monitord
+ 		 * state server and only exists for backwards compatibility.
+ 		 */
+ 		lxc_monitor_send_state(name, STOPPED, handler->lxcpath);
+ 
++#ifdef HAVE_ISULAD
++		/* isulad: write exit code to exit fifo */
++		if (handler->conf->exit_fd >= 0) {
++			ret = write(handler->conf->exit_fd, &handler->exit_code, sizeof(int));
++			if (ret != sizeof(int)) {
++				SYSERROR("Failed to write to exit code to exit fifo.");
++			}
++		}
++#endif
++
+ 		/* The command socket is closed so no one can acces the command
+ 		 * socket anymore so there's no need to lock it.
+ 		 */
+@@ -1060,6 +1309,25 @@ static int do_start(void *data)
+ 
+ 	lxc_sync_fini_parent(handler);
+ 
++#ifdef HAVE_ISULAD
++	sigset_t mask;
++
++	/*isulad: restore default signal handlers and unblock all signals*/
++	for (int i = 1; i < NSIG; i++)
++		signal(i, SIG_DFL);
++
++	ret = sigfillset(&mask);
++	if (ret < 0) {
++		SYSERROR("Failed to fill signal mask");
++		goto out_warn_father;
++	}
++	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
++	if (ret < 0) {
++		SYSERROR("Failed to set signal mask");
++		goto out_warn_father;
++	}
++#endif
++
+ 	if (lxc_abstract_unix_recv_one_fd(data_sock1, &status_fd, NULL, 0) < 0) {
+ 		ERROR("Failed to receive status file descriptor from parent process");
+ 		goto out_warn_father;
+@@ -1153,7 +1421,11 @@ static int do_start(void *data)
+ 	 * means that migration won't work, but at least we won't spew output
+ 	 * where it isn't wanted.
+ 	 */
++#ifdef HAVE_ISULAD
++	if (!handler->disable_pty && handler->daemonize && !handler->conf->autodev) {
++#else
+ 	if (handler->daemonize && !handler->conf->autodev) {
++#endif
+ 		char path[PATH_MAX];
+ 
+ 		ret = strnprintf(path, sizeof(path), "%s/dev/null",
+@@ -1269,6 +1541,9 @@ static int do_start(void *data)
+ 	/* Setup the container, ip, names, utsname, ... */
+ 	ret = lxc_setup(handler);
+ 	if (ret < 0) {
++#ifdef HAVE_ISULAD
++		lxc_write_error_message(handler->conf->errpipe[1], "Failed to setup lxc, please check the config file.");
++#endif
+ 		ERROR("Failed to setup container \"%s\"", handler->name);
+ 		goto out_warn_father;
+ 	}
+@@ -1291,6 +1566,43 @@ static int do_start(void *data)
+ 		DEBUG("Set PR_SET_NO_NEW_PRIVS to block execve() gainable privileges");
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	/* isulad: dup2 pipe[0][0] to container stdin, pipe[1][1] to container stdout, pipe[2][1] to container stderr */
++	if (handler->disable_pty) {
++		if (handler->conf->console.pipes[0][1] >= 0) {
++			close(handler->conf->console.pipes[0][1]);
++			handler->conf->console.pipes[0][1] = -1;
++		}
++
++		if (handler->conf->console.pipes[0][0] >= 0) {
++			ret = dup2(handler->conf->console.pipes[0][0], STDIN_FILENO);
++			if (ret < 0)
++				goto out_warn_father;
++		}
++
++		if (handler->conf->console.pipes[1][0] >= 0) {
++			close(handler->conf->console.pipes[1][0]);
++			handler->conf->console.pipes[1][0] = -1;
++		}
++
++		if (handler->conf->console.pipes[1][1] >= 0) {
++			ret = dup2(handler->conf->console.pipes[1][1], STDOUT_FILENO);
++			if (ret < 0)
++				goto out_warn_father;
++		}
++		if (handler->conf->console.pipes[2][0] >= 0) {
++			close(handler->conf->console.pipes[2][0]);
++			handler->conf->console.pipes[2][0] = -1;
++		}
++
++		if (handler->conf->console.pipes[2][1] >= 0) {
++			ret = dup2(handler->conf->console.pipes[2][1], STDERR_FILENO);
++			if (ret < 0)
++				goto out_warn_father;
++		}
++	}
++#endif
++
+ 	/* If we mounted a temporary proc, then unmount it now. */
+ 	tmp_proc_unmount(handler->conf);
+ 
+@@ -1307,7 +1619,11 @@ static int do_start(void *data)
+ 
+ 	close_prot_errno_disarm(handler->sigfd);
+ 
++#ifdef HAVE_ISULAD
++	if (!handler->disable_pty && handler->conf->console.pty < 0 && handler->daemonize) {
++#else
+ 	if (handler->conf->console.pty < 0 && handler->daemonize) {
++#endif
+ 		if (devnull_fd < 0) {
+ 			devnull_fd = open_devnull();
+ 			if (devnull_fd < 0)
+@@ -1326,6 +1642,16 @@ static int do_start(void *data)
+ 	setsid();
+ 
+ 	if (handler->conf->init_cwd) {
++#ifdef HAVE_ISULAD
++		/* try to create workdir if it does not exist */
++		struct stat st;
++		if (stat(handler->conf->init_cwd, &st) < 0 && mkdir_p(handler->conf->init_cwd, 0755) < 0) {
++			SYSERROR("Try to create directory \"%s\" as workdir failed", handler->conf->init_cwd);
++			lxc_write_error_message(handler->conf->errpipe[1], "%s:%d: Failed to create workdir: %s.",
++			                        __FILE__, __LINE__, strerror(errno));
++			goto out_warn_father;
++		}
++#endif
+ 		ret = chdir(handler->conf->init_cwd);
+ 		if (ret < 0) {
+ 			SYSERROR("Could not change directory to \"%s\"",
+@@ -1372,12 +1698,26 @@ static int do_start(void *data)
+ 		}
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	if (prctl(PR_SET_KEEPCAPS, 1) < 0) {
++		SYSERROR("Failed to keep permitted capabilities");
++		goto out_warn_father;
++	}
++#endif
++
+ 	/* The container has been setup. We can now switch to an unprivileged
+ 	 * uid/gid.
+ 	 */
+ 	new_uid = handler->conf->init_uid;
+ 	new_gid = handler->conf->init_gid;
+ 
++#ifdef HAVE_ISULAD
++	// isulad: set env home in container, must before "Avoid unnecessary syscalls."
++	if (lxc_setup_env_home(new_uid) < 0) {
++		goto out_warn_father;
++	}
++#endif
++
+ 	/* Avoid unnecessary syscalls. */
+ 	if (new_uid == nsuid)
+ 		new_uid = LXC_INVALID_UID;
+@@ -1419,6 +1759,19 @@ static int do_start(void *data)
+ 		goto out_warn_father;
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	/* isulad: drop the cap of current process */
++	if (prctl(PR_SET_KEEPCAPS, 0) < 0) {
++		SYSERROR("Failed to clear permitted capabilities");
++		goto out_warn_father;
++	}
++
++	if (lxc_drop_caps(handler->conf)) {
++		SYSERROR("Failed to drop caps");
++		goto out_warn_father;
++	}
++#endif
++
+ 	if (handler->conf->monitor_signal_pdeath != SIGKILL) {
+ 		ret = lxc_set_death_signal(handler->conf->monitor_signal_pdeath,
+ 					   handler->monitor_pid, status_fd);
+@@ -1433,7 +1786,12 @@ static int do_start(void *data)
+ 	 * After this call, we are in error because this ops should not return
+ 	 * as it execs.
+ 	 */
++#ifdef HAVE_ISULAD
++	close_prot_errno_disarm(status_fd);
++	handler->ops->start(handler, handler->data, handler->daemonize ? handler->conf->errpipe[1] : -1);
++#else
+ 	handler->ops->start(handler, handler->data);
++#endif
+ 
+ out_warn_father:
+ 	/*
+@@ -1604,6 +1962,94 @@ static inline void resolve_cgroup_clone_flags(struct lxc_handler *handler)
+ 	handler->ns_unshare_flags	|= CLONE_NEWCGROUP;
+ }
+ 
++#ifdef HAVE_ISULAD
++static int lxc_write_container_info(char *filename, pid_t pid, pid_t p_pid,
++		unsigned long long start_at, unsigned long long p_start_at)
++{
++	FILE *pid_fp = NULL;
++	int ret = 0;
++	/* Write the single line "pid start_at p_pid p_start_at" to filename; returns 0 on success, -1 on failure. */
++	pid_fp = lxc_fopen(filename, "w");
++	if (pid_fp == NULL) {
++		SYSERROR("Failed to create pidfile '%s'",filename);
++		ret = -1;
++		goto out;
++	}
++
++	if (fprintf(pid_fp, "%d %llu %d %llu\n", pid, start_at, p_pid, p_start_at) < 0) {	/* field order must match the reader's sscanf() */
++		SYSERROR("Failed to write '%s'", filename);
++		ret = -1;
++		goto out;
++	}
++out:
++	if (pid_fp)
++		fclose(pid_fp);	/* NOTE(review): fclose() result is not checked, so a failed flush goes unnoticed here */
++	pid_fp = NULL;
++	return ret;
++}
++
++static int lxc_check_container_info(char *filename, pid_t pid, pid_t p_pid,
++		unsigned long long start_at, unsigned long long p_start_at)
++{
++	int ret = 0;
++	int num;
++	char sbuf[1024] = {0}; /* bufs for stat */
++	int saved_pid;		/* process id */
++	int saved_ppid;		/* pid of parent process */
++	unsigned long long saved_start_time;	/* start time of process as stored in the file */
++	unsigned long long saved_pstart_time;	/* start time of parent process as stored in the file */
++	/* Re-read filename and verify its four fields equal the arguments; returns 0 on a match, -1 otherwise. */
++	if ((lxc_file2str(filename, sbuf, sizeof(sbuf))) == -1) {
++		SYSERROR("Failed to read pidfile %s", filename);
++		ret = -1;
++		goto out;
++	}
++	/* %llu, not %Lu: the C standard defines the L length modifier only for floating-point conversions. */
++	num = sscanf(sbuf, "%d %llu %d %llu", &saved_pid, &saved_start_time, &saved_ppid, &saved_pstart_time);
++	if (num != 4) {
++		SYSERROR("Call sscanf error");
++		ret = -1;
++		goto out;
++	}
++
++	if (pid != saved_pid || p_pid != saved_ppid
++	                || start_at != saved_start_time || p_start_at != saved_pstart_time) {
++		ERROR("Check container info failed");
++		ret = -1;
++		goto out;
++	}
++
++out:
++	return ret;
++}
++
++/* isulad: save pid/ppid info (with both start times) to filename, then read the file back to verify it */
++static int lxc_save_container_info(char *filename, pid_t pid)
++{
++	int ret = 0;
++	pid_t p_pid = 0;
++	unsigned long long start_at = 0;
++	unsigned long long p_start_at = 0;
++
++	start_at = lxc_get_process_startat(pid);
++	p_pid = getpid();	/* the calling process is recorded as the parent */
++	p_start_at = lxc_get_process_startat(p_pid);
++
++	ret = lxc_write_container_info(filename, pid, p_pid, start_at, p_start_at);
++	if (ret != 0) {
++		goto out;
++	}
++
++	ret = lxc_check_container_info(filename, pid, p_pid, start_at, p_start_at);	/* read-back check of what was just written */
++	if (ret != 0) {
++		goto out;
++	}
++
++out:
++	return ret;
++}
++#endif
++
+ /* lxc_spawn() performs crucial setup tasks and clone()s the new process which
+  * exec()s the requested container binary.
+  * Note that lxc_spawn() runs in the parent namespaces. Any operations performed
+@@ -1741,6 +2187,32 @@ static int lxc_spawn(struct lxc_handler *handler)
+ 		handler->clone_flags &= ~CLONE_PIDFD;
+ 	TRACE("Cloned child process %d", handler->pid);
+ 
++#ifdef HAVE_ISULAD
++	/* isulad: close pipe after clone */
++	if (handler->conf->console.pipes[0][0] >= 0) {
++		close(handler->conf->console.pipes[0][0]);
++		handler->conf->console.pipes[0][0] = -1;
++	}
++
++	if (handler->conf->console.pipes[1][1] >= 0) {
++		close(handler->conf->console.pipes[1][1]);
++		handler->conf->console.pipes[1][1] = -1;
++	}
++
++	if (handler->conf->console.pipes[2][1] >= 0) {
++		close(handler->conf->console.pipes[2][1]);
++		handler->conf->console.pipes[2][1] = -1;
++	}
++
++	/* isulad: save pid/ppid info into file*/
++	if (handler->conf->container_info_file) {
++		if (lxc_save_container_info(handler->conf->container_info_file, handler->pid)) {
++			ERROR("Failed to save cloned container pid");
++			goto out_delete_net;
++		}
++	}
++#endif
++
+ 	ret = core_scheduling(handler);
+ 	if (ret < 0)
+ 		goto out_delete_net;
+@@ -1757,6 +2229,13 @@ static int lxc_spawn(struct lxc_handler *handler)
+ 	if (ret < 0)
+ 		SYSERROR("Failed to set environment variable: LXC_PID=%s", pidstr);
+ 
++#ifdef HAVE_ISULAD
++	if (handler->cgroup_ops->container_cgroup) {
++		if (setenv("LXC_CGROUP_PATH", handler->cgroup_ops->container_cgroup, 1))
++			SYSERROR("Failed to set environment variable: LXC_CGROUP_PATH=%s.", handler->cgroup_ops->container_cgroup);
++	}
++#endif
++
+ 	for (i = 0; i < LXC_NS_MAX; i++)
+ 		if (handler->ns_on_clone_flags & ns_info[i].clone_flag)
+ 			INFO("Cloned %s", ns_info[i].flag_name);
+@@ -1848,7 +2327,11 @@ static int lxc_spawn(struct lxc_handler *handler)
+ 		goto out_delete_net;
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	ret = setup_resource_limits(conf, handler->pid, conf->errpipe[1]);
++#else
+ 	ret = setup_resource_limits(conf, handler->pid);
++#endif
+ 	if (ret < 0) {
+ 		ERROR("Failed to setup resource limits");
+ 		goto out_delete_net;
+@@ -1911,6 +2394,27 @@ static int lxc_spawn(struct lxc_handler *handler)
+ 		goto out_delete_net;
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	/* isulad: Run oci prestart hook at here */
++	ret = run_oci_hooks(name, "oci-prestart", conf, lxcpath);
++	if (ret < 0) {
++		ERROR("Failed to run oci prestart hooks");
++		goto out_delete_net;
++	}
++
++	if (START_TIMEOUT == global_timeout_state) {
++		lxc_write_error_message(conf->errpipe[1], "Starting the container \"%s\" timeout.", name);
++		ERROR("Starting the container \"%s\" timeout.", name);
++		goto out_delete_net;
++	}
++
++       /* Tell the child to continue its initialization. We'll get
++        * LXC_SYNC_POST_OCI_PRESTART_HOOK when it is ready for us to run oci prestart hooks.
++        */
++       if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_OCI_PRESTART_HOOK))
++               goto out_delete_net;
++#endif
++
+ 	if (!lxc_sync_wake_child(handler, START_SYNC_FDS))
+ 		goto out_delete_net;
+ 
+@@ -1969,6 +2473,22 @@ static int lxc_spawn(struct lxc_handler *handler)
+ 	if (ret < 0)
+ 		goto out_abort;
+ 
++#ifdef HAVE_ISULAD
++	/* isulad: Run oci poststart hook at here */
++	ret = run_oci_hooks(name, "oci-poststart", conf, lxcpath);
++	if (ret < 0) {
++		ERROR("Failed to run oci poststart hooks");
++		goto out_abort;
++	}
++
++	if (START_TIMEOUT == global_timeout_state) {
++		lxc_write_error_message(conf->errpipe[1], "Starting the container \"%s\" timeout.", name);
++		ERROR("Starting the container \"%s\" timeout.", name);
++		goto out_abort;
++	}
++
++#endif
++
+ 	ret = lxc_set_state(name, handler, RUNNING);
+ 	if (ret < 0) {
+ 		ERROR("Failed to set state to \"%s\"", lxc_state2str(RUNNING));
+@@ -2014,9 +2534,82 @@ static int lxc_inherit_namespaces(struct lxc_handler *handler)
+ 	return 0;
+ }
+ 
++#ifdef HAVE_ISULAD
++/* isulad: start timeout thread function; arg is the malloc'ed struct start_timeout_conf, which this thread frees */
++static void* wait_start_timeout(void *arg)
++{
++	struct start_timeout_conf *conf = (struct start_timeout_conf *)arg;
++
++	sem_post(&global_timeout_sem);	/* releases the sem_wait() in create_start_timeout_thread() */
++
++	if (!conf || conf->timeout < 1)
++		goto out;
++
++	sleep(conf->timeout);
++
++	global_timeout_state = START_TIMEOUT;	/* set unconditionally once sleep() returns; nothing in this function cancels it */
++
++out:
++	free(conf);
++	return ((void *)0);
++}
++
++/* isulad: create start timeout thread: spawn a detached wait_start_timeout() thread and block until it has posted the semaphore */
++static int create_start_timeout_thread(struct lxc_conf *conf, unsigned int start_timeout)
++{
++	int ret = 0;
++	pthread_t ptid;
++	pthread_attr_t attr;
++	struct start_timeout_conf *timeout_conf = NULL;
++
++	if (sem_init(&global_timeout_sem, 0, 0)) {
++		ERROR("Failed to init start timeout semaphore");/*lint !e613*/
++		ret = -1;
++		return ret;
++	}
++
++	timeout_conf = malloc(sizeof(struct start_timeout_conf));
++	if (timeout_conf == NULL) {
++		ERROR("Failed to malloc start timeout conf");
++		ret = -1;
++		goto out;
++	}
++
++	memset(timeout_conf, 0, sizeof(struct start_timeout_conf));
++	timeout_conf->errfd = conf->errpipe[1];
++	timeout_conf->timeout = start_timeout;
++
++	pthread_attr_init(&attr);
++	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);	/* detached: the thread is never joined */
++	ret = pthread_create(&ptid, &attr, wait_start_timeout, timeout_conf);	/* NOTE(review): failure is a positive error number, so ret is not -1 on this path */
++	pthread_attr_destroy(&attr);
++	if (ret != 0) {
++		ERROR("Create start wait timeout thread failed");
++		free(timeout_conf);	/* the thread never started, so the allocation is released here */
++		goto out;
++	}
++
++	sem_wait(&global_timeout_sem);	/* returns after the new thread has called sem_post() */
++out:
++	sem_destroy(&global_timeout_sem);
++	return ret;
++}
++
++// isulad: send '128 + signal' if container is killed by signal.
++#define EXIT_SIGNAL_OFFSET 128
++#endif
++
++#ifdef HAVE_ISULAD
++int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
++		void *data, const char *lxcpath, bool daemonize, int *error_num,
++		unsigned int start_timeout)
++{
++	int exit_code;
++#else
+ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
+ 		void *data, const char *lxcpath, bool daemonize, int *error_num)
+ {
++#endif
+ 	int ret, status;
+ 	const char *name = handler->name;
+ 	struct lxc_conf *conf = handler->conf;
+@@ -2032,6 +2625,17 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
+ 	handler->daemonize = daemonize;
+ 	cgroup_ops = handler->cgroup_ops;
+ 
++#ifdef HAVE_ISULAD
++	/* isulad: add start timeout limit */
++	if (start_timeout > 0) {
++		ret = create_start_timeout_thread(conf, start_timeout);
++		if (ret) {
++			ERROR("Failed to create start timeout thread for container \"%s\".", name);
++			goto out_abort;
++		}
++	}
++#endif
++
+ 	if (!attach_block_device(handler->conf)) {
+ 		ERROR("Failed to attach block device");
+ 		ret = -1;
+@@ -2116,11 +2720,13 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
+ 		goto out_delete_network;
+ 	}
+ 
++#ifndef HAVE_ISULAD
+ 	if (!handler->init_died && handler->pid > 0) {
+ 		ERROR("Child process is not killed");
+ 		ret = -1;
+ 		goto out_delete_network;
+ 	}
++#endif
+ 
+ 	status = lxc_wait_for_pid_status(handler->pid);
+ 	if (status < 0)
+@@ -2130,6 +2736,20 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
+ 	 * reboot. This should mean it was an lxc-execute which simply exited.
+ 	 * In any case, treat it as a 'halt'.
+ 	 */
++#ifdef HAVE_ISULAD
++	// isulad: record log for container init exit
++	if (WIFSIGNALED(status)) {
++		int signal_nr = WTERMSIG(status);
++		exit_code = EXIT_SIGNAL_OFFSET + signal_nr;
++		ERROR("Container \"%s\" init exited with signal %d", name, signal_nr);
++	} else if (WIFEXITED(status)) {
++		exit_code = WEXITSTATUS(status);
++		ERROR("Container \"%s\" init exited with status %d", name, exit_code);
++	} else {
++		exit_code = -1;
++		ERROR("Container \"%s\" init exited with unknown status", name);
++	}
++#else
+ 	if (WIFSIGNALED(status)) {
+ 		int signal_nr = WTERMSIG(status);
+ 		switch(signal_nr) {
+@@ -2148,16 +2768,25 @@ int __lxc_start(struct lxc_handler *handler, struct lxc_operations *ops,
+ 			break;
+ 		}
+ 	}
++#endif
+ 
+ 	ret = lxc_restore_phys_nics_to_netns(handler);
+ 	if (ret < 0)
+ 		ERROR("Failed to move physical network devices back to parent network namespace");
+ 
++#ifdef HAVE_ISULAD
++	lxc_monitor_send_exit_code(name, exit_code, handler->lxcpath);
++#else
+ 	lxc_monitor_send_exit_code(name, status, handler->lxcpath);
++#endif
+ 	lxc_error_set_and_log(handler->pid, status);
+ 	if (error_num)
+ 		*error_num = handler->exit_status;
+ 
++#ifdef HAVE_ISULAD
++	handler->exit_code = exit_code; /* record exit code */
++#endif
++
+ 	lxc_delete_network(handler);
+ 	detach_block_device(handler->conf);
+ 	lxc_end(handler);
+@@ -2187,7 +2816,11 @@ struct start_args {
+ 	char *const *argv;
+ };
+ 
++#ifdef HAVE_ISULAD
++static int start(struct lxc_handler *handler, void* data, int fd)
++#else
+ static int start(struct lxc_handler *handler, void* data)
++#endif
+ {
+ 	struct start_args *arg = data;
+ 
+@@ -2195,6 +2828,9 @@ static int start(struct lxc_handler *handler, void* data)
+ 
+ 	execvp(arg->argv[0], arg->argv);
+ 	SYSERROR("Failed to exec \"%s\"", arg->argv[0]);
++#ifdef HAVE_ISULAD
++	lxc_write_error_message(fd, "exec: \"%s\": %s.", arg->argv[0], strerror(errno));
++#endif
+ 	return 0;
+ }
+ 
+@@ -2212,14 +2848,22 @@ static struct lxc_operations start_ops = {
+ };
+ 
+ int lxc_start(char *const argv[], struct lxc_handler *handler,
++#ifdef HAVE_ISULAD
++	      const char *lxcpath, bool daemonize, int *error_num, unsigned int start_timeout)
++#else
+ 	      const char *lxcpath, bool daemonize, int *error_num)
++#endif
+ {
+ 	struct start_args start_arg = {
+ 		.argv = argv,
+ 	};
+ 
+ 	TRACE("Doing lxc_start");
++#ifdef HAVE_ISULAD
++	return __lxc_start(handler, &start_ops, &start_arg, lxcpath, daemonize, error_num, start_timeout);
++#else
+ 	return __lxc_start(handler, &start_ops, &start_arg, lxcpath, daemonize, error_num);
++#endif
+ }
+ 
+ static void lxc_destroy_container_on_signal(struct lxc_handler *handler,
+@@ -2291,3 +2935,261 @@ static bool do_destroy_container(struct lxc_handler *handler)
+ 
+ 	return storage_destroy(handler->conf);
+ }
++
++#ifdef HAVE_ISULAD
++/* isulad: set env for clean resources: append LXC_* entries to the env array of every OCI poststop hook; 0 on success, -1 if growing an array fails */
++static int clean_resource_set_env(struct lxc_handler *handler)
++{
++	const char *name = handler->name;
++	struct lxc_conf *conf = handler->conf;
++	char bufstr[PATH_MAX + 1];	/* scratch for one "KEY=value" string; snprintf() truncates longer values */
++	int i = 0;
++	int j = 0;
++	int len = 2; //set "LXC_PID" and "LXC_CGNS_AWARE"
++
++	if (conf == NULL || conf->ocihooks == NULL || conf->ocihooks->poststop_len == 0) {
++		return 0;	/* no poststop hooks configured: nothing to do */
++	}
++
++	if (name) {	/* the checks below count one extra slot per optional variable that will be emitted */
++		len++;
++	}
++	if (conf->rcfile) {
++		len++;
++	}
++	if (conf->rootfs.mount) {
++		len++;
++	}
++	if (conf->rootfs.path) {
++		len++;
++	}
++	if (conf->console.path) {
++		len++;
++	}
++	if (conf->console.log_path) {
++		len++;
++	}
++	if (handler->cgroup_ops->container_cgroup) {
++		len++;
++	}
++
++	for (; i < conf->ocihooks->poststop_len; i++) {
++		size_t cap = conf->ocihooks->poststop[i]->env_len;
++		size_t newcap = cap + len + 1;	/* the +1 is presumably a terminating NULL slot; TODO confirm against lxc_grow_array() */
++		if (lxc_grow_array((void ***)&(conf->ocihooks->poststop[i]->env), &cap, newcap, 1) != 0) {
++			return -1;
++		}
++		j = conf->ocihooks->poststop[i]->env_len;	/* append after the hook's existing entries */
++		/* Start of environment variable setup for hooks. */
++		if (name) {
++			snprintf(bufstr, PATH_MAX + 1, "LXC_NAME=%s", name);
++			conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr);
++		}
++		if (conf->rcfile) {
++			snprintf(bufstr, PATH_MAX + 1, "LXC_CONFIG_FILE=%s", conf->rcfile);
++			conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr);
++		}
++		if (conf->rootfs.mount) {
++			snprintf(bufstr, PATH_MAX + 1, "LXC_ROOTFS_MOUNT=%s", conf->rootfs.mount);
++			conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr);
++		}
++		if (conf->rootfs.path) {
++			snprintf(bufstr, PATH_MAX + 1, "LXC_ROOTFS_PATH=%s", conf->rootfs.path);
++			conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr);
++		}
++		if (conf->console.path) {
++			snprintf(bufstr, PATH_MAX + 1, "LXC_CONSOLE=%s", conf->console.path);
++			conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr);
++		}
++		if (conf->console.log_path) {
++			snprintf(bufstr, PATH_MAX + 1, "LXC_CONSOLE_LOGPATH=%s", conf->console.log_path);
++			conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr);
++		}
++		conf->ocihooks->poststop[i]->env[j++] = safe_strdup("LXC_CGNS_AWARE=1");
++
++		snprintf(bufstr, PATH_MAX + 1, "LXC_PID=%d", handler->pid);
++		conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr);
++		if (handler->cgroup_ops->container_cgroup) {
++			snprintf(bufstr, PATH_MAX + 1, "LXC_CGROUP_PATH=%s", handler->cgroup_ops->container_cgroup);
++			conf->ocihooks->poststop[i]->env[j++] = safe_strdup(bufstr);
++		}
++		conf->ocihooks->poststop[i]->env_len = j;	/* publish the new entry count */
++		/* End of environment variable setup for hooks. */
++	}
++	return 0;
++}
++
++/* isulad: init handler for clean: zeroed handler with every fd field marked invalid, pid recorded and the cgroup driver initialised; NULL on failure */
++static struct lxc_handler *lxc_init_clean_handler(char *name, char *lxcpath, struct lxc_conf *conf, pid_t pid)
++{
++	int i;
++	struct lxc_handler *handler;
++
++	handler = malloc(sizeof(*handler));
++	if (handler == NULL)
++		return NULL;
++
++	memset(handler, 0, sizeof(*handler));
++
++	/* Note that am_guest_unpriv() checks the effective uid. We
++	 * probably don't care if we are real root only if we are running
++	 * as root so this should be fine.
++	 */
++	handler->am_root = !am_guest_unpriv();
++	handler->data_sock[0] = handler->data_sock[1] = -1;
++	handler->conf = conf;	/* borrowed pointer: conf is supplied by the caller */
++	handler->lxcpath = lxcpath;
++	handler->pinfd = -1;
++	handler->sigfd = -EBADF;
++	handler->pidfd = -EBADF;
++	handler->init_died = false;
++	handler->monitor_status_fd = -EBADF;
++	handler->pid = pid;
++	handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1;
++	if (handler->conf->reboot == REBOOT_NONE)	/* NOTE(review): conf is dereferenced here without a NULL check */
++		lxc_list_init(&handler->conf->state_clients);
++
++	for (i = 0; i < LXC_NS_MAX; i++)
++		handler->nsfd[i] = -1;
++
++	handler->name = name;
++	handler->exit_code = -1; /* isulad: record exit code of container */
++
++	handler->cgroup_ops = cgroup_init(conf);
++	if (!handler->cgroup_ops) {
++		ERROR("Failed to initialize cgroup driver");
++		goto on_error;
++	}
++
++	INFO("Container \"%s\" 's clean handler is initialized.", name);
++
++	return handler;
++
++on_error:
++	lxc_put_handler(handler);	/* hands the partly built handler to the common release routine */
++
++	return NULL;
++}
++
++/* isulad: init handler for listing pids: zeroed handler with every fd field marked invalid and the cgroup driver initialised; handler->pid stays 0; NULL on failure */
++static struct lxc_handler *lxc_init_pids_handler(char *name, char *lxcpath, struct lxc_conf *conf)
++{
++	int i;
++	struct lxc_handler *handler;
++
++	handler = malloc(sizeof(*handler));
++	if (handler == NULL)
++		return NULL;
++
++	memset(handler, 0, sizeof(*handler));
++
++	/* Note that am_guest_unpriv() checks the effective uid. We
++	 * probably don't care if we are real root only if we are running
++	 * as root so this should be fine.
++	 */
++	handler->am_root = !am_guest_unpriv();
++	handler->data_sock[0] = handler->data_sock[1] = -1;
++	handler->conf = conf;	/* borrowed pointer: conf is supplied by the caller */
++	handler->lxcpath = lxcpath;
++	handler->pinfd = -1;
++	handler->sigfd = -EBADF;
++	handler->init_died = false;
++	handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1;
++	handler->monitor_status_fd = -EBADF;
++	handler->pidfd = -EBADF;
++	if (handler->conf->reboot == REBOOT_NONE)	/* NOTE(review): conf is dereferenced here without a NULL check */
++		lxc_list_init(&handler->conf->state_clients);
++
++	for (i = 0; i < LXC_NS_MAX; i++)
++		handler->nsfd[i] = -1;
++
++	handler->name = name;
++	handler->exit_code = -1; /* isulad: record exit code of container */
++
++	handler->cgroup_ops = cgroup_init(conf);
++	if (!handler->cgroup_ops) {
++		ERROR("Failed to initialize cgroup driver");
++		goto on_error;
++	}
++
++	INFO("Container \"%s\" 's clean handler is initialized.", name);	/* NOTE(review): log text says "clean handler" although this builds the pids handler */
++
++	return handler;
++
++on_error:
++	lxc_put_handler(handler);	/* hands the partly built handler to the common release routine */
++
++	return NULL;
++}
++
++/* isulad: do_lxcapi_clean_resource: run the OCI poststop hooks, then destroy the payload cgroup (killing leftovers, up to max_retry retries); 0 or -1 */
++int do_lxcapi_clean_resource(char *name, char *lxcpath, struct lxc_conf *conf, pid_t pid)
++{
++	int ret = 0;
++	struct lxc_handler *handler = NULL;
++	int retry_count = 0;
++	int max_retry = 10;
++
++	handler = lxc_init_clean_handler(name, lxcpath, conf, pid);
++	if (!handler) {
++		ERROR("Failed to init container %s clean handler", name);
++		/* handler is NULL: nothing to release, so do not reach lxc_put_handler() at "out" */
++		return -1;
++	}
++
++	if (clean_resource_set_env(handler) != 0) {
++		ERROR("Failed to set env for poststop hooks");
++		ret = -1;
++		goto out;
++	}
++
++	if (run_oci_hooks(handler->name, "oci-poststop", handler->conf, handler->lxcpath)) {
++		ERROR("Failed to run lxc.hook.post-stop for container \"%s\".", handler->name);
++		ret = -1;	/* hook failure is recorded, cgroup cleanup still proceeds */
++	}
++
++retry:
++	if (!handler->cgroup_ops->payload_destroy(handler->cgroup_ops, handler)) {
++		TRACE("Trying to kill all subprocess");
++		signal_all_processes(handler);
++		TRACE("Finished kill all subprocess");
++		if (retry_count < max_retry) {
++			usleep(100 * 1000); /* 100 millisecond */
++			retry_count++;
++			goto retry;
++		}
++		SYSERROR("Failed to destroy cgroup path for container: \"%s\"", handler->name);
++		ret = -1;
++	}
++
++out:
++	lxc_put_handler(handler);
++	return ret;
++}
++
++/* isulad: do_lxcapi_get_pids: collect the PIDs found in the container's cgroup into *pids / *pids_len; returns get_all_pids()'s result, or -1 if no handler could be built */
++int do_lxcapi_get_pids(char *name, char *lxcpath, struct lxc_conf *conf, pid_t **pids,size_t *pids_len)
++{
++	int ret = 0;
++	struct lxc_handler *handler = NULL;
++	struct cgroup_ops *cg_ops = NULL;
++
++	handler = lxc_init_pids_handler(name, lxcpath, conf);
++	if (!handler) {
++		ERROR("Failed to init container %s clean handler", name);
++		/* handler is NULL: nothing to release, so return without calling lxc_put_handler() */
++		return -1;
++	}
++
++	cg_ops = handler->cgroup_ops;
++	ret = get_all_pids(cg_ops, pids, pids_len);
++	if (ret < 0) {
++		WARN("failed to get all pids");
++	}
++
++	/* release the temporary handler on both the success and the warning path */
++	lxc_put_handler(handler);
++	return ret;
++}
++
++#endif
+diff --git a/src/lxc/start.h b/src/lxc/start.h
+index bbd1a83..d03e5d5 100644
+--- a/src/lxc/start.h
++++ b/src/lxc/start.h
+@@ -153,7 +153,11 @@ struct execute_args {
+ };
+ 
+ struct lxc_operations {
++#ifdef HAVE_ISULAD
++	int (*start)(struct lxc_handler *, void *, int);
++#else
+ 	int (*start)(struct lxc_handler *, void *);
++#endif
+ 	int (*post_start)(struct lxc_handler *, void *);
+ };
+ 
+@@ -184,12 +188,26 @@ static inline int inherit_fds(struct lxc_handler *handler, bool closeall)
+ 				   ARRAY_SIZE(handler->keep_fds));
+ }
+ 
++#ifdef HAVE_ISULAD
++__hidden extern int __lxc_start(struct lxc_handler *handler,
++		struct lxc_operations* ops, void *data, const char *lxcpath,
++		bool daemonize, int *error_num, unsigned int start_timeout);
++#else
+ __hidden extern int __lxc_start(struct lxc_handler *, struct lxc_operations *, void *, const char *,
+ 				bool, int *);
++#endif
+ 
+ __hidden extern int resolve_clone_flags(struct lxc_handler *handler);
+ __hidden extern void lxc_expose_namespace_environment(const struct lxc_handler *handler);
+ 
++#ifdef HAVE_ISULAD
++/*isulad: do_lxcapi_clean_resource */
++extern int do_lxcapi_clean_resource(char *name, char *lxcpath, struct lxc_conf *conf, pid_t pid);
++
++/*isulad: do_lxcapi_get_pids */
++extern int do_lxcapi_get_pids(char *name, char *lxcpath, struct lxc_conf *conf, pid_t **pids,size_t *pids_len);
++#endif
++
+ static inline bool container_uses_namespace(const struct lxc_handler *handler,
+ 					    unsigned int ns_flag)
+ {
+-- 
+2.25.1
+
diff --git a/0005-fix-compile-error.patch b/0005-fix-compile-error.patch
new file mode 100644
index 0000000000000000000000000000000000000000..0b6e0690dc40544dccae064579bce38bf276ef32
--- /dev/null
+++ b/0005-fix-compile-error.patch
@@ -0,0 +1,5735 @@
+From 1b72c39b668d736f29f5b3e6eac84c4967dbdd82 Mon Sep 17 00:00:00 2001
+From: zhangxiaoyu <zhangxiaoyu58@huawei.com>
+Date: Tue, 1 Aug 2023 09:36:57 +0800
+Subject: [PATCH] fix compile error
+
+Signed-off-by: zhangxiaoyu <zhangxiaoyu58@huawei.com>
+---
+ meson.build                     |    2 +-
+ src/lxc/af_unix.c               |   66 +
+ src/lxc/af_unix.h               |    2 +
+ src/lxc/attach.c                |   27 +-
+ src/lxc/attach_options.h        |    3 +
+ src/lxc/cgroups/cgfsng.c        |    3 +
+ src/lxc/cgroups/cgroup.h        |    5 +
+ src/lxc/cgroups/isulad_cgfsng.c | 2784 ++++++++++++++++++++-----------
+ src/lxc/commands.c              |    4 +-
+ src/lxc/conf.c                  |  197 ++-
+ src/lxc/conf.h                  |    4 +
+ src/lxc/confile.c               |   35 +-
+ src/lxc/exec_commands.c         |   23 +-
+ src/lxc/exec_commands.h         |    4 +-
+ src/lxc/execute.c               |   15 +
+ src/lxc/isulad_utils.c          |    6 +-
+ src/lxc/isulad_utils.h          |    8 +-
+ src/lxc/lsm/lsm.c               |   28 +
+ src/lxc/lsm/lsm.h               |    5 +
+ src/lxc/lsm/selinux.c           |    2 +-
+ src/lxc/lxc.h                   |   11 +
+ src/lxc/lxccontainer.c          |    4 +
+ src/lxc/mainloop.c              |    2 +-
+ src/lxc/mainloop.h              |    2 +-
+ src/lxc/seccomp.c               |   26 +-
+ src/lxc/start.c                 |   56 +-
+ src/lxc/sync.c                  |    6 +
+ src/lxc/sync.h                  |   13 +-
+ src/lxc/terminal.c              |  373 ++++-
+ src/lxc/tools/lxc_ls.c          |    2 +-
+ src/lxc/utils.c                 |    3 +
+ src/tests/aa.c                  |    4 +
+ src/tests/capabilities.c        |   12 +
+ src/tests/mount_injection.c     |    4 +
+ src/tests/proc_pid.c            |    4 +
+ src/tests/rootfs_options.c      |    4 +
+ src/tests/sys_mixed.c           |    4 +
+ src/tests/sysctls.c             |    4 +
+ 38 files changed, 2661 insertions(+), 1096 deletions(-)
+
+diff --git a/meson.build b/meson.build
+index fda8045..05bcbb2 100644
+--- a/meson.build
++++ b/meson.build
+@@ -231,7 +231,7 @@ possible_link_flags = [
+ ]
+ 
+ if want_isulad
+-    possible_cc_flags += ['-D_FORTIFY_SOURCE=2']
++    possible_cc_flags += ['-D_FORTIFY_SOURCE=2', '-O2']
+     yajldep = dependency('yajl', version : '>=2')
+     srcconf.set('HAVE_ISULAD', yajldep.found())
+     liblxc_dependencies += yajldep
+diff --git a/src/lxc/af_unix.c b/src/lxc/af_unix.c
+index 6db1864..e0a4892 100644
+--- a/src/lxc/af_unix.c
++++ b/src/lxc/af_unix.c
+@@ -175,10 +175,18 @@ int __lxc_abstract_unix_send_two_fds(int fd, int fd_first, int fd_second,
+ 	return lxc_abstract_unix_send_fds(fd, fd_send, 2, data, size);
+ }
+ 
++#ifdef HAVE_ISULAD
++static ssize_t lxc_abstract_unix_recv_fds_iov(int fd,
++					      struct unix_fds *ret_fds,
++					      struct iovec *ret_iov,
++					      size_t size_ret_iov,
++						  unsigned int timeout)
++#else
+ static ssize_t lxc_abstract_unix_recv_fds_iov(int fd,
+ 					      struct unix_fds *ret_fds,
+ 					      struct iovec *ret_iov,
+ 					      size_t size_ret_iov)
++#endif
+ {
+ 	__do_free char *cmsgbuf = NULL;
+ 	ssize_t ret;
+@@ -209,6 +217,22 @@ static ssize_t lxc_abstract_unix_recv_fds_iov(int fd,
+ 	msg.msg_iov	= ret_iov;
+ 	msg.msg_iovlen	= size_ret_iov;
+ 
++#ifdef HAVE_ISULAD
++	struct timeval out;
++	if (timeout > 0) {	/* timeout == 0 leaves the socket's receive timeout untouched */
++		memset(&out, 0, sizeof(out));
++		out.tv_sec = timeout / 1000000;	/* @timeout is expressed in microseconds */
++		out.tv_usec = timeout % 1000000;
++		ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO,
++		                 (const void *)&out, sizeof(out));	/* NOTE(review): the option is set on @fd itself and is not reset in this block - confirm callers expect it to persist */
++		if (ret < 0) {
++			ERROR("Failed to set %u timeout on containter "
++			      "state socket", timeout);
++			return -1;
++		}
++	}
++#endif
++
+ again:
+ 	ret = recvmsg(fd, &msg, MSG_CMSG_CLOEXEC);
+ 	if (ret < 0) {
+@@ -329,7 +353,11 @@ ssize_t lxc_abstract_unix_recv_fds(int fd, struct unix_fds *ret_fds,
+ 	};
+ 	ssize_t ret;
+ 
++#ifdef HAVE_ISULAD
++	ret = lxc_abstract_unix_recv_fds_iov(fd, ret_fds, &iov, 1, 0);
++#else
+ 	ret = lxc_abstract_unix_recv_fds_iov(fd, ret_fds, &iov, 1);
++#endif
+ 	if (ret < 0)
+ 		return ret;
+ 
+@@ -351,7 +379,11 @@ ssize_t lxc_abstract_unix_recv_one_fd(int fd, int *ret_fd, void *ret_data,
+ 		.fd_count_max = 1,
+ 	};
+ 
++#ifdef HAVE_ISULAD
++	ret = lxc_abstract_unix_recv_fds_iov(fd, fds, &iov, 1, 0);
++#else
+ 	ret = lxc_abstract_unix_recv_fds_iov(fd, fds, &iov, 1);
++#endif
+ 	if (ret < 0)
+ 		return ret;
+ 
+@@ -381,7 +413,11 @@ ssize_t __lxc_abstract_unix_recv_two_fds(int fd, int *fd_first, int *fd_second,
+ 		.fd_count_max = 2,
+ 	};
+ 
++#ifdef HAVE_ISULAD
++	ret = lxc_abstract_unix_recv_fds_iov(fd, fds, &iov, 1, 0);
++#else
+ 	ret = lxc_abstract_unix_recv_fds_iov(fd, fds, &iov, 1);
++#endif
+ 	if (ret < 0)
+ 		return ret;
+ 
+@@ -551,6 +587,36 @@ int lxc_socket_set_timeout(int fd, int rcv_timeout, int snd_timeout)
+ }
+ 
+ #ifdef HAVE_ISULAD
++ssize_t lxc_abstract_unix_recv_one_fd_timeout(int fd, int *ret_fd, void *ret_data,
++				      size_t size_ret_data, unsigned int timeout)
++{
++	call_cleaner(put_unix_fds) struct unix_fds *fds = NULL;
++	char buf[1] = {};	/* one-byte fallback payload buffer, used when @ret_data is NULL */
++	struct iovec iov = {
++		.iov_base	= ret_data ? ret_data : buf,
++		.iov_len	= ret_data ? size_ret_data : sizeof(buf),
++	};
++	ssize_t ret;
++
++	fds = &(struct unix_fds){
++		.fd_count_max = 1,	/* exactly one fd is expected */
++	};
++
++	ret = lxc_abstract_unix_recv_fds_iov(fd, fds, &iov, 1, timeout);	/* @timeout is forwarded unchanged */
++	if (ret < 0)
++		return ret;
++
++	if (ret == 0)
++		return ret_errno(ENODATA);	/* the receive helper reported 0 */
++
++	if (fds->fd_count_ret != fds->fd_count_max)
++		*ret_fd = -EBADF;	/* fd count mismatch: store an invalid fd, ret is still returned below */
++	else
++		*ret_fd = move_fd(fds->fd[0]);
++
++	return ret;
++}
++
+ int lxc_named_unix_open(const char *path, int type, int flags)
+ {
+ 	__do_close int fd = -EBADF;
+diff --git a/src/lxc/af_unix.h b/src/lxc/af_unix.h
+index 605afc2..de5731f 100644
+--- a/src/lxc/af_unix.h
++++ b/src/lxc/af_unix.h
+@@ -169,6 +169,8 @@ static inline void put_unix_fds(struct unix_fds *fds)
+ define_cleanup_function(struct unix_fds *, put_unix_fds);
+ 
+ #ifdef HAVE_ISULAD
++__hidden extern ssize_t lxc_abstract_unix_recv_one_fd_timeout(int fd, int *ret_fd, void *ret_data,
++				     			 							  size_t size_ret_data, unsigned int timeout);
+ __hidden extern int lxc_named_unix_open(const char *path, int type, int flags);
+ __hidden extern int lxc_named_unix_connect(const char *path);
+ #endif
+diff --git a/src/lxc/attach.c b/src/lxc/attach.c
+index 1a89001..066eb5c 100644
+--- a/src/lxc/attach.c
++++ b/src/lxc/attach.c
+@@ -1203,10 +1203,10 @@ __noreturn static void do_attach(struct attach_payload *ap)
+ 	sigset_t mask;
+ 
+ 	/*isulad: record errpipe fd*/
+-	msg_fd = init_ctx->container->lxc_conf->errpipe[1];
+-	init_ctx->container->lxc_conf->errpipe[1] = -1;
++	msg_fd = ctx->container->lxc_conf->errpipe[1];
++	ctx->container->lxc_conf->errpipe[1] = -1;
+ 	/*isulad: set system umask */
+-	umask(init_ctx->container->lxc_conf->umask);
++	umask(ctx->container->lxc_conf->umask);
+ 
+ 	/*isulad: restore default signal handlers and unblock all signals*/
+ 	for (int i = 1; i < NSIG; i++)
+@@ -1528,7 +1528,11 @@ __noreturn static void do_attach(struct attach_payload *ap)
+ 	put_attach_payload(ap);
+ 
+ 	/* We're done, so we can now do whatever the user intended us to do. */
++#ifdef HAVE_ISULAD
++	_exit(attach_function(attach_function_args, msg_fd));
++#else
+ 	_exit(attach_function(attach_function_args));
++#endif
+ 
+ on_error:
+ 	ERROR("Failed to attach to container");
+@@ -1668,7 +1672,7 @@ out:
+ }
+ 
+ static int attach_signal_handler(int fd, uint32_t events, void *data,
+-			  struct lxc_epoll_descr *descr)
++			  struct lxc_async_descr *descr)
+ {
+ 	int ret;
+ 	siginfo_t info;
+@@ -1703,7 +1707,7 @@ static int isulad_setup_signal_fd(sigset_t *oldmask)
+ 	if (ret < 0)
+ 		return -EBADF;
+ 
+-	for (int sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
++	for (size_t sig = 0; sig < (sizeof(signals) / sizeof(signals[0])); sig++) {
+ 		ret = sigdelset(&mask, signals[sig]);
+ 		if (ret < 0)
+ 			return -EBADF;
+@@ -1753,7 +1757,7 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
+ 
+ 	int isulad_sigfd;
+ 	sigset_t isulad_oldmask;
+-	struct lxc_epoll_descr isulad_descr = {0};
++	struct lxc_async_descr isulad_descr = {0};
+ #endif
+ 
+ 	if (!container)
+@@ -1786,9 +1790,9 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
+ 
+ #ifdef HAVE_ISULAD
+ 	// always switch uid and gid for attach
+-	if (options->uid == -1)
++	if (options->uid == (uid_t)-1)
+ 		options->uid = conf->init_uid;
+-	if (options->gid == -1)
++	if (options->gid == (gid_t)-1)
+ 		options->gid = conf->init_gid;
+ #endif
+ 
+@@ -2111,7 +2115,11 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
+ 		goto on_error;
+ 
+ 	/* Setup resource limits */
++#ifdef HAVE_ISULAD
++	ret = setup_resource_limits(conf, pid, -1);
++#else
+ 	ret = setup_resource_limits(conf, pid);
++#endif
+ 	if (ret < 0)
+ 		goto on_error;
+ 
+@@ -2228,7 +2236,8 @@ int lxc_attach(struct lxc_container *container, lxc_attach_exec_t exec_function,
+ 		goto close_mainloop;
+ 	}
+ 	if (options->attach_flags & LXC_ATTACH_TERMINAL) {
+-		ret = lxc_mainloop_add_handler(&descr, isulad_sigfd, attach_signal_handler, &tmp_pid);
++		ret = lxc_mainloop_add_handler(&descr, isulad_sigfd, attach_signal_handler, default_cleanup_handler, &tmp_pid,
++									   "attach_signal_handler");
+ 		if (ret < 0) {
+ 			ERROR("Failed to add signal handler for %d to mainloop", tmp_pid);
+ 			goto close_mainloop;
+diff --git a/src/lxc/attach_options.h b/src/lxc/attach_options.h
+index a4052fb..fe8bf6d 100644
+--- a/src/lxc/attach_options.h
++++ b/src/lxc/attach_options.h
+@@ -4,6 +4,9 @@
+ #define __LXC_ATTACH_OPTIONS_H
+ 
+ #include <sys/types.h>
++#ifdef HAVE_ISULAD
++#include <stdbool.h>
++#endif
+ 
+ #ifdef  __cplusplus
+ extern "C" {
+diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
+index cecc9bc..4e4ae0c 100644
+--- a/src/lxc/cgroups/cgfsng.c
++++ b/src/lxc/cgroups/cgfsng.c
+@@ -3634,6 +3634,9 @@ static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
+ 			controller_list = unified_controllers(dfd, "cgroup.controllers");
+ 			if (!controller_list) {
+ 				TRACE("No controllers are enabled for delegation in the unified hierarchy");
++#ifdef HAVE_ISULAD
++				ops->no_controller = true;
++#endif
+ 				controller_list = list_new();
+ 				if (!controller_list)
+ 					return syserror_set(-ENOMEM, "Failed to create empty controller list");
+diff --git a/src/lxc/cgroups/cgroup.h b/src/lxc/cgroups/cgroup.h
+index ebfd3a1..d9159f4 100644
+--- a/src/lxc/cgroups/cgroup.h
++++ b/src/lxc/cgroups/cgroup.h
+@@ -208,6 +208,11 @@ struct cgroup_ops {
+ 	char *container_limit_cgroup;
+ 	char *monitor_cgroup;
+ 
++#ifdef HAVE_ISULAD
++	int errfd;
++	bool no_controller;
++#endif
++
+ 	/* @hierarchies
+ 	 * - A NULL-terminated array of struct hierarchy, one per legacy
+ 	 *   hierarchy. No duplicates. First sufficient, writeable mounted
+diff --git a/src/lxc/cgroups/isulad_cgfsng.c b/src/lxc/cgroups/isulad_cgfsng.c
+index 38ad677..1160af5 100644
+--- a/src/lxc/cgroups/isulad_cgfsng.c
++++ b/src/lxc/cgroups/isulad_cgfsng.c
+@@ -34,6 +34,7 @@
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
++#include <sys/epoll.h>
+ #include <sys/types.h>
+ #include <unistd.h>
+ 
+@@ -43,41 +44,55 @@
+ #include "cgroup2_devices.h"
+ #include "cgroup_utils.h"
+ #include "commands.h"
++#include "commands_utils.h"
+ #include "conf.h"
+ #include "config.h"
+ #include "log.h"
+ #include "macro.h"
+ #include "mainloop.h"
+ #include "memory_utils.h"
++#include "open_utils.h"
+ #include "storage/storage.h"
+ #include "utils.h"
+ 
+-#ifndef HAVE_STRLCPY
++#if !HAVE_STRLCPY
+ #include "include/strlcpy.h"
+ #endif
+ 
+-#ifndef HAVE_STRLCAT
++#if !HAVE_STRLCAT
+ #include "include/strlcat.h"
+ #endif
+ 
++#if HAVE_LIBSYSTEMD
++#include <systemd/sd-bus.h>
++#include <systemd/sd-event.h>
++#endif
++
+ lxc_log_define(isulad_cgfsng, cgroup);
+ 
+-/* Given a pointer to a null-terminated array of pointers, realloc to add one
++/*
++ * Given a pointer to a null-terminated array of pointers, realloc to add one
+  * entry, and point the new entry to NULL. Do not fail. Return the index to the
+  * second-to-last entry - that is, the one which is now available for use
+  * (keeping the list null-terminated).
+  */
+-static int append_null_to_list(void ***list)
++static int cg_list_add(void ***list)
+ {
+-	int newentry = 0;
++	int idx = 0;
++	void **p;
+ 
+ 	if (*list)
+-		for (; (*list)[newentry]; newentry++)
++		for (; (*list)[idx]; idx++)
+ 			;
+ 
+-	*list = must_realloc(*list, (newentry + 2) * sizeof(void **));
+-	(*list)[newentry + 1] = NULL;
+-	return newentry;
++	p = realloc(*list, (idx + 2) * sizeof(void **));
++	if (!p)
++		return ret_errno(ENOMEM);
++
++	p[idx + 1] = NULL;
++	*list = p;
++
++	return idx;
+ }
+ 
+ /* Given a null-terminated array of strings, check whether @entry is one of the
+@@ -95,63 +110,10 @@ static bool string_in_list(char **list, const char *entry)
+ 	return false;
+ }
+ 
+-/* Return a copy of @entry prepending "name=", i.e.  turn "systemd" into
+- * "name=systemd". Do not fail.
+- */
+-static char *cg_legacy_must_prefix_named(char *entry)
+-{
+-	size_t len;
+-	char *prefixed;
+-
+-	len = strlen(entry);
+-	prefixed = must_realloc(NULL, len + 6);
+-
+-	memcpy(prefixed, "name=", STRLITERALLEN("name="));
+-	memcpy(prefixed + STRLITERALLEN("name="), entry, len);
+-	prefixed[len + 5] = '\0';
+-
+-	return prefixed;
+-}
+-
+-/* Append an entry to the clist. Do not fail. @clist must be NULL the first time
+- * we are called.
+- *
+- * We also handle named subsystems here. Any controller which is not a kernel
+- * subsystem, we prefix "name=". Any which is both a kernel and named subsystem,
+- * we refuse to use because we're not sure which we have here.
+- * (TODO: We could work around this in some cases by just remounting to be
+- * unambiguous, or by comparing mountpoint contents with current cgroup.)
+- *
+- * The last entry will always be NULL.
+- */
+-static void must_append_controller(char **klist, char **nlist, char ***clist,
+-				   char *entry)
+-{
+-	int newentry;
+-	char *copy;
+-
+-	if (string_in_list(klist, entry) && string_in_list(nlist, entry)) {
+-		ERROR("Refusing to use ambiguous controller \"%s\"", entry);
+-		ERROR("It is both a named and kernel subsystem");
+-		return;
+-	}
+-
+-	newentry = append_null_to_list((void ***)clist);
+-
+-	if (strncmp(entry, "name=", 5) == 0)
+-		copy = must_copy_string(entry);
+-	else if (string_in_list(klist, entry))
+-		copy = must_copy_string(entry);
+-	else
+-		copy = cg_legacy_must_prefix_named(entry);
+-
+-	(*clist)[newentry] = copy;
+-}
+-
+ /* Given a handler's cgroup data, return the struct hierarchy for the controller
+  * @c, or NULL if there is none.
+  */
+-struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
++static struct hierarchy *get_hierarchy(const struct cgroup_ops *ops, const char *controller)
+ {
+ 	if (!ops->hierarchies)
+ 		return log_trace_errno(NULL, errno, "There are no useable cgroup controllers");
+@@ -159,15 +121,28 @@ struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
+ 	for (int i = 0; ops->hierarchies[i]; i++) {
+ 		if (!controller) {
+ 			/* This is the empty unified hierarchy. */
+-			if (ops->hierarchies[i]->controllers &&
+-			    !ops->hierarchies[i]->controllers[0])
++			if (ops->hierarchies[i]->controllers && !ops->hierarchies[i]->controllers[0])
+ 				return ops->hierarchies[i];
++
+ 			continue;
+-		} else if (pure_unified_layout(ops) &&
+-			   strcmp(controller, "devices") == 0) {
+-			if (ops->unified->bpf_device_controller)
+-				return ops->unified;
+-			break;
++		}
++
++		/*
++		 * Handle controllers with significant implementation changes
++		 * from cgroup to cgroup2.
++		 */
++		if (pure_unified_layout(ops)) {
++			if (strequal(controller, "devices")) {
++				if (device_utility_controller(ops->unified))
++					return ops->unified;
++
++				break;
++			} else if (strequal(controller, "freezer")) {
++				if (freezer_utility_controller(ops->unified))
++					return ops->unified;
++
++				break;
++			}
+ 		}
+ 
+ 		if (string_in_list(ops->hierarchies[i]->controllers, controller))
+@@ -182,6 +157,38 @@ struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
+ 	return ret_set_errno(NULL, ENOENT);
+ }
+ 
++int prepare_cgroup_fd(const struct cgroup_ops *ops, struct cgroup_fd *fd, bool limit)
++{
++	int dfd;
++	const struct hierarchy *h;
++
++	h = get_hierarchy(ops, fd->controller);	/* look up the hierarchy that carries fd->controller */
++	if (!h)
++		return ret_errno(ENOENT);
++
++	/*
++	 * The client requested that the controller must be in a specific
++	 * cgroup version.
++	 */
++	if (fd->type != 0 && (cgroupfs_type_magic_t)fd->type != h->fs_type)
++		return ret_errno(EINVAL);
++
++	if (limit)	/* NOTE(review): limit==true picks dfd_con and false picks dfd_lim - looks swapped going by the field names; confirm against callers */
++		dfd = h->dfd_con;
++	else
++		dfd = h->dfd_lim;
++	if (dfd < 0)
++		return ret_errno(EBADF);
++
++	fd->layout = ops->cgroup_layout;	/* populate @fd for the caller */
++	fd->type = h->fs_type;
++	if (fd->type == UNIFIED_HIERARCHY)
++		fd->utilities = h->utilities;
++	fd->fd = dfd;	/* the descriptor is stored as-is, not duplicated */
++
++	return 0;
++}
++
+ #define BATCH_SIZE 50
+ static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
+ {
+@@ -223,44 +230,24 @@ static char *read_file(const char *fnam)
+ 
+ static inline bool is_unified_hierarchy(const struct hierarchy *h)
+ {
+-	return h->version == CGROUP2_SUPER_MAGIC;
+-}
+-
+-/* Given two null-terminated lists of strings, return true if any string is in
+- * both.
+- */
+-static bool controller_lists_intersect(char **l1, char **l2)
+-{
+-	if (!l1 || !l2)
+-		return false;
+-
+-	for (int i = 0; l1[i]; i++)
+-		if (string_in_list(l2, l1[i]))
+-			return true;
+-
+-	return false;
++	return h->fs_type == UNIFIED_HIERARCHY;
+ }
+ 
+-/* For a null-terminated list of controllers @clist, return true if any of those
+- * controllers is already listed the null-terminated list of hierarchies @hlist.
+- * Realistically, if one is present, all must be present.
+- */
+-static bool controller_list_is_dup(struct hierarchy **hlist, char **clist)
++static char *trim(char *s)
+ {
+-	if (!hlist)
+-		return false;
++	size_t len;
+ 
+-	for (int i = 0; hlist[i]; i++)
+-		if (controller_lists_intersect(hlist[i]->controllers, clist))
+-			return true;
++	len = strlen(s);
++	while ((len > 1) && (s[len - 1] == '\n'))
++		s[--len] = '\0';
+ 
+-	return false;
++	return s;
+ }
+ 
+ /* Return true if the controller @entry is found in the null-terminated list of
+  * hierarchies @hlist.
+  */
+-static bool controller_found(struct hierarchy **hlist, char *entry)
++static bool controller_available(struct hierarchy **hlist, char *entry)
+ {
+ 	if (!hlist)
+ 		return false;
+@@ -272,10 +259,7 @@ static bool controller_found(struct hierarchy **hlist, char *entry)
+ 	return false;
+ }
+ 
+-/* Return true if all of the controllers which we require have been found.  The
+- * required list is  freezer and anything in lxc.cgroup.use.
+- */
+-static bool all_controllers_found(struct cgroup_ops *ops)
++static bool controllers_available(struct cgroup_ops *ops)
+ {
+ 	struct hierarchy **hlist;
+ 
+@@ -284,335 +268,139 @@ static bool all_controllers_found(struct cgroup_ops *ops)
+ 
+ 	hlist = ops->hierarchies;
+ 	for (char **cur = ops->cgroup_use; cur && *cur; cur++)
+-		if (!controller_found(hlist, *cur))
+-			return log_error(false, "No %s controller mountpoint found", *cur);
++		if (!controller_available(hlist, *cur))	/* a controller listed in cgroup_use is in none of the hierarchies */
++			return log_error(false, "The %s controller was not found", *cur);
+ 
+ 	return true;
+ }
+ 
+-/* Get the controllers from a mountinfo line There are other ways we could get
+- * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
+- * could parse the mount options. But we simply assume that the mountpoint must
+- * be /sys/fs/cgroup/controller-list
+- */
+-static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
+-					int type)
++static char **list_new(void)
+ {
+-	/* The fourth field is /sys/fs/cgroup/comma-delimited-controller-list
+-	 * for legacy hierarchies.
+-	 */
+-	__do_free_string_list char **aret = NULL;
+-	int i;
+-	char *p2, *tok;
+-	char *p = line, *sep = ",";
+-
+-	for (i = 0; i < 4; i++) {
+-		p = strchr(p, ' ');
+-		if (!p)
+-			return NULL;
+-		p++;
+-	}
+-
+-	/* Note, if we change how mountinfo works, then our caller will need to
+-	 * verify /sys/fs/cgroup/ in this field.
+-	 */
+-	if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0)
+-		return log_warn(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p);
+-
+-	p += 15;
+-	p2 = strchr(p, ' ');
+-	if (!p2)
+-		return log_error(NULL, "Corrupt mountinfo");
+-	*p2 = '\0';
+-
+-	if (type == CGROUP_SUPER_MAGIC) {
+-		__do_free char *dup = NULL;
+-
+-		/* strdup() here for v1 hierarchies. Otherwise
+-		 * lxc_iterate_parts() will destroy mountpoints such as
+-		 * "/sys/fs/cgroup/cpu,cpuacct".
+-		 */
+-		dup = must_copy_string(p);
+-		if (!dup)
+-			return NULL;
+-
+-		lxc_iterate_parts (tok, dup, sep)
+-			must_append_controller(klist, nlist, &aret, tok);
+-	}
+-	*p2 = ' ';
+-
+-	return move_ptr(aret);
+-}
++	__do_free_string_list char **list = NULL;
++	int idx;
+ 
+-static char **cg_unified_make_empty_controller(void)
+-{
+-	__do_free_string_list char **aret = NULL;
+-	int newentry;
++	idx = cg_list_add((void ***)&list);
++	if (idx < 0)
++		return NULL;
+ 
+-	newentry = append_null_to_list((void ***)&aret);
+-	aret[newentry] = NULL;
+-	return move_ptr(aret);
++	list[idx] = NULL;
++	return move_ptr(list);
+ }
+ 
+-static char **cg_unified_get_controllers(const char *file)
++static int list_add_string(char ***list, char *entry)
+ {
+-	__do_free char *buf = NULL;
+-	__do_free_string_list char **aret = NULL;
+-	char *sep = " \t\n";
+-	char *tok;
+-
+-	buf = read_file(file);
+-	if (!buf)
+-		return NULL;
++	__do_free char *dup = NULL;
++	int idx;
+ 
+-	lxc_iterate_parts(tok, buf, sep) {
+-		int newentry;
+-		char *copy;
++	dup = strdup(entry);
++	if (!dup)
++		return ret_errno(ENOMEM);
+ 
+-		newentry = append_null_to_list((void ***)&aret);
+-		copy = must_copy_string(tok);
+-		aret[newentry] = copy;
+-	}
++	idx = cg_list_add((void ***)list);
++	if (idx < 0)
++		return idx;
+ 
+-	return move_ptr(aret);
++	(*list)[idx] = move_ptr(dup);
++	return 0;
+ }
+ 
+-static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint,
+-				       char *container_base_path, int type)
++static char **list_add_controllers(char *controllers)
+ {
+-	struct hierarchy *new;
+-	int newentry;
++	__do_free_string_list char **list = NULL;
++	char *it;
+ 
+-	new = zalloc(sizeof(*new));
+-	new->controllers = clist;
+-	new->at_mnt = mountpoint;
+-	new->at_base = container_base_path;
+-	new->fs_type = type;
+-	new->dfd_con = -EBADF;
+-	new->dfd_mon = -EBADF;
+-
+-	newentry = append_null_to_list((void ***)h);
+-	(*h)[newentry] = new;
+-	return new;
+-}
+-
+-/* Get a copy of the mountpoint from @line, which is a line from
+- * /proc/self/mountinfo.
+- */
+-static char *cg_hybrid_get_mountpoint(char *line)
+-{
+-	char *p = line, *sret = NULL;
+-	size_t len;
+-	char *p2;
++	lxc_iterate_parts(it, controllers, ", \t\n") {
++		int ret;
+ 
+-	for (int i = 0; i < 4; i++) {
+-		p = strchr(p, ' ');
+-		if (!p)
++		ret = list_add_string(&list, it);
++		if (ret < 0)
+ 			return NULL;
+-		p++;
+ 	}
+ 
+-	if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0)
+-		return NULL;
+-
+-	p2 = strchr(p + 15, ' ');
+-	if (!p2)
+-		return NULL;
+-	*p2 = '\0';
+-
+-	len = strlen(p);
+-	sret = must_realloc(NULL, len + 1);
+-	memcpy(sret, p, len);
+-	sret[len] = '\0';
+-
+-	return sret;
++	return move_ptr(list);
+ }
+ 
+-/* Given a multi-line string, return a null-terminated copy of the current line. */
+-static char *copy_to_eol(char *p)
++static char **unified_controllers(int dfd, const char *file)
+ {
+-	char *p2, *sret;
+-	size_t len;
++	__do_free char *buf = NULL;
+ 
+-	p2 = strchr(p, '\n');
+-	if (!p2)
++	buf = read_file_at(dfd, file, PROTECT_OPEN, 0);
++	if (!buf)
+ 		return NULL;
+ 
+-	len = p2 - p;
+-	sret = must_realloc(NULL, len + 1);
+-	memcpy(sret, p, len);
+-	sret[len] = '\0';
+-
+-	return sret;
++	return list_add_controllers(buf);
+ }
+ 
+-/* cgline: pointer to character after the first ':' in a line in a \n-terminated
+- * /proc/self/cgroup file. Check whether controller c is present.
+- */
+-static bool controller_in_clist(char *cgline, char *c)
++static bool skip_hierarchy(const struct cgroup_ops *ops, char **controllers)
+ {
+-	__do_free char *tmp = NULL;
+-	char *tok, *eol;
+-	size_t len;
+-
+-	eol = strchr(cgline, ':');
+-	if (!eol)
++	if (!ops->cgroup_use)
+ 		return false;
+ 
+-	len = eol - cgline;
+-	tmp = must_realloc(NULL, len + 1);
+-	memcpy(tmp, cgline, len);
+-	tmp[len] = '\0';
+-
+-	lxc_iterate_parts(tok, tmp, ",")
+-		if (strcmp(tok, c) == 0)
+-			return true;
+-
+-	return false;
+-}
+-
+-/* @basecginfo is a copy of /proc/$$/cgroup. Return the current cgroup for
+- * @controller.
+- */
+-static char *cg_hybrid_get_current_cgroup(char *basecginfo, char *controller,
+-					  int type)
+-{
+-	char *p = basecginfo;
+-
+-	for (;;) {
+-		bool is_cgv2_base_cgroup = false;
+-
+-		/* cgroup v2 entry in "/proc/<pid>/cgroup": "0::/some/path" */
+-		if ((type == CGROUP2_SUPER_MAGIC) && (*p == '0'))
+-			is_cgv2_base_cgroup = true;
++	for (char **cur_ctrl = controllers; cur_ctrl && *cur_ctrl; cur_ctrl++) {
++		bool found = false;
+ 
+-		p = strchr(p, ':');
+-		if (!p)
+-			return NULL;
+-		p++;
++		for (char **cur_use = ops->cgroup_use; cur_use && *cur_use; cur_use++) {
++			if (!strequal(*cur_use, *cur_ctrl))
++				continue;
+ 
+-		if (is_cgv2_base_cgroup || (controller && controller_in_clist(p, controller))) {
+-			p = strchr(p, ':');
+-			if (!p)
+-				return NULL;
+-			p++;
+-			return copy_to_eol(p);
++			found = true;
++			break;
+ 		}
+ 
+-		p = strchr(p, '\n');
+-		if (!p)
+-			return NULL;
+-		p++;
+-	}
+-}
+-
+-static void must_append_string(char ***list, char *entry)
+-{
+-	int newentry;
+-	char *copy;
+-
+-	newentry = append_null_to_list((void ***)list);
+-	copy = must_copy_string(entry);
+-	(*list)[newentry] = copy;
+-}
+-
+-static int get_existing_subsystems(char ***klist, char ***nlist)
+-{
+-	__do_free char *line = NULL;
+-	__do_fclose FILE *f = NULL;
+-	size_t len = 0;
+-
+-	f = fopen("/proc/self/cgroup", "re");
+-	if (!f)
+-		return -1;
+-
+-	while (getline(&line, &len, f) != -1) {
+-		char *p, *p2, *tok;
+-		p = strchr(line, ':');
+-		if (!p)
+-			continue;
+-		p++;
+-		p2 = strchr(p, ':');
+-		if (!p2)
+-			continue;
+-		*p2 = '\0';
+-
+-		/* If the kernel has cgroup v2 support, then /proc/self/cgroup
+-		 * contains an entry of the form:
+-		 *
+-		 *	0::/some/path
+-		 *
+-		 * In this case we use "cgroup2" as controller name.
+-		 */
+-		if ((p2 - p) == 0) {
+-			must_append_string(klist, "cgroup2");
++		if (found)
+ 			continue;
+-		}
+ 
+-		lxc_iterate_parts(tok, p, ",") {
+-			if (strncmp(tok, "name=", 5) == 0)
+-				must_append_string(nlist, tok);
+-			else
+-				must_append_string(klist, tok);
+-		}
++		return true;
+ 	}
+ 
+-	return 0;
++	return false;
+ }
+ 
+-static char *trim(char *s)
++static int cgroup_hierarchy_add(struct cgroup_ops *ops, int dfd_mnt, char *mnt,
++				int dfd_base, char *base_cgroup,
++				char **controllers, cgroupfs_type_magic_t fs_type)
+ {
+-	size_t len;
+-
+-	len = strlen(s);
+-	while ((len > 1) && (s[len - 1] == '\n'))
+-		s[--len] = '\0';
++	__do_free struct hierarchy *new = NULL;
++	int idx;
+ 
+-	return s;
+-}
++	if (abspath(base_cgroup))
++		return syserror_set(-EINVAL, "Container base path must be relative to controller mount");
+ 
+-static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops)
+-{
+-	int i;
+-	struct hierarchy **it;
++	new = zalloc(sizeof(*new));
++	if (!new)
++		return ret_errno(ENOMEM);
+ 
+-	if (!ops->hierarchies) {
+-		TRACE("  No hierarchies found");
+-		return;
+-	}
++	new->dfd_con		= -EBADF;
++	new->dfd_lim		= -EBADF;
++	new->dfd_mon		= -EBADF;
+ 
+-	TRACE("  Hierarchies:");
+-	for (i = 0, it = ops->hierarchies; it && *it; it++, i++) {
+-		int j;
+-		char **cit;
++	new->fs_type		= fs_type;
++	new->controllers	= controllers;
++	new->at_mnt		= mnt;
++	new->at_base		= base_cgroup;
+ 
+-		TRACE("  %d: base_cgroup: %s", i, (*it)->at_base ? (*it)->at_base : "(null)");
+-		TRACE("      at_mnt:  %s", (*it)->at_mnt ? (*it)->at_mnt : "(null)");
+-		TRACE("      controllers:");
+-		for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++)
+-			TRACE("      %d: %s", j, *cit);
+-	}
+-}
++	new->dfd_mnt		= dfd_mnt;
++	new->dfd_base		= dfd_base;
+ 
+-static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist,
+-					      char **nlist)
+-{
+-	int k;
+-	char **it;
++	TRACE("Adding cgroup hierarchy mounted at %s and base cgroup %s",
++	      mnt, maybe_empty(base_cgroup));
++	for (char *const *it = new->controllers; it && *it; it++)
++		TRACE("The hierarchy contains the %s controller", *it);
+ 
+-	TRACE("basecginfo is:");
+-	TRACE("%s", basecginfo);
++	idx = cg_list_add((void ***)&ops->hierarchies);
++	if (idx < 0)
++		return ret_errno(idx);	/* NOTE(review): idx is already negative here and list_add_string() returns it as-is - confirm ret_errno() copes with a negative argument */
+ 
+-	for (k = 0, it = klist; it && *it; it++, k++)
+-		TRACE("kernel subsystem %d: %s", k, *it);
++	if (fs_type == UNIFIED_HIERARCHY)
++		ops->unified = new;
++	(ops->hierarchies)[idx] = move_ptr(new);
+ 
+-	for (k = 0, it = nlist; it && *it; it++, k++)
+-		TRACE("named subsystem %d: %s", k, *it);
++	return 0;
+ }
+ 
+ struct generic_userns_exec_data {
+ 	struct hierarchy **hierarchies;
+-	const char *container_cgroup;
++	const char *path_prune;
+ 	struct lxc_conf *conf;
+ 	uid_t origuid; /* target uid in parent namespace */
+ 	char *path;
+@@ -655,7 +443,7 @@ static int isulad_cgroup_tree_remove_wrapper(void *data)
+ 	gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
+ 	int ret;
+ 
+-	if (!lxc_setgroups(0, NULL) && errno != EPERM)
++	if (!lxc_drop_groups() && errno != EPERM)
+ 		return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
+ 
+ 	ret = setresgid(nsgid, nsgid, nsgid);
+@@ -668,7 +456,7 @@ static int isulad_cgroup_tree_remove_wrapper(void *data)
+ 		return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
+ 				       (int)nsuid, (int)nsuid, (int)nsuid);
+ 
+-	return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
++	return isulad_cgroup_tree_remove(arg->hierarchies, arg->path_prune);
+ }
+ 
+ __cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops,
+@@ -707,10 +495,10 @@ __cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops,
+ 		WARN("Failed to detach bpf program from cgroup");
+ #endif
+ 
+-	if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) {
++	if (!list_empty(&handler->conf->id_map) && !handler->am_root) {
+ 		struct generic_userns_exec_data wrap = {
+ 			.conf			= handler->conf,
+-			.container_cgroup	= ops->container_cgroup,
++			.path_prune		= ops->container_limit_cgroup,
+ 			.hierarchies		= ops->hierarchies,
+ 			.origuid		= 0,
+ 		};
+@@ -733,58 +521,408 @@ __cgfsng_ops static void isulad_cgfsng_monitor_destroy(struct cgroup_ops *ops,
+ 	return;
+ }
+ 
+-__cgfsng_ops static inline bool isulad_cgfsng_monitor_create(struct cgroup_ops *ops,
+-						      struct lxc_handler *handler)
++#define SYSTEMD_SCOPE_FAILED 2
++#define SYSTEMD_SCOPE_UNSUPP 1
++#define SYSTEMD_SCOPE_SUCCESS 0
++
++#if HAVE_LIBSYSTEMD
++struct sd_callback_data {
++	char *scope_name;
++	bool job_complete;
++};
++
++static int systemd_jobremoved_callback(sd_bus_message *m, void *userdata, sd_bus_error *error)
++{
++	char *path, *unit, *result;
++	struct sd_callback_data *sd_data = userdata;	/* carries the scope name to match and the completion flag */
++	uint32_t id;
++	int r;
++
++	r = sd_bus_message_read(m, "uoss", &id, &path, &unit, &result);	/* "uoss": id, path, unit, result */
++	if (r < 0)
++		return log_error(-1, "bad message received in callback: %s", strerror(-r));
++
++	if (sd_data->scope_name && strcmp(unit, sd_data->scope_name) != 0)	/* units are only filtered when a scope name was supplied */
++		return log_trace(-1, "unit was '%s' not '%s'", unit, sd_data->scope_name);
++	if (strcmp(result, "done") == 0) {
++		sd_data->job_complete = true;	/* record completion in the caller-provided data */
++		return log_info(1, "job is done");
++	}
++	return log_debug(0, "result was '%s', not 'done'", result);
++}
+ 
+-static bool isulad_copy_parent_file(char *path, char *file)
++#define DESTINATION "org.freedesktop.systemd1"
++#define PATH "/org/freedesktop/systemd1"
++#define INTERFACE "org.freedesktop.systemd1.Manager"
++#define MEMBER "StartTransientUnit"
++static bool start_scope(sd_bus *bus, struct sd_callback_data *data, struct sd_event *event)
+ {
+-	int ret;
+-	int len = 0;
+-	char *value = NULL;
+-	char *current = NULL;
+-	char *fpath = NULL;
+-	char *lastslash = NULL;
+-	char oldv;
+-
+-	fpath = must_make_path(path, file, NULL);
+-	current = read_file(fpath);
+-
+-	if (current == NULL) {
+-		SYSERROR("Failed to read file \"%s\"", fpath);
+-		free(fpath);
+-		return false;
++	__attribute__((__cleanup__(sd_bus_error_free))) sd_bus_error error = SD_BUS_ERROR_NULL;;
++	__attribute__((__cleanup__(sd_bus_message_unrefp))) sd_bus_message *reply = NULL;
++	__attribute__((__cleanup__(sd_bus_message_unrefp))) sd_bus_message *m = NULL;
++	char *path = NULL;
++	int r;
++
++	r = sd_bus_message_new_method_call(bus, &m,
++		DESTINATION, PATH, INTERFACE, MEMBER);
++	if (r < 0)
++		return log_error(false, "Failed creating sdbus message");
++
++	r = sd_bus_message_append(m, "ss", data->scope_name, "fail");
++	if (r < 0)
++		return log_error(false, "Failed setting systemd scope name");
++
++	r = sd_bus_message_open_container(m, 'a', "(sv)");
++	if (r < 0)
++		return log_error(false, "Failed allocating sdbus msg properties");
++
++	r = sd_bus_message_append(m, "(sv)(sv)(sv)",
++		"PIDs", "au", 1, getpid(),
++		"Delegate", "b", 1,
++		"CollectMode", "s", "inactive-or-failed");
++	if (r < 0)
++		return log_error(false, "Failed setting properties on sdbus message");
++
++	r = sd_bus_message_close_container(m);
++	if (r < 0)
++		return log_error(false, "Failed closing sdbus message properties");
++
++	r = sd_bus_message_append(m, "a(sa(sv))", 0);
++	if (r < 0)
++		return log_error(false, "Failed appending aux boilerplate\n");
++
++	r = sd_bus_call(NULL, m, 0, &error, &reply);
++	if (r < 0)
++		return log_error(false,  "Failed sending sdbus message: %s", error.message);
++
++	/* Parse the response message */
++	r = sd_bus_message_read(reply, "o", &path);
++	if (r < 0)
++		return log_error(false, "Failed to parse response message: %s", strerror(-r));
++
++	/* Now spin up a mini-event-loop to wait for the "job completed" message */
++	int tries = 0;
++
++	while (!data->job_complete) {
++		r = sd_event_run(event, 1000 * 1000);
++		if (r < 0) {
++			log_debug(stderr, "Error waiting for JobRemoved: %s\n", strerror(-r));
++			continue;
++		}
++		if (data->job_complete || tries == 5)
++			break;
++		if (r > 0) {
++			log_trace(stderr, "Debug: we processed an event (%d), but not the one we wanted\n", r);
++			continue;
++		}
++		if (r == 0) // timeout
++			tries++;
+ 	}
+-
+-	if (strcmp(current, "\n") != 0) {
+-		free(fpath);
+-		free(current);
+-		return true;
++	if (!data->job_complete) {
++		return log_error(false, "Error: %s job was never removed", data->scope_name);
+ 	}
++	return true;
++}
+ 
+-	free(fpath);
+-	free(current);
++static bool string_pure_unified_system(char *contents)
++{
++	char *p;
++	bool first_line_read = false;
+ 
+-	lastslash = strrchr(path, '/');
+-	if (lastslash == NULL) {
+-		ERROR("Failed to detect \"/\" in \"%s\"", path);
+-		return false;
++	lxc_iterate_parts(p, contents, "\n") {
++		if (first_line_read) // if >1 line, this is not pure unified
++			return false;
++		first_line_read = true;
++
++		if (strlen(p) > 3 && strncmp(p, "0:", 2) == 0)
++			return true;
+ 	}
+-	oldv = *lastslash;
+-	*lastslash = '\0';
+-	fpath = must_make_path(path, file, NULL);
+-	*lastslash = oldv;
+-	len = lxc_read_from_file(fpath, NULL, 0);
+-	if (len <= 0)
+-		goto on_error;
+ 
+-	value = must_realloc(NULL, len + 1);
+-	ret = lxc_read_from_file(fpath, value, len);
+-	if (ret != len)
+-		goto on_error;
+-	free(fpath);
++	return false;
++}
++
++/*
++ * Only call get_current_unified_cgroup() when we are in a pure
++ * unified (v2-only) cgroup
++ */
++static char *get_current_unified_cgroup(void)
++{
++	__do_free char *buf = NULL;
++	__do_free_string_list char **list = NULL;
++	char *p;
++
++	buf = read_file_at(-EBADF, "/proc/self/cgroup", PROTECT_OPEN, 0);
++	if (!buf)
++		return NULL;
++
++	if (!string_pure_unified_system(buf))
++		return NULL;
++
++	// 0::/user.slice/user-1000.slice/session-136.scope
++	// Get past the "0::"
++	p = buf;
++	if (strnequal(p, "0::", STRLITERALLEN("0::")))
++		p += STRLITERALLEN("0::");
++
++	return strdup(p);
++}
++
++static bool pure_unified_system(void)
++{
++	__do_free char *buf = NULL;
++
++	buf = read_file_at(-EBADF, "/proc/self/cgroup", PROTECT_OPEN, 0);
++	if (!buf)
++		return false;
++
++	return string_pure_unified_system(buf);
++}
++
++#define MEMBER_JOIN "AttachProcessesToUnit"
++static bool enter_scope(char *scope_name, pid_t pid)
++{
++	__attribute__((__cleanup__(sd_bus_unrefp))) sd_bus *bus = NULL;
++	__attribute__((__cleanup__(sd_bus_error_free))) sd_bus_error error = SD_BUS_ERROR_NULL;;
++	__attribute__((__cleanup__(sd_bus_message_unrefp))) sd_bus_message *reply = NULL;
++	__attribute__((__cleanup__(sd_bus_message_unrefp))) sd_bus_message *m = NULL;
++	int r;
++
++	r = sd_bus_open_user(&bus);
++	if (r < 0)
++		return log_error(false, "Failed to connect to user bus: %s", strerror(-r));
++
++	r = sd_bus_message_new_method_call(bus, &m,
++		DESTINATION, PATH, INTERFACE, MEMBER_JOIN);
++	if (r < 0)
++		return log_error(false, "Failed creating sdbus message");
++
++	r = sd_bus_message_append(m, "ssau", scope_name, "/init", 1, pid);
++	if (r < 0)
++		return log_error(false, "Failed setting systemd scope name");
++
++
++	r = sd_bus_call(NULL, m, 0, &error, &reply);
++	if (r < 0)
++		return log_error(false,  "Failed sending sdbus message: %s", error.message);
++
++	return true;
++}
++
++static bool enable_controllers_delegation(int fd_dir, char *cg)
++{
++	__do_free char *rbuf = NULL;
++	__do_free char *wbuf = NULL;
++	__do_free_string_list char **cpulist = NULL;
++	char *controller;
++	size_t full_len = 0;
++	bool first = true;
++	int ret;
++
++	rbuf = read_file_at(fd_dir, "cgroup.controllers", PROTECT_OPEN, 0);
++	if (!rbuf)
++		return false;
++
++	lxc_iterate_parts(controller, rbuf, " ") {
++		full_len += strlen(controller) + 2;
++		wbuf = must_realloc(wbuf, full_len + 1);
++		if (first) {
++			wbuf[0] = '\0';
++			first = false;
++		} else {
++			(void)strlcat(wbuf, " ", full_len + 1);
++		}
++		strlcat(wbuf, "+", full_len + 1);
++		strlcat(wbuf, controller, full_len + 1);
++	}
++	if (!wbuf)
++		return log_debug(true, "No controllers to delegate!");
++
++	ret = lxc_writeat(fd_dir, "cgroup.subtree_control", wbuf, strlen(wbuf));
++	if (ret < 0)
++		return log_error_errno(false, errno, "Failed to write \"%s\" to %s/cgroup.subtree_control", wbuf, cg);
++
++	return true;
++}
++
++/*
++ * systemd places us in say .../lxc-1.scope.  We create lxc-1.scope/init,
++ * move ourselves to there, then enable controllers in lxc-1.scope
++ */
++static bool move_and_delegate_unified(char *parent_cgroup)
++{
++	__do_free char *buf = NULL;
++	__do_close int fd_parent = -EBADF;
++	int ret;
++
++	fd_parent = open_at(-EBADF, parent_cgroup, O_DIRECTORY, 0, 0);
++	if (fd_parent < 0)
++		return syserror_ret(false, "Failed opening cgroup dir \"%s\"", parent_cgroup);
++
++	ret = mkdirat(fd_parent, "init", 0755);
++	if (ret < 0 && errno != EEXIST)
++		return syserror_ret(false, "Failed to create \"%d/init\" cgroup", fd_parent);
++
++	buf = read_file_at(fd_parent, "cgroup.procs", PROTECT_OPEN, 0);
++	if (!buf)
++		return false;
++
++	ret = lxc_writeat(fd_parent, "init/cgroup.procs", buf, strlen(buf));
++	if (ret)
++		return syserror_ret(false, "Failed to escape to cgroup \"init/cgroup.procs\"");
++
++	/* enable controllers in parent_cgroup */
++	return enable_controllers_delegation(fd_parent, parent_cgroup);
++}
++
++static int unpriv_systemd_create_scope(struct cgroup_ops *ops, struct lxc_conf *conf)
++{
++	__do_free char *full_scope_name = NULL;
++	__do_free char *fs_cg_path = NULL;
++	sd_event *event = NULL;
++	__attribute__((__cleanup__(sd_bus_unrefp))) sd_bus *bus = NULL; // free the bus before the names it references, just to be sure
++	struct sd_callback_data sd_data;
++	int idx = 0;
++	size_t len;
++	int r;
++
++	if (geteuid() == 0)
++		return log_info(SYSTEMD_SCOPE_UNSUPP, "Running privileged, not using a systemd unit");
++	// Pure_unified_layout() can't be used as that info is not yet setup.  At
++	// the same time, we don't want to calculate current cgroups until after
++	// we optionally enter a new systemd user scope.  So let's just do a quick
++	// check for pure unified cgroup system: single line /proc/self/cgroup with
++	// only index '0:'
++	if (!pure_unified_system())
++		return log_info(SYSTEMD_SCOPE_UNSUPP, "Not in unified layout, not using a systemd unit");
++
++	r = sd_bus_open_user(&bus);
++	if (r < 0)
++		return log_error(SYSTEMD_SCOPE_FAILED, "Failed to connect to user bus: %s", strerror(-r));
++
++	r = sd_bus_call_method_async(bus, NULL, DESTINATION, PATH, INTERFACE, "Subscribe", NULL, NULL, NULL);
++	if (r < 0)
++		return log_error(SYSTEMD_SCOPE_FAILED, "Failed to subscribe to signals: %s", strerror(-r));
++
++	sd_data.job_complete = false;
++	sd_data.scope_name = NULL;
++	r = sd_bus_match_signal(bus,
++		NULL, // no slot
++		DESTINATION, PATH, INTERFACE, "JobRemoved",
++		systemd_jobremoved_callback, &sd_data);
++	if (r < 0)
++		return log_error(SYSTEMD_SCOPE_FAILED, "Failed to register systemd event loop signal handler: %s", strerror(-r));
++
++	// NEXT: create and attach event
++	r = sd_event_new(&event);
++	if (r < 0)
++		return log_error(SYSTEMD_SCOPE_FAILED, "Failed allocating new event: %s\n", strerror(-r));
++	r = sd_bus_attach_event(bus, event, SD_EVENT_PRIORITY_NORMAL);
++	if (r < 0) {
++		// bus won't clean up event since the attach failed
++		sd_event_unrefp(&event);
++		return log_error(SYSTEMD_SCOPE_FAILED, "Failed attaching event: %s\n", strerror(-r));
++	}
++
++	// "lxc-" + (conf->name) + "-NN" + ".scope" + '\0'
++	len = STRLITERALLEN("lxc-") + strlen(conf->name) + 3 + STRLITERALLEN(".scope") + 1;
++	full_scope_name = malloc(len);
++	if (!full_scope_name)
++		return syserror("Out of memory");
++
++	do {
++		r = strnprintf(full_scope_name, len, "lxc-%s-%d.scope", conf->name, idx);
++		if (r < 0)
++			return log_error_errno(-1, errno, "Failed to build scope name for \"%s\"", conf->name);
++		sd_data.scope_name = full_scope_name;
++		if (start_scope(bus, &sd_data, event)) {
++			conf->cgroup_meta.systemd_scope = get_current_unified_cgroup();
++			if (!conf->cgroup_meta.systemd_scope)
++				return log_trace(SYSTEMD_SCOPE_FAILED, "Out of memory");
++			fs_cg_path = must_make_path("/sys/fs/cgroup", conf->cgroup_meta.systemd_scope, NULL);
++			if (!move_and_delegate_unified(fs_cg_path))
++				return log_error(SYSTEMD_SCOPE_FAILED, "Failed delegating the controllers to our cgroup");
++			return log_trace(SYSTEMD_SCOPE_SUCCESS, "Created systemd scope %s", full_scope_name);
++		}
++		idx++;
++	} while (idx < 99);
++
++	return SYSTEMD_SCOPE_FAILED; // failed, let's try old-school after all
++}
++#else /* !HAVE_LIBSYSTEMD */
++static int unpriv_systemd_create_scope(struct cgroup_ops *ops, struct lxc_conf *conf)
++{
++	TRACE("unpriv_systemd_create_scope: no systemd support");
++	return SYSTEMD_SCOPE_UNSUPP; // not supported
++}
++#endif /* HAVE_LIBSYSTEMD */
++
++// Return a duplicate of cgroup path @cg without leading /, so
++// that caller can own+free it and be certain it's not abspath.
++static char *cgroup_relpath(char *cg)
++{
++	char *p;
++
++	if (!cg || strequal(cg, "/"))
++		return NULL;
++	p = strdup(deabs(cg));
++	if (!p)
++		return ERR_PTR(-ENOMEM);
++
++	return p;
++}
++
++__cgfsng_ops static inline bool isulad_cgfsng_monitor_create(struct cgroup_ops *ops,
++						      struct lxc_handler *handler)
++{
++	return true;
++}
++
++static bool isulad_copy_parent_file(char *path, char *file)
++{
++	int ret;
++	int len = 0;
++	char *value = NULL;
++	char *current = NULL;
++	char *fpath = NULL;
++	char *lastslash = NULL;
++	char oldv;
++
++	fpath = must_make_path(path, file, NULL);
++	current = read_file(fpath);
++
++	if (current == NULL) {
++		SYSERROR("Failed to read file \"%s\"", fpath);
++		free(fpath);
++		return false;
++	}
++
++	if (strcmp(current, "\n") != 0) {
++		free(fpath);
++		free(current);
++		return true;
++	}
++
++	free(fpath);
++	free(current);
++
++	lastslash = strrchr(path, '/');
++	if (lastslash == NULL) {
++		ERROR("Failed to detect \"/\" in \"%s\"", path);
++		return false;
++	}
++	oldv = *lastslash;
++	*lastslash = '\0';
++	fpath = must_make_path(path, file, NULL);
++	*lastslash = oldv;
++	len = lxc_read_from_file(fpath, NULL, 0);
++	if (len <= 0)
++		goto on_error;
++
++	value = must_realloc(NULL, len + 1);
++	ret = lxc_read_from_file(fpath, value, len);
++	if (ret != len)
++		goto on_error;
++	free(fpath);
+ 
+ 	fpath = must_make_path(path, file, NULL);
+ 	ret = lxc_write_to_file(fpath, value, len, false, 0666);
+@@ -926,8 +1064,8 @@ static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int err
+ 		return false;
+ 	}
+ 
+-	h->cgfd_con = lxc_open_dirfd(path);
+-	if (h->cgfd_con < 0)
++	h->dfd_con = lxc_open_dirfd(path);
++	if (h->dfd_con < 0)
+ 		return log_error_errno(false, errno, "Failed to open %s", path);
+ 
+ 	if (h->path_con == NULL) {
+@@ -1071,7 +1209,7 @@ static int chown_cgroup_wrapper(void *data)
+ 	uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
+ 	gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
+ 
+-	if (!lxc_setgroups(0, NULL) && errno != EPERM)
++	if (!lxc_drop_groups() && errno != EPERM)
+ 		return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
+ 
+ 	ret = setresgid(nsgid, nsgid, nsgid);
+@@ -1089,7 +1227,10 @@ static int chown_cgroup_wrapper(void *data)
+ 		destuid = 0;
+ 
+ 	for (int i = 0; arg->hierarchies[i]; i++) {
+-		int dirfd = arg->hierarchies[i]->cgfd_con;
++		int dirfd = arg->hierarchies[i]->dfd_con;
++
++		if (dirfd < 0)
++			return syserror_set(-EBADF, "Invalid cgroup file descriptor");
+ 
+ 		(void)fchowmodat(dirfd, "", destuid, nsgid, 0775);
+ 
+@@ -1101,15 +1242,15 @@ static int chown_cgroup_wrapper(void *data)
+ 		 * files (which systemd in wily insists on doing).
+ 		 */
+ 
+-		if (arg->hierarchies[i]->fs_type == CGROUP_SUPER_MAGIC)
++		if (arg->hierarchies[i]->fs_type == LEGACY_HIERARCHY)
+ 			(void)fchowmodat(dirfd, "tasks", destuid, nsgid, 0664);
+ 
+ 		(void)fchowmodat(dirfd, "cgroup.procs", destuid, nsgid, 0664);
+ 
+-		if (arg->hierarchies[i]->fs_type != CGROUP2_SUPER_MAGIC)
++		if (arg->hierarchies[i]->fs_type != UNIFIED_HIERARCHY)
+ 			continue;
+ 
+-		for (char **p = arg->hierarchies[i]->cgroup2_chown; p && *p; p++)
++		for (char **p = arg->hierarchies[i]->delegate; p && *p; p++)
+ 			(void)fchowmodat(dirfd, *p, destuid, nsgid, 0664);
+ 	}
+ 
+@@ -1133,7 +1274,7 @@ __cgfsng_ops static bool isulad_cgfsng_chown(struct cgroup_ops *ops,
+ 	if (!conf)
+ 		return ret_set_errno(false, EINVAL);
+ 
+-	if (lxc_list_empty(&conf->id_map))
++	if (list_empty(&conf->id_map))
+ 		return true;
+ 
+ 	wrap.origuid = geteuid();
+@@ -1147,7 +1288,7 @@ __cgfsng_ops static bool isulad_cgfsng_chown(struct cgroup_ops *ops,
+ 	return true;
+ }
+ 
+-__cgfsng_ops void isulad_cgfsng_payload_finalize(struct cgroup_ops *ops)
++__cgfsng_ops static void isulad_cgfsng_finalize(struct cgroup_ops *ops)
+ {
+ 	if (!ops)
+ 		return;
+@@ -1164,15 +1305,33 @@ __cgfsng_ops void isulad_cgfsng_payload_finalize(struct cgroup_ops *ops)
+ 
+ 	for (int i = 0; ops->hierarchies[i]; i++) {
+ 		struct hierarchy *h = ops->hierarchies[i];
+-		/*
+-		 * we don't keep the fds for non-unified hierarchies around
+-		 * mainly because we don't make use of them anymore after the
+-		 * core cgroup setup is done but also because there are quite a
+-		 * lot of them.
+-		 */
+-		if (!is_unified_hierarchy(h))
+-			close_prot_errno_disarm(h->cgfd_con);
++
++		/* Close all monitor cgroup file descriptors. */
++		close_prot_errno_disarm(h->dfd_mon);
+ 	}
++	/* Close the cgroup root file descriptor. */
++	close_prot_errno_disarm(ops->dfd_mnt);
++
++	/*
++	 * The checking for freezer support should obviously be done at cgroup
++	 * initialization time but that doesn't work reliable. The freezer
++	 * controller has been demoted (rightly so) to a simple file located in
++	 * each non-root cgroup. At the time when the container is created we
++	 * might still be located in /sys/fs/cgroup and so checking for
++	 * cgroup.freeze won't tell us anything because this file doesn't exist
++	 * in the root cgroup. We could then iterate through /sys/fs/cgroup and
++	 * find an already existing cgroup and then check within that cgroup
++	 * for the existence of cgroup.freeze but that will only work on
++	 * systemd based hosts. Other init systems might not manage cgroups and
++	 * so no cgroup will exist. So we defer until we have created cgroups
++	 * for our container which means we check here.
++	 */
++        if (pure_unified_layout(ops) &&
++            !faccessat(ops->unified->dfd_con, "cgroup.freeze", F_OK,
++                       AT_SYMLINK_NOFOLLOW)) {
++		TRACE("Unified hierarchy supports freezer");
++		ops->unified->utilities |= FREEZER_CONTROLLER;
++        }
+ }
+ 
+ /* cgroup-full:* is done, no need to create subdirs */
+@@ -1235,6 +1394,118 @@ static int cg_legacy_mount_controllers(int type, struct hierarchy *h,
+ 	return 0;
+ }
+ 
++/* __cgroupfs_mount
++ *
++ * Mount cgroup hierarchies directly without using bind-mounts. The main
++ * uses-cases are mounting cgroup hierarchies in cgroup namespaces and mounting
++ * cgroups for the LXC_AUTO_CGROUP_FULL option.
++ */
++static int __cgroupfs_mount(int cgroup_automount_type, struct hierarchy *h,
++			    struct lxc_rootfs *rootfs, int dfd_mnt_cgroupfs,
++			    const char *hierarchy_mnt)
++{
++	__do_close int fd_fs = -EBADF;
++	unsigned int flags = 0;
++	char *fstype;
++	int ret;
++
++	if (dfd_mnt_cgroupfs < 0)
++		return ret_errno(EINVAL);
++
++	flags |= MOUNT_ATTR_NOSUID;
++	flags |= MOUNT_ATTR_NOEXEC;
++	flags |= MOUNT_ATTR_NODEV;
++	flags |= MOUNT_ATTR_RELATIME;
++
++	if ((cgroup_automount_type == LXC_AUTO_CGROUP_RO) ||
++	    (cgroup_automount_type == LXC_AUTO_CGROUP_FULL_RO) ||
++	    (cgroup_automount_type == LXC_AUTO_CGROUP2_RO))
++		flags |= MOUNT_ATTR_RDONLY;
++
++	if (is_unified_hierarchy(h))
++		fstype = "cgroup2";
++	else
++		fstype = "cgroup";
++
++	if (can_use_mount_api()) {
++		fd_fs = fs_prepare(fstype, -EBADF, "", 0, 0);
++		if (fd_fs < 0)
++			return log_error_errno(-errno, errno, "Failed to prepare filesystem context for %s", fstype);
++
++		if (!is_unified_hierarchy(h)) {
++			for (const char **it = (const char **)h->controllers; it && *it; it++) {
++				if (strnequal(*it, "name=", STRLITERALLEN("name=")))
++					ret = fs_set_property(fd_fs, "name", *it + STRLITERALLEN("name="));
++				else
++					ret = fs_set_property(fd_fs, *it, "");
++				if (ret < 0)
++					return log_error_errno(-errno, errno, "Failed to add %s controller to cgroup filesystem context %d(dev)", *it, fd_fs);
++			}
++		}
++
++		ret = fs_attach(fd_fs, dfd_mnt_cgroupfs, hierarchy_mnt,
++				PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH,
++				flags);
++	} else {
++		__do_free char *controllers = NULL, *target = NULL;
++		unsigned int old_flags = 0;
++		const char *rootfs_mnt;
++
++		if (!is_unified_hierarchy(h)) {
++			controllers = lxc_string_join(",", (const char **)h->controllers, false);
++			if (!controllers)
++				return ret_errno(ENOMEM);
++		}
++
++		rootfs_mnt = get_rootfs_mnt(rootfs);
++		ret = mnt_attributes_old(flags, &old_flags);
++		if (ret)
++			return log_error_errno(-EINVAL, EINVAL, "Unsupported mount properties specified");
++
++		target = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, hierarchy_mnt, NULL);
++#ifdef HAVE_ISULAD
++		ret = safe_mount(NULL, target, fstype, old_flags, controllers, rootfs_mnt, NULL);
++#else
++		ret = safe_mount(NULL, target, fstype, old_flags, controllers, rootfs_mnt);
++#endif
++	}
++	if (ret < 0)
++		return log_error_errno(ret, errno, "Failed to mount %s filesystem onto %d(%s)",
++				       fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt));
++
++	DEBUG("Mounted cgroup filesystem %s onto %d(%s)",
++	      fstype, dfd_mnt_cgroupfs, maybe_empty(hierarchy_mnt));
++	return 0;
++}
++
++static inline int cgroupfs_mount(int cgroup_automount_type, struct hierarchy *h,
++				 struct lxc_rootfs *rootfs,
++				 int dfd_mnt_cgroupfs, const char *hierarchy_mnt)
++{
++	return __cgroupfs_mount(cgroup_automount_type, h, rootfs,
++				dfd_mnt_cgroupfs, hierarchy_mnt);
++}
++
++static inline int cgroupfs_bind_mount(int cgroup_automount_type, struct hierarchy *h,
++				      struct lxc_rootfs *rootfs,
++				      int dfd_mnt_cgroupfs,
++				      const char *hierarchy_mnt)
++{
++	switch (cgroup_automount_type) {
++	case LXC_AUTO_CGROUP_FULL_RO:
++		break;
++	case LXC_AUTO_CGROUP_FULL_RW:
++		break;
++	case LXC_AUTO_CGROUP_FULL_MIXED:
++		break;
++	default:
++		return 0;
++	}
++
++	return __cgroupfs_mount(cgroup_automount_type, h, rootfs,
++				dfd_mnt_cgroupfs, hierarchy_mnt);
++}
++
+ /* __cg_mount_direct
+  *
+  * Mount cgroup hierarchies directly without using bind-mounts. The main
+@@ -1289,139 +1560,300 @@ static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
+ }
+ 
+ __cgfsng_ops static bool isulad_cgfsng_mount(struct cgroup_ops *ops,
+-                                      struct lxc_handler *handler,
+-                                      const char *root, int type)
+-{
+-	int i, ret;
+-	char *tmpfspath = NULL;
+-	char *systemdpath = NULL;
+-	char *unifiedpath = NULL;
+-	bool has_cgns = false, retval = false, wants_force_mount = false;
++				      struct lxc_handler *handler, int cg_flags)
++{
++	__do_close int dfd_mnt_tmpfs = -EBADF, fd_fs = -EBADF;
++	__do_free char *cgroup_root = NULL;
++	int cgroup_automount_type;
++	bool in_cgroup_ns = false, wants_force_mount = false;
++	struct lxc_conf *conf = handler->conf;
++	struct lxc_rootfs *rootfs = &conf->rootfs;
++	const char *rootfs_mnt = get_rootfs_mnt(rootfs);
++	int ret;
++#ifdef HAVE_ISULAD
+ 	char **merged = NULL;
++	__do_free char *systemdpath = NULL;
++	__do_free char *unifiedpath = NULL;
++#endif
++
++	if (!ops)
++		return ret_set_errno(false, ENOENT);
+ 
+-	if ((type & LXC_AUTO_CGROUP_MASK) == 0)
++	if (!ops->hierarchies)
+ 		return true;
+ 
+-	if (type & LXC_AUTO_CGROUP_FORCE) {
+-		type &= ~LXC_AUTO_CGROUP_FORCE;
++	if (!conf)
++		return ret_set_errno(false, EINVAL);
++
++	if ((cg_flags & LXC_AUTO_CGROUP_MASK) == 0)
++		return log_trace(true, "No cgroup mounts requested");
++
++	if (cg_flags & LXC_AUTO_CGROUP_FORCE) {
++		cg_flags &= ~LXC_AUTO_CGROUP_FORCE;
+ 		wants_force_mount = true;
+ 	}
+ 
++	switch (cg_flags) {
++	case LXC_AUTO_CGROUP_RO:
++		TRACE("Read-only cgroup mounts requested");
++		break;
++	case LXC_AUTO_CGROUP_RW:
++		TRACE("Read-write cgroup mounts requested");
++		break;
++	case LXC_AUTO_CGROUP_MIXED:
++		TRACE("Mixed cgroup mounts requested");
++		break;
++	case LXC_AUTO_CGROUP_FULL_RO:
++		TRACE("Full read-only cgroup mounts requested");
++		break;
++	case LXC_AUTO_CGROUP_FULL_RW:
++		TRACE("Full read-write cgroup mounts requested");
++		break;
++	case LXC_AUTO_CGROUP_FULL_MIXED:
++		TRACE("Full mixed cgroup mounts requested");
++		break;
++	case LXC_AUTO_CGROUP2_RW:
++		TRACE("Read-write cgroup2 mount requested");
++		break;
++	case LXC_AUTO_CGROUP2_RO:
++		TRACE("Read-only cgroup2 mount requested");
++		break;
++	default:
++		return log_error_errno(false, EINVAL, "Invalid cgroup mount options specified");
++	}
++	cgroup_automount_type = cg_flags;
++
+ 	if (!wants_force_mount) {
+-		if (!lxc_list_empty(&handler->conf->keepcaps))
+-			wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
+-		else
+-			wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
++		wants_force_mount = !lxc_wants_cap(CAP_SYS_ADMIN, conf);
++
++		/*
++		 * Most recent distro versions currently have init system that
++		 * do support cgroup2 but do not mount it by default unless
++		 * explicitly told so even if the host is cgroup2 only. That
++		 * means they often will fail to boot. Fix this by pre-mounting
++		 * cgroup2 by default. We will likely need to be doing this a
++		 * few years until all distros have switched over to cgroup2 at
++		 * which point we can safely assume that their init systems
++		 * will mount it themselves.
++		 */
++		if (pure_unified_layout(ops))
++			wants_force_mount = true;
+ 	}
+ 
+-	has_cgns = cgns_supported();
+-	if (has_cgns && !wants_force_mount)
+-		return true;
++	if (cgns_supported() && container_uses_namespace(handler, CLONE_NEWCGROUP))
++		in_cgroup_ns = true;
+ 
+-	if (type == LXC_AUTO_CGROUP_NOSPEC)
+-		type = LXC_AUTO_CGROUP_MIXED;
+-	else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
+-		type = LXC_AUTO_CGROUP_FULL_MIXED;
++	if (in_cgroup_ns && !wants_force_mount)
++		return log_trace(true, "Mounting cgroups not requested or needed");
+ 
+-	/* Mount tmpfs */
+-	tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
+-	if (mkdir_p(tmpfspath, 0755) < 0) {
+-		ERROR("Failed to create directory: %s", tmpfspath);
+-		goto on_error;
++	/* This is really the codepath that we want. */
++	if (pure_unified_layout(ops) ||
++	    (cgroup_automount_type == LXC_AUTO_CGROUP2_RW) ||
++	    (cgroup_automount_type == LXC_AUTO_CGROUP2_RO)) {
++		__do_close int dfd_mnt_unified = -EBADF;
++
++		if (!ops->unified)
++			return log_error_errno(false, EINVAL, "No unified cgroup hierarchy mounted on the host");
++
++		dfd_mnt_unified = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
++					  PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
++		if (dfd_mnt_unified < 0)
++			return syserror_ret(false, "Failed to open %d(%s)",
++					    rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
++		/*
++		 * If cgroup namespaces are supported but the container will
++		 * not have CAP_SYS_ADMIN after it has started we need to mount
++		 * the cgroups manually.
++		 *
++		 * Note that here we know that wants_force_mount is true.
++		 * Otherwise we would've returned early above.
++		 */
++		if (in_cgroup_ns) {
++			/*
++			 *  1. cgroup:rw:force    -> Mount the cgroup2 filesystem.
++			 *  2. cgroup:ro:force    -> Mount the cgroup2 filesystem read-only.
++			 *  3. cgroup:mixed:force -> See comment above how this
++			 *                           does not apply so
++			 *                           cgroup:mixed is equal to
++			 *                           cgroup:rw when cgroup
++			 *                           namespaces are supported.
++
++			 *  4. cgroup:rw    -> No-op; init system responsible for mounting.
++			 *  5. cgroup:ro    -> No-op; init system responsible for mounting.
++			 *  6. cgroup:mixed -> No-op; init system responsible for mounting.
++                         *
++			 *  7. cgroup-full:rw    -> Not supported.
++			 *  8. cgroup-full:ro    -> Not supported.
++			 *  9. cgroup-full:mixed -> Not supported.
++
++			 * 10. cgroup-full:rw:force    -> Not supported.
++			 * 11. cgroup-full:ro:force    -> Not supported.
++			 * 12. cgroup-full:mixed:force -> Not supported.
++			 *
++			 * 13. cgroup2		-> No-op; init system responsible for mounting.
++			 * 14. cgroup2:ro	-> No-op; init system responsible for mounting.
++			 * 15. cgroup2:force	-> Mount the cgroup2 filesystem read-write
++			 * 16. cgroup2:ro:force	-> Mount the cgroup2 filesystem read-only
++			 */
++			ret = cgroupfs_mount(cgroup_automount_type, ops->unified, rootfs, dfd_mnt_unified, "");
++			if (ret < 0)
++				return syserror_ret(false, "Failed to force mount cgroup filesystem in cgroup namespace");
++
++			return log_trace(true, "Force mounted cgroup filesystem in new cgroup namespace");
++		} else {
++			/*
++			 * Either no cgroup namespace supported (highly
++			 * unlikely unless we're dealing with a Frankenkernel.
++			 * Or the user requested to keep the cgroup namespace
++			 * of the host or another container.
++			 */
++			errno = EOPNOTSUPP;
++			if (wants_force_mount)
++				SYSWARN("Force-mounting the unified cgroup hierarchy without cgroup namespace support is currently not supported");
++			else
++				SYSWARN("Mounting the unified cgroup hierarchy without cgroup namespace support is currently not supported");
++		}
++
++		return syserror_ret(false, "Failed to mount cgroups");
+ 	}
+ 
+-        if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
+-                if (has_cgns && wants_force_mount) {
+-                        /*
+-                         * If cgroup namespaces are supported but the container
+-                         * will not have CAP_SYS_ADMIN after it has started we
+-                         * need to mount the cgroups manually.
+-                         */
+-                        return cg_mount_in_cgroup_namespace(type, ops->unified, tmpfspath) == 0;
+-                }
++	/*
++	 * Mount a tmpfs over DEFAULT_CGROUP_MOUNTPOINT. Note that we're
++	 * relying on RESOLVE_BENEATH so we need to skip the leading "/" in the
++	 * DEFAULT_CGROUP_MOUNTPOINT define.
++	 */
++	if (can_use_mount_api()) {
++		fd_fs = fs_prepare("tmpfs", -EBADF, "", 0, 0);
++		if (fd_fs < 0)
++			return log_error_errno(false, errno, "Failed to create new filesystem context for tmpfs");
+ 
+-                return cg_mount_cgroup_full(type, ops->unified, tmpfspath) == 0;
+-        }
++		ret = fs_set_property(fd_fs, "mode", "0755");
++		if (ret < 0)
++			return log_error_errno(false, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs);
++
++		ret = fs_set_property(fd_fs, "size", "10240k");
++		if (ret < 0)
++			return log_error_errno(false, errno, "Failed to mount tmpfs onto %d(dev)", fd_fs);
+ 
+-	ret = safe_mount(NULL, tmpfspath, "tmpfs",
+-	                 MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
+-	                 "size=10240k,mode=755", root, handler->conf->lsm_se_mount_context);
++		ret = fs_attach(fd_fs, rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
++				PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV,
++				MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV |
++				MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME);
++	} else {
++		cgroup_root = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, NULL);
++		ret = safe_mount(NULL, cgroup_root, "tmpfs",
++				 MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
++				 "size=10240k,mode=755", rootfs_mnt, handler->conf->rootfs.lsm_se_mount_context);
++	}
+ 	if (ret < 0)
+-		goto on_error;
++		return log_error_errno(false, errno, "Failed to mount tmpfs on %s",
++				       DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+ 
+-	for (i = 0; ops->hierarchies[i]; i++) {
+-		char *controllerpath = NULL;
+-		char *path2 = NULL;
+-		struct hierarchy *h = ops->hierarchies[i];
+-		char *controller = strrchr(h->at_mnt, '/');
++	dfd_mnt_tmpfs = open_at(rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE,
++				PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_BENEATH_XDEV, 0);
++	if (dfd_mnt_tmpfs < 0)
++		return syserror_ret(false, "Failed to open %d(%s)",
++				    rootfs->dfd_mnt, DEFAULT_CGROUP_MOUNTPOINT_RELATIVE);
+ 
+-		if (!controller)
+-			continue;
+-		controller++;
++	for (int i = 0; ops->hierarchies[i]; i++) {
++		__do_free char *hierarchy_mnt = NULL, *path2 = NULL;
++		struct hierarchy *h = ops->hierarchies[i];
+ 
++#ifdef HAVE_ISULAD
+ 		// isulad: symlink subcgroup
+-		if (strchr(controller, ',') != NULL) {
++		if (strchr(h->at_mnt, ',') != NULL) {
+ 			int pret;
+-			pret = lxc_append_string(&merged, controller);
++			pret = lxc_append_string(&merged, h->at_mnt);
+ 			if (pret < 0)
+-				goto on_error;
+-		}
+-
+-		controllerpath = must_make_path(tmpfspath, controller, NULL);
+-		if (dir_exists(controllerpath)) {
+-			free(controllerpath);
+-			continue;
++				return false;
+ 		}
++#endif
+ 
+-		ret = mkdir(controllerpath, 0755);
++		ret = mkdirat(dfd_mnt_tmpfs, h->at_mnt, 0000);
++#ifdef HAVE_ISULAD
+ 		if (ret < 0) {
+-			SYSERROR("Error creating cgroup path: %s", controllerpath);
+-			free(controllerpath);
+-			goto on_error;
++			lxc_free_array((void **)merged, free);
++			return syserror_ret(false, "Failed to create cgroup at_mnt %d(%s)", dfd_mnt_tmpfs, h->at_mnt);
+ 		}
++#else
++		if (ret < 0)
++			return syserror_ret(false, "Failed to create cgroup at_mnt %d(%s)", dfd_mnt_tmpfs, h->at_mnt);
++#endif
+ 
+-		if (has_cgns && wants_force_mount) {
+-			/* If cgroup namespaces are supported but the container
++		if (in_cgroup_ns && wants_force_mount) {
++			/*
++			 * If cgroup namespaces are supported but the container
+ 			 * will not have CAP_SYS_ADMIN after it has started we
+ 			 * need to mount the cgroups manually.
+ 			 */
+-			ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
+-			free(controllerpath);
++			ret = cgroupfs_mount(cgroup_automount_type, h, rootfs,
++					     dfd_mnt_tmpfs, h->at_mnt);
++#ifdef HAVE_ISULAD
++			if (ret < 0) {
++				lxc_free_array((void **)merged, free);
++				return false;
++			}
++#else
+ 			if (ret < 0)
+-				goto on_error;
+-
++				return false;
++#endif
+ 			continue;
+ 		}
+ 
+-		ret = cg_mount_cgroup_full(type, h, controllerpath);
++		/* Here is where the ancient kernel section begins. */
++		ret = cgroupfs_bind_mount(cgroup_automount_type, h, rootfs,
++					  dfd_mnt_tmpfs, h->at_mnt);
++#ifdef HAVE_ISULAD
+ 		if (ret < 0) {
+-			free(controllerpath);
+-			goto on_error;
++			lxc_free_array((void **)merged, free);
++			return false;
+ 		}
++#else
++		if (ret < 0)
++			return false;
++#endif
+ 
+-		if (!cg_mount_needs_subdirs(type)) {
+-			free(controllerpath);
++		if (!cg_mount_needs_subdirs(cgroup_automount_type))
+ 			continue;
+-		}
+ 
++		if (!cgroup_root)
++			cgroup_root = must_make_path(rootfs_mnt, DEFAULT_CGROUP_MOUNTPOINT, NULL);
++
++		hierarchy_mnt = must_make_path(cgroup_root, h->at_mnt, NULL);
++#ifdef HAVE_ISULAD
+ 		// isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container,
+-		// isulad: ignore h->at_base so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container
+-		path2 = must_make_path(controllerpath, NULL);
++		// isulad: ignore h->at_base so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container
++		path2 = must_make_path(h->at_mnt, NULL);
++#else
++		path2 = must_make_path(hierarchy_mnt, h->at_base,
++				       ops->container_cgroup, NULL);
++#endif
+ 		ret = mkdir_p(path2, 0755);
+-		if (ret < 0) {
+-			free(controllerpath);
+-			free(path2);
+-			goto on_error;
++#ifdef HAVE_ISULAD
++		if (ret < 0 && (errno != EEXIST)) {
++			lxc_free_array((void **)merged, free);
++			return false;
+ 		}
++#else
++		if (ret < 0 && (errno != EEXIST))
++			return false;
++#endif
+ 
+-		ret = cg_legacy_mount_controllers(type, h, controllerpath,
+-		                                  path2, ops->container_cgroup);
+-		free(controllerpath);
+-		free(path2);
++		ret = cg_legacy_mount_controllers(cgroup_automount_type, h,
++						  hierarchy_mnt, path2,
++						  ops->container_cgroup);
++#ifdef HAVE_ISULAD
++		if (ret < 0) {
++			lxc_free_array((void **)merged, free);
++			return false;
++		}
++#else
+ 		if (ret < 0)
+-			goto on_error;
++			return false;
++#endif
+ 	}
+ 
++#ifdef HAVE_ISULAD
+ 	// isulad: symlink subcgroup
+ 	if (merged) {
+ 		char **mc = NULL;
+@@ -1431,13 +1863,14 @@ __cgfsng_ops static bool isulad_cgfsng_mount(struct cgroup_ops *ops,
+ 			lxc_iterate_parts(token, copy, ",") {
+ 				int mret;
+ 				char *link;
+-				link = must_make_path(tmpfspath, token, NULL);
++				link = must_make_path(cgroup_root, token, NULL);
+ 				mret = symlink(*mc, link);
+ 				if (mret < 0 && errno != EEXIST) {
+ 					SYSERROR("Failed to create link %s for target %s", link, *mc);
+ 					free(copy);
+ 					free(link);
+-					goto on_error;
++					lxc_free_array((void **)merged, free);
++					return false;
+ 				}
+ 				free(link);
+ 			}
+@@ -1445,59 +1878,49 @@ __cgfsng_ops static bool isulad_cgfsng_mount(struct cgroup_ops *ops,
+ 		}
+ 	}
+ 
+-
+ 	// isulad: remount /sys/fs/cgroup to readonly
+-	if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) {
+-		ret = mount(tmpfspath, tmpfspath, "bind",
++	if (cg_flags == LXC_AUTO_CGROUP_FULL_RO || cg_flags == LXC_AUTO_CGROUP_RO) {
++		ret = mount(cgroup_root, cgroup_root, "bind",
+ 		            MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL);
+ 		if (ret < 0) {
+ 			SYSERROR("Failed to remount /sys/fs/cgroup.");
+-			goto on_error;
++			lxc_free_array((void **)merged, free);
++			return false;
+ 		}
+ 	}
+ 
+ 	// isulad: remount /sys/fs/cgroup/systemd to readwrite for system container
+ 	if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0)
+ 	{
+-		unifiedpath = must_make_path(root, "/sys/fs/cgroup/unified", NULL);
++		unifiedpath = must_make_path(get_rootfs_mnt(rootfs), "/sys/fs/cgroup/unified", NULL);
+ 		if (dir_exists(unifiedpath))
+ 		{
+ 			ret = umount2(unifiedpath, MNT_DETACH);
+ 			if (ret < 0)
+ 			{
+ 				SYSERROR("Failed to umount /sys/fs/cgroup/unified.");
+-				goto on_error;
++				lxc_free_array((void **)merged, free);
++				return false;
+ 			}
+ 		}
+ 
+-		systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL);
++		systemdpath = must_make_path(get_rootfs_mnt(rootfs), "/sys/fs/cgroup/systemd", NULL);
+ 		ret = mount(systemdpath, systemdpath, "bind",
+ 					MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME | MS_BIND | MS_REMOUNT, NULL);
+ 		if (ret < 0)
+ 		{
+ 			SYSERROR("Failed to remount /sys/fs/cgroup/systemd.");
+-			goto on_error;
++			lxc_free_array((void **)merged, free);
++			return false;
+ 		}
+ 	}
++#endif
+ 
+-	retval = true;
+-
+-on_error:
+-	free(tmpfspath);
+-	if (systemdpath != NULL)
+-	{
+-		free(systemdpath);
+-	}
+-	if (unifiedpath != NULL)
+-	{
+-		free(unifiedpath);
+-	}
+-	lxc_free_array((void **)merged, free);
+-	return retval;
++	return true;
+ }
+ 
+ /* Only root needs to escape to the cgroup of its init. */
+-__cgfsng_ops static bool isulad_cgfsng_escape(const struct cgroup_ops *ops,
++__cgfsng_ops static bool isulad_cgfsng_criu_escape(const struct cgroup_ops *ops,
+ 				       struct lxc_conf *conf)
+ {
+ 	if (!ops)
+@@ -1528,7 +1951,7 @@ __cgfsng_ops static bool isulad_cgfsng_escape(const struct cgroup_ops *ops,
+ 	return true;
+ }
+ 
+-__cgfsng_ops static int isulad_cgfsng_num_hierarchies(struct cgroup_ops *ops)
++__cgfsng_ops static int isulad_cgfsng_criu_num_hierarchies(struct cgroup_ops *ops)
+ {
+ 	int i = 0;
+ 
+@@ -1544,7 +1967,7 @@ __cgfsng_ops static int isulad_cgfsng_num_hierarchies(struct cgroup_ops *ops)
+ 	return i;
+ }
+ 
+-__cgfsng_ops static bool isulad_cgfsng_get_hierarchies(struct cgroup_ops *ops, int n,
++__cgfsng_ops static bool isulad_cgfsng_criu_get_hierarchies(struct cgroup_ops *ops, int n,
+ 						char ***out)
+ {
+ 	int i;
+@@ -1578,7 +2001,7 @@ static bool cg_legacy_freeze(struct cgroup_ops *ops)
+ }
+ 
+ static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata,
+-				    struct lxc_epoll_descr *descr)
++				    struct lxc_async_descr *descr)
+ {
+ 	__do_close int duped_fd = -EBADF;
+ 	__do_free char *line = NULL;
+@@ -1614,9 +2037,9 @@ static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata,
+ static int cg_unified_freeze(struct cgroup_ops *ops, int timeout)
+ {
+ 	__do_close int fd = -EBADF;
+-	call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL;
++	call_cleaner(lxc_mainloop_close) struct lxc_async_descr *descr_ptr = NULL;
+ 	int ret;
+-	struct lxc_epoll_descr descr;
++	struct lxc_async_descr descr;
+ 	struct hierarchy *h;
+ 
+ 	h = ops->unified;
+@@ -1641,7 +2064,8 @@ static int cg_unified_freeze(struct cgroup_ops *ops, int timeout)
+ 		/* automatically cleaned up now */
+ 		descr_ptr = &descr;
+ 
+-		ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){1}));
++		ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, default_cleanup_handler,
++									   INT_TO_PTR((int){1}), "freezer_cgroup_events");
+ 		if (ret < 0)
+ 			return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
+ 	}
+@@ -1682,9 +2106,9 @@ static int cg_legacy_unfreeze(struct cgroup_ops *ops)
+ static int cg_unified_unfreeze(struct cgroup_ops *ops, int timeout)
+ {
+ 	__do_close int fd = -EBADF;
+-	call_cleaner(lxc_mainloop_close)struct lxc_epoll_descr *descr_ptr = NULL;
++	call_cleaner(lxc_mainloop_close)struct lxc_async_descr *descr_ptr = NULL;
+ 	int ret;
+-	struct lxc_epoll_descr descr;
++	struct lxc_async_descr descr;
+ 	struct hierarchy *h;
+ 
+ 	h = ops->unified;
+@@ -1709,7 +2133,8 @@ static int cg_unified_unfreeze(struct cgroup_ops *ops, int timeout)
+ 		/* automatically cleaned up now */
+ 		descr_ptr = &descr;
+ 
+-		ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){0}));
++		ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, default_cleanup_handler,
++									   INT_TO_PTR((int){0}), "freezer_cgroup_events");
+ 		if (ret < 0)
+ 			return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
+ 	}
+@@ -1816,7 +2241,7 @@ static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t
+ 		 * that a short write would cause a buffer overrun. So be on
+ 		 * the safe side.
+ 		 */
+-		if (ret < STRLITERALLEN(".lxc-/cgroup.procs"))
++		if ((size_t)ret < STRLITERALLEN(".lxc-/cgroup.procs"))
+ 			return log_error_errno(-EINVAL, EINVAL, "Unexpected short write would cause buffer-overrun");
+ 
+ 		slash = &attach_cgroup[ret] - STRLITERALLEN("/cgroup.procs");
+@@ -1848,7 +2273,7 @@ static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t
+ }
+ 
+ static int cgroup_attach_create_leaf(const struct lxc_conf *conf,
+-				     int unified_fd, int *sk_fd)
++				     int unified_fd, int *sk_fd, bool unprivileged)
+ {
+ 	__do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF;
+ 	int target_fds[2];
+@@ -1857,73 +2282,116 @@ static int cgroup_attach_create_leaf(const struct lxc_conf *conf,
+ 	/* Create leaf cgroup. */
+ 	ret = mkdirat(unified_fd, ".lxc", 0755);
+ 	if (ret < 0 && errno != EEXIST)
+-		return log_error_errno(-1, errno, "Failed to create leaf cgroup \".lxc\"");
++		return syserror("Failed to create leaf cgroup \".lxc\"");
+ 
+-	target_fd0 = openat(unified_fd, ".lxc/cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+-	if (target_fd0 < 0)
+-		return log_error_errno(-errno, errno, "Failed to open \".lxc/cgroup.procs\"");
+-	target_fds[0] = target_fd0;
++	if (unprivileged) {
++		target_fd0 = open_at(unified_fd, ".lxc/cgroup.procs", PROTECT_OPEN_W, PROTECT_LOOKUP_BENEATH, 0);
++		if (target_fd0 < 0)
++			return syserror("Failed to open \".lxc/cgroup.procs\"");
++		target_fds[0] = target_fd0;
+ 
+-	target_fd1 = openat(unified_fd, "cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+-	if (target_fd1 < 0)
+-		return log_error_errno(-errno, errno, "Failed to open \".lxc/cgroup.procs\"");
+-	target_fds[1] = target_fd1;
++		target_fd1 = open_at(unified_fd, "cgroup.procs", PROTECT_OPEN_W, PROTECT_LOOKUP_BENEATH, 0);
++		if (target_fd1 < 0)
++			return syserror("Failed to open \".lxc/cgroup.procs\"");
++		target_fds[1] = target_fd1;
+ 
+-	ret = lxc_abstract_unix_send_fds(sk, target_fds, 2, NULL, 0);
+-	if (ret <= 0)
+-		return log_error_errno(-errno, errno, "Failed to send \".lxc/cgroup.procs\" fds %d and %d",
+-				       target_fd0, target_fd1);
++		ret = lxc_abstract_unix_send_fds(sk, target_fds, 2, NULL, 0);
++		if (ret <= 0)
++			return syserror("Failed to send \".lxc/cgroup.procs\" fds %d and %d",
++					target_fd0, target_fd1);
+ 
+-	return log_debug(0, "Sent target cgroup fds %d and %d", target_fd0, target_fd1);
++		TRACE("Sent cgroup file descriptors %d and %d", target_fd0, target_fd1);
++	} else {
++		ret = lxc_abstract_unix_send_credential(sk, NULL, 0);
++		if (ret < 0)
++			return syserror("Failed to inform parent that we are done setting up mounts");
++
++		TRACE("Informed parent process that cgroup has been created");
++	}
++
++	return 0;
+ }
+ 
+ static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf,
+-					int *sk_fd, pid_t pid)
++					const char *lxcpath,
++					int unified_fd, int *sk_fd, pid_t pid,
++					bool unprivileged)
+ {
+ 	__do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF;
+-	int target_fds[2];
+ 	char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
+ 	size_t pidstr_len;
++#if HAVE_LIBSYSTEMD
++	__do_free char *scope = NULL;
++#endif
+ 	ssize_t ret;
+ 
+-	ret = lxc_abstract_unix_recv_fds(sk, target_fds, 2, NULL, 0);
+-	if (ret <= 0)
+-		return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
+-	target_fd0 = target_fds[0];
+-	target_fd1 = target_fds[1];
++#if HAVE_LIBSYSTEMD
++	scope = lxc_cmd_get_systemd_scope(conf->name, lxcpath);
++	if (scope) {
++		TRACE("%s:%s is running under systemd-created scope '%s'.  Attaching...", lxcpath, conf->name, scope);
++		if (enter_scope(scope, pid))
++			TRACE("Successfully entered scope '%s'", scope);
++		else
++			ERROR("Failed entering scope '%s'", scope);
++	} else {
++		TRACE("%s:%s is not running under a systemd-created scope", lxcpath, conf->name);
++	}
++#endif
++	if (unprivileged) {
++		ret = lxc_abstract_unix_recv_two_fds(sk, &target_fd0, &target_fd1);
++		if (ret < 0)
++			return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
++	} else {
++		ret = lxc_abstract_unix_rcv_credential(sk, NULL, 0);
++		if (ret < 0)
++			return syserror("Failed to receive notification from parent process");
++
++		TRACE("Child process informed us that cgroup has been created");
++
++		target_fd0 = open_at(unified_fd, ".lxc/cgroup.procs", PROTECT_OPEN_W, PROTECT_LOOKUP_BENEATH, 0);
++		if (target_fd0 < 0)
++			return syserror("Failed to open \".lxc/cgroup.procs\"");
++
++		target_fd1 = open_at(unified_fd, "cgroup.procs", PROTECT_OPEN_W, PROTECT_LOOKUP_BENEATH, 0);
++		if (target_fd1 < 0)
++			return syserror("Failed to open \".lxc/cgroup.procs\"");
++
++		TRACE("Opened target cgroup file descriptors %d and %d", target_fd0, target_fd1);
++	}
+ 
+ 	pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid);
+ 
+ 	ret = lxc_write_nointr(target_fd0, pidstr, pidstr_len);
+-	if (ret > 0 && ret == pidstr_len)
++	if (ret > 0 && (size_t)ret == pidstr_len)
+ 		return log_debug(0, "Moved process into target cgroup via fd %d", target_fd0);
+ 
+ 	ret = lxc_write_nointr(target_fd1, pidstr, pidstr_len);
+-	if (ret > 0 && ret == pidstr_len)
++	if (ret > 0 && (size_t)ret == pidstr_len)
+ 		return log_debug(0, "Moved process into target cgroup via fd %d", target_fd1);
+ 
+-	return log_debug_errno(-1, errno, "Failed to move process into target cgroup via fd %d and %d",
+-			       target_fd0, target_fd1);
++	return syserror("Failed to move process into target cgroup via fd %d and %d", target_fd0, target_fd1);
+ }
+ 
+ struct userns_exec_unified_attach_data {
+ 	const struct lxc_conf *conf;
++	const char *lxcpath;
+ 	int unified_fd;
+ 	int sk_pair[2];
+ 	pid_t pid;
++	bool unprivileged;
+ };
+ 
+ static int cgroup_unified_attach_child_wrapper(void *data)
+ {
+ 	struct userns_exec_unified_attach_data *args = data;
+ 
+-	if (!args->conf || args->unified_fd < 0 || args->pid <= 0 ||
+-	    args->sk_pair[0] < 0 || args->sk_pair[1] < 0)
++	if (!args->conf || !args->lxcpath || args->unified_fd < 0 ||
++	    args->pid <= 0 || args->sk_pair[0] < 0 || args->sk_pair[1] < 0)
+ 		return ret_errno(EINVAL);
+ 
+ 	close_prot_errno_disarm(args->sk_pair[0]);
+ 	return cgroup_attach_create_leaf(args->conf, args->unified_fd,
+-					 &args->sk_pair[1]);
++					 &args->sk_pair[1], args->unprivileged);
+ }
+ 
+ static int cgroup_unified_attach_parent_wrapper(void *data)
+@@ -1935,44 +2403,10 @@ static int cgroup_unified_attach_parent_wrapper(void *data)
+ 		return ret_errno(EINVAL);
+ 
+ 	close_prot_errno_disarm(args->sk_pair[1]);
+-	return cgroup_attach_move_into_leaf(args->conf, &args->sk_pair[0],
+-					    args->pid);
+-}
+-
+-int cgroup_attach(const struct lxc_conf *conf, const char *name,
+-		  const char *lxcpath, pid_t pid)
+-{
+-	__do_close int unified_fd = -EBADF;
+-	int ret;
+-
+-	if (!conf || !name || !lxcpath || pid <= 0)
+-		return ret_errno(EINVAL);
+-
+-	unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
+-	if (unified_fd < 0)
+-		return ret_errno(EBADF);
+-
+-	if (!lxc_list_empty(&conf->id_map)) {
+-		struct userns_exec_unified_attach_data args = {
+-			.conf		= conf,
+-			.unified_fd	= unified_fd,
+-			.pid		= pid,
+-		};
+-
+-		ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair);
+-		if (ret < 0)
+-			return -errno;
+-
+-		ret = userns_exec_minimal(conf,
+-					  cgroup_unified_attach_parent_wrapper,
+-					  &args,
+-					  cgroup_unified_attach_child_wrapper,
+-					  &args);
+-	} else {
+-		ret = cgroup_attach_leaf(conf, unified_fd, pid);
+-	}
+-
+-	return ret;
++	return cgroup_attach_move_into_leaf(args->conf, args->lxcpath,
++					    args->unified_fd,
++					    &args->sk_pair[0], args->pid,
++					    args->unprivileged);
+ }
+ 
+ /* Technically, we're always at a delegation boundary here (This is especially
+@@ -1999,7 +2433,8 @@ static int __cg_unified_attach(const struct hierarchy *h,
+ 	ret = cgroup_attach(conf, name, lxcpath, pid);
+ 	if (ret == 0)
+ 		return log_trace(0, "Attached to unified cgroup via command handler");
+-	if (ret != -EBADF)
++	TRACE("__cg_unified_attach: cgroup_attach returned %d", ret);
++	if (!ERRNO_IS_NOT_SUPPORTED(ret) && ret != -ENOCGROUP2)
+ 		return log_error_errno(ret, errno, "Failed to attach to unified cgroup");
+ 
+ 	/* Fall back to retrieving the path for the unified cgroup. */
+@@ -2007,18 +2442,21 @@ static int __cg_unified_attach(const struct hierarchy *h,
+ 	/* not running */
+ 	if (!cgroup)
+ 		return 0;
++	TRACE("lxc_cmd_get_cgroup_path returned %s", cgroup);
+ 
+-	path = must_make_path(h->at_mnt, cgroup, NULL);
++	path = make_cgroup_path(h, cgroup, NULL);
+ 
+ 	unified_fd = open(path, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ 	if (unified_fd < 0)
+ 		return ret_errno(EBADF);
+ 
+-	if (!lxc_list_empty(&conf->id_map)) {
++	if (!list_empty(&conf->id_map)) {
+ 		struct userns_exec_unified_attach_data args = {
+ 			.conf		= conf,
+ 			.unified_fd	= unified_fd,
+ 			.pid		= pid,
++			.unprivileged	= am_guest_unpriv(),
++			.lxcpath	= lxcpath,
+ 		};
+ 
+ 		ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair);
+@@ -2152,32 +2590,26 @@ static int device_cgroup_parse_access(struct device_item *device, const char *va
+ 	return 0;
+ }
+ 
+-int device_cgroup_rule_parse(struct device_item *device, const char *key,
++static int device_cgroup_rule_parse(struct device_item *device, const char *key,
+ 				    const char *val)
+ {
+-	int count, ret;
++	size_t count;
++	int ret;
+ 	char temp[50];
+ 
+-	if (strcmp("devices.allow", key) == 0)
+-		device->allow = 1;
++	if (strequal("devices.allow", key))
++		device->allow = 1; /* allow the device */
+ 	else
+-		device->allow = 0;
++		device->allow = 0; /* deny the device */
+ 
+-	if (strcmp(val, "a") == 0) {
++	if (strequal(val, "a")) {
+ 		/* global rule */
+ 		device->type = 'a';
+ 		device->major = -1;
+ 		device->minor = -1;
+-		device->global_rule = device->allow
+-					  ? LXC_BPF_DEVICE_CGROUP_BLACKLIST
+-					  : LXC_BPF_DEVICE_CGROUP_WHITELIST;
+-		device->allow = -1;
+ 		return 0;
+ 	}
+ 
+-	/* local rule */
+-	device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE;
+-
+ 	switch (*val) {
+ 	case 'a':
+ 		__fallthrough;
+@@ -2300,7 +2732,9 @@ static int device_cgroup_rule_parse_devpath(struct device_item *device,
+ 	char *p;
+ 	struct stat sb;
+ 
+-	path = must_copy_string(devpath);
++	path = strdup(devpath);
++	if (!path)
++		return ret_errno(ENOMEM);
+ 
+ 	/*
+ 	 * Read path followed by mode. Ignore any trailing text.
+@@ -2329,9 +2763,6 @@ static int device_cgroup_rule_parse_devpath(struct device_item *device,
+ 	if (device_cgroup_parse_access(device, mode) < 0)
+ 		return -1;
+ 
+-	if (n_parts == 1)
+-		return ret_set_errno(-1, EINVAL);
+-
+ 	ret = stat(path, &sb);
+ 	if (ret < 0)
+ 		return ret_set_errno(-1, errno);
+@@ -2351,7 +2782,6 @@ static int device_cgroup_rule_parse_devpath(struct device_item *device,
+ 	device->major = MAJOR(sb.st_rdev);
+ 	device->minor = MINOR(sb.st_rdev);
+ 	device->allow = 1;
+-	device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE;
+ 
+ 	return 0;
+ }
+@@ -2481,15 +2911,38 @@ retry:
+ 	return ret;
+ }
+ 
++/*
++ * Reorder conf->cgroup in place (nothing is returned) according to these rules:
++ * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
++ */
++static void sort_cgroup_settings(struct lxc_conf *conf)
++{
++	LIST_HEAD(memsw_list);
++	struct lxc_cgroup *cgroup, *ncgroup;
++
++	/* Pull every memory.memsw.limit_in_bytes entry out into memsw_list. */
++	list_for_each_entry_safe(cgroup, ncgroup, &conf->cgroup, head) {
++		if (!strequal(cgroup->subsystem, "memory.memsw.limit_in_bytes"))
++			continue;
++
++		/* Move the memsw entry off the cgroup settings list onto memsw_list. */
++		list_move_tail(&cgroup->head, &memsw_list);
++	}
++
++	/*
++	 * Append all the memsw entries to the end of the cgroup settings list
++	 * to make sure they are applied after all memory limit settings.
++	 */
++	list_splice_tail(&memsw_list, &conf->cgroup);
++
++}
++
+ __cgfsng_ops static bool isulad_cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
+ 						    struct lxc_conf *conf,
+ 						    bool do_devices)
+ {
+-	__do_free struct lxc_list *sorted_cgroup_settings = NULL;
+-	struct lxc_list *cgroup_settings = &conf->cgroup;
+-	struct lxc_list *iterator, *next;
+-	struct lxc_cgroup *cg;
+-	bool ret = false;
++	struct list_head *cgroup_settings;
++	struct lxc_cgroup *cgroup;
+ 	char value[21 + 1] = { 0 };
+ 	long long int readvalue, setvalue;
+ 
+@@ -2500,7 +2953,7 @@ __cgfsng_ops static bool isulad_cgfsng_setup_limits_legacy(struct cgroup_ops *op
+ 		return ret_set_errno(false, EINVAL);
+ 
+ 	cgroup_settings = &conf->cgroup;
+-	if (lxc_list_empty(cgroup_settings))
++	if (list_empty(cgroup_settings))
+ 		return true;
+ 
+ 	if (!ops->hierarchies)
+@@ -2509,75 +2962,63 @@ __cgfsng_ops static bool isulad_cgfsng_setup_limits_legacy(struct cgroup_ops *op
+ 	if (pure_unified_layout(ops))
+ 		return true;
+ 
+-	sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
+-	if (!sorted_cgroup_settings)
+-		return false;
+-
+-	lxc_list_for_each(iterator, sorted_cgroup_settings) {
+-		cg = iterator->elem;
+-
+-		if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
+-			const char *cgvalue = cg->value;
+-			if (strcmp(cg->subsystem, "files.limit") == 0) {
++	sort_cgroup_settings(conf);
++	list_for_each_entry(cgroup, cgroup_settings, head) {
++		if (do_devices == strnequal("devices", cgroup->subsystem, 7)) {
++			const char *cgvalue = cgroup->value;
++			if (strcmp(cgroup->subsystem, "files.limit") == 0) {
+ 				if (lxc_safe_long_long(cgvalue, &setvalue) != 0) {
+ 					SYSERROR("Invalid integer value %s", cgvalue);
+-					goto out;
++					return false;
+ 				}
+ 				if (setvalue <= 0) {
+ 					cgvalue = "max";
+ 				}
+ 			}
+-			if (isulad_cg_legacy_set_data(ops, cg->subsystem, cgvalue)) {
++			if (isulad_cg_legacy_set_data(ops, cgroup->subsystem, cgvalue)) {
+ 				if (do_devices && (errno == EACCES || errno == EPERM)) {
+-					SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue);
++					SYSWARN("Failed to set \"%s\" to \"%s\"", cgroup->subsystem, cgvalue);
+ 					continue;
+ 				}
+-				SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue);
+-				goto out;
++				SYSERROR("Failed to set \"%s\" to \"%s\"", cgroup->subsystem, cgvalue);
++				return false;
+ 			}
+-			DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cgvalue);
++			DEBUG("Set controller \"%s\" set to \"%s\"", cgroup->subsystem, cgvalue);
+ 		}
+ 
+ 		// isulad: check cpu shares
+-		if (strcmp(cg->subsystem, "cpu.shares") == 0) {
+-			if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) {
+-				SYSERROR("Error get %s", cg->subsystem);
+-				goto out;
++		if (strcmp(cgroup->subsystem, "cpu.shares") == 0) {
++			if (isulad_cg_legacy_get_data(ops, cgroup->subsystem, value, sizeof(value) - 1) < 0) {
++				SYSERROR("Error get %s", cgroup->subsystem);
++				return false;
+ 			}
+ 			trim(value);
+-			if (lxc_safe_long_long(cg->value, &setvalue) != 0) {
+-				SYSERROR("Invalid value %s", cg->value);
+-				goto out;
++			if (lxc_safe_long_long(cgroup->value, &setvalue) != 0) {
++				SYSERROR("Invalid value %s", cgroup->value);
++				return false;
+ 			}
+ 			if (lxc_safe_long_long(value, &readvalue) != 0) {
+ 				SYSERROR("Invalid value %s", value);
+-				goto out;
++				return false;
+ 			}
+ 			if (setvalue > readvalue) {
+ 				ERROR("The maximum allowed cpu-shares is %s", value);
+ 				lxc_write_error_message(ops->errfd,
+ 				                        "%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".",
+ 				                        __FILE__, __LINE__, value);
+-				goto out;
++				return false;
+ 			} else if (setvalue < readvalue) {
+ 				ERROR("The minimum allowed cpu-shares is %s", value);
+ 				lxc_write_error_message(ops->errfd,
+ 				                        "%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".",
+ 				                        __FILE__, __LINE__, value);
+-				goto out;
++				return false;
+ 			}
+ 		}
+ 	}
+ 
+-	ret = true;
+ 	INFO("Limits for the legacy cgroup hierarchies have been setup");
+-out:
+-	lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
+-		lxc_list_del(iterator);
+-		free(iterator);
+-	}
+-
+-	return ret;
++	return true;
+ }
+ 
+ /*
+@@ -2588,31 +3029,35 @@ static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
+ 				     struct lxc_conf *conf, const char *key,
+ 				     const char *val)
+ {
+-#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+-	struct device_item device_item = {0};
++	struct device_item device_item = {};
+ 	int ret;
+ 
+-	if (strcmp("devices.allow", key) == 0 && *val == '/')
++	if (strequal("devices.allow", key) && abspath(val))
+ 		ret = device_cgroup_rule_parse_devpath(&device_item, val);
+ 	else
+ 		ret = device_cgroup_rule_parse(&device_item, key, val);
+ 	if (ret < 0)
+-		return log_error_errno(-1, EINVAL, "Failed to parse device string %s=%s", key, val);
++		return syserror_set(EINVAL, "Failed to parse device rule %s=%s", key, val);
+ 
+-	ret = bpf_list_add_device(conf, &device_item);
++	/*
++	 * Note that bpf_list_add_device() returns 1 if it altered the device
++	 * list and 0 if it didn't; both return values indicate success.
++	 * Only a negative return value indicates an error.
++	 */
++	ret = bpf_list_add_device(&conf->bpf_devices, &device_item);
+ 	if (ret < 0)
+ 		return -1;
+-#endif
++
+ 	return 0;
+ }
+-
+ __cgfsng_ops static bool isulad_cgfsng_setup_limits(struct cgroup_ops *ops,
+ 					     struct lxc_handler *handler)
+ {
+ 	__do_free char *path = NULL;
+-	struct lxc_list *cgroup_settings, *iterator;
++	struct list_head *cgroup_settings;
+ 	struct hierarchy *h;
+ 	struct lxc_conf *conf;
++	struct lxc_cgroup *cg;
+ 
+ 	if (!ops)
+ 		return ret_set_errno(false, ENOENT);
+@@ -2627,7 +3072,7 @@ __cgfsng_ops static bool isulad_cgfsng_setup_limits(struct cgroup_ops *ops,
+ 		return ret_set_errno(false, EINVAL);
+ 	conf = handler->conf;
+ 
+-	if (lxc_list_empty(&conf->cgroup2))
++	if (list_empty(&conf->cgroup2))
+ 		return true;
+ 	cgroup_settings = &conf->cgroup2;
+ 
+@@ -2638,8 +3083,7 @@ __cgfsng_ops static bool isulad_cgfsng_setup_limits(struct cgroup_ops *ops,
+ 		return false;
+ 	h = ops->unified;
+ 
+-	lxc_list_for_each (iterator, cgroup_settings) {
+-		struct lxc_cgroup *cg = iterator->elem;
++	list_for_each_entry(cg, cgroup_settings, head) {
+ 		int ret;
+ 
+ 		if (strncmp("devices", cg->subsystem, 7) == 0) {
+@@ -2786,7 +3230,7 @@ bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
+ 		(void)strlcat(add_controllers, "+", full_len + 1);
+ 		(void)strlcat(add_controllers, *it, full_len + 1);
+ 
+-		if ((it + 1) && *(it + 1))
++		if (*(it + 1))
+ 			(void)strlcat(add_controllers, " ", full_len + 1);
+ 	}
+ 
+@@ -2836,333 +3280,490 @@ __cgfsng_ops bool isulad_cgfsng_payload_delegate_controllers(struct cgroup_ops *
+ 	return __cgfsng_delegate_controllers(ops, ops->container_cgroup);
+ }
+ 
+-static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
+-				       char **controllers)
++static inline bool unified_cgroup(const char *line)
+ {
+-	if (!ops->cgroup_use)
+-		return true;
++	return *line == '0';
++}
+ 
+-	for (char **cur_ctrl = controllers; cur_ctrl && *cur_ctrl; cur_ctrl++) {
+-		bool found = false;
++static inline char *current_unified_cgroup(bool relative, char *line)
++{
++	char *current_cgroup;
+ 
+-		for (char **cur_use = ops->cgroup_use; cur_use && *cur_use; cur_use++) {
+-			if (strcmp(*cur_use, *cur_ctrl) != 0)
+-				continue;
++	line += STRLITERALLEN("0::");
+ 
+-			found = true;
+-			break;
+-		}
++	if (!abspath(line))
++		return ERR_PTR(-EINVAL);
+ 
+-		if (found)
+-			continue;
++	/* remove init.scope */
++	if (!relative)
++		line = prune_init_scope(line);
+ 
+-		return false;
+-	}
++	/* create a relative path */
++	line = deabs(line);
+ 
+-	return true;
++	current_cgroup = strdup(line);
++	if (!current_cgroup)
++		return ERR_PTR(-ENOMEM);
++
++	return current_cgroup;
+ }
+ 
+-static void cg_unified_delegate(char ***delegate)
++static inline const char *unprefix(const char *controllers)
+ {
++	if (strnequal(controllers, "name=", STRLITERALLEN("name=")))
++		return controllers + STRLITERALLEN("name=");
++	return controllers;
++}
++
++static int __list_cgroup_delegate(char ***delegate)
++{
++	__do_free char **list = NULL;
+ 	__do_free char *buf = NULL;
+-	char *standard[] = {"cgroup.subtree_control", "cgroup.threads", NULL};
++	char *standard[] = {
++		"cgroup.procs",
++		"cgroup.threads",
++		"cgroup.subtree_control",
++		"memory.oom.group",
++		NULL,
++	};
+ 	char *token;
+-	int idx;
++	int ret;
+ 
+-	buf = read_file("/sys/kernel/cgroup/delegate");
++	buf = read_file_at(-EBADF, "/sys/kernel/cgroup/delegate", PROTECT_OPEN, 0);
+ 	if (!buf) {
+ 		for (char **p = standard; p && *p; p++) {
+-			idx = append_null_to_list((void ***)delegate);
+-			(*delegate)[idx] = must_copy_string(*p);
++			ret = list_add_string(&list, *p);
++			if (ret < 0)
++				return ret;
+ 		}
+-		SYSWARN("Failed to read /sys/kernel/cgroup/delegate");
+-		return;
++
++		*delegate = move_ptr(list);
++		return syswarn_ret(0, "Failed to read /sys/kernel/cgroup/delegate");
+ 	}
+ 
+-	lxc_iterate_parts (token, buf, " \t\n") {
++	lxc_iterate_parts(token, buf, " \t\n") {
+ 		/*
+ 		 * We always need to chown this for both cgroup and
+ 		 * cgroup2.
+ 		 */
+-		if (strcmp(token, "cgroup.procs") == 0)
++		if (strequal(token, "cgroup.procs"))
+ 			continue;
+ 
+-		idx = append_null_to_list((void ***)delegate);
+-		(*delegate)[idx] = must_copy_string(token);
++		ret = list_add_string(&list, token);
++		if (ret < 0)
++			return ret;
+ 	}
++
++	*delegate = move_ptr(list);
++	return 0;
+ }
+ 
+-/* At startup, parse_hierarchies finds all the info we need about cgroup
+- * mountpoints and current cgroups, and stores it in @d.
+- */
+-static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileged)
++static bool unified_hierarchy_delegated(int dfd_base, char ***ret_files)
+ {
+-	__do_free char *basecginfo = NULL, *line = NULL;
+-	__do_free_string_list char **klist = NULL, **nlist = NULL;
+-	__do_fclose FILE *f = NULL;
++	__do_free_string_list char **list = NULL;
+ 	int ret;
+-	size_t len = 0;
+ 
+-	/* Root spawned containers escape the current cgroup, so use init's
+-	 * cgroups as our base in that case.
+-	 */
+-	if (!relative && (geteuid() == 0))
+-		basecginfo = read_file("/proc/1/cgroup");
+-	else
+-		basecginfo = read_file("/proc/self/cgroup");
+-	if (!basecginfo)
+-		return ret_set_errno(-1, ENOMEM);
+-
+-	ret = get_existing_subsystems(&klist, &nlist);
++	ret = __list_cgroup_delegate(&list);
+ 	if (ret < 0)
+-		return log_error_errno(-1, errno, "Failed to retrieve available legacy cgroup controllers");
++		return syserror_ret(ret, "Failed to determine unified cgroup delegation requirements");
+ 
+-	f = fopen("/proc/self/mountinfo", "re");
+-	if (!f)
+-		return log_error_errno(-1, errno, "Failed to open \"/proc/self/mountinfo\"");
++	for (char *const *s = list; s && *s; s++) {
++		if (!faccessat(dfd_base, *s, W_OK, 0) || errno == ENOENT)
++			continue;
+ 
+-	lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
++		return sysinfo_ret(false, "The %s file is not writable, skipping unified hierarchy", *s);
++	}
+ 
+-	while (getline(&line, &len, f) != -1) {
+-		__do_free char *base_cgroup = NULL, *mountpoint = NULL;
+-		__do_free_string_list char **controller_list = NULL;
+-		int type;
+-		struct hierarchy *new;
++	*ret_files = move_ptr(list);
++	return true;
++}
+ 
+-		type = get_cgroup_version(line);
+-		if (type == 0)
+-			continue;
++static bool legacy_hierarchy_delegated(int dfd_base)
++{
++	int ret;
+ 
+-		if (type == CGROUP2_SUPER_MAGIC && ops->unified)
+-			continue;
++	ret = faccessat(dfd_base, ".", W_OK, 0);
++	if (ret < 0 && errno != ENOENT)
++		return sysinfo_ret(false, "Legacy hierarchy not writable, skipping");
+ 
+-		if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
+-			if (type == CGROUP2_SUPER_MAGIC)
+-				ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+-			else if (type == CGROUP_SUPER_MAGIC)
+-				ops->cgroup_layout = CGROUP_LAYOUT_LEGACY;
+-		} else if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
+-			if (type == CGROUP_SUPER_MAGIC)
+-				ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+-		} else if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) {
+-			if (type == CGROUP2_SUPER_MAGIC)
+-				ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+-		}
++	return true;
++}
+ 
+-		controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
+-		if (!controller_list && type == CGROUP_SUPER_MAGIC)
+-			continue;
++/**
++ * systemd guarantees that the order of co-mounted controllers is stable. On
++ * some systems the order of the controllers might be reversed though.
++ *
++ * For example, this is how the order is mismatched on CentOS 7:
++ *
++ *      [root@localhost ~]# cat /proc/self/cgroup
++ *      11:perf_event:/
++ *      10:pids:/
++ *      9:freezer:/
++ * >>>> 8:cpuacct,cpu:/
++ *      7:memory:/
++ *      6:blkio:/
++ *      5:devices:/
++ *      4:hugetlb:/
++ * >>>> 3:net_prio,net_cls:/
++ *      2:cpuset:/
++ *      1:name=systemd:/user.slice/user-0.slice/session-c1.scope
++ *
++ * whereas the mountpoint:
++ *
++ *      | |-/sys/fs/cgroup                    tmpfs         tmpfs      ro,nosuid,nodev,noexec,mode=755
++ *      | | |-/sys/fs/cgroup/systemd          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
++ *      | | |-/sys/fs/cgroup/cpuset           cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,cpuset
++ * >>>> | | |-/sys/fs/cgroup/net_cls,net_prio cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,net_prio,net_cls
++ *      | | |-/sys/fs/cgroup/hugetlb          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,hugetlb
++ *      | | |-/sys/fs/cgroup/devices          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,devices
++ *      | | |-/sys/fs/cgroup/blkio            cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,blkio
++ *      | | |-/sys/fs/cgroup/memory           cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,memory
++ * >>>> | | |-/sys/fs/cgroup/cpu,cpuacct      cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,cpuacct,cpu
++ *      | | |-/sys/fs/cgroup/freezer          cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,freezer
++ *      | | |-/sys/fs/cgroup/pids             cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,pids
++ *      | | `-/sys/fs/cgroup/perf_event       cgroup        cgroup     rw,nosuid,nodev,noexec,relatime,perf_event
++ *
++ * Ensure that we always use the systemd-guaranteed stable order when checking
++ * for the mountpoint.
++ */
++#if HAVE_COMPILER_ATTR_NONNULL
++__attribute__((nonnull))
++#endif
++#if HAVE_COMPILER_ATTR_RETURNS_NONNULL
++__attribute__((returns_nonnull))
++#endif
++static const char *stable_order(const char *controllers)
++{
++	if (strequal(controllers, "cpuacct,cpu"))
++		return "cpu,cpuacct";
+ 
+-		if (type == CGROUP_SUPER_MAGIC)
+-			if (controller_list_is_dup(ops->hierarchies, controller_list)) {
+-				TRACE("Skipping duplicating controller");
+-				continue;
+-			}
++	if (strequal(controllers, "net_prio,net_cls"))
++		return "net_cls,net_prio";
+ 
+-		mountpoint = cg_hybrid_get_mountpoint(line);
+-		if (!mountpoint) {
+-			WARN("Failed parsing mountpoint from \"%s\"", line);
+-			continue;
+-		}
++	return unprefix(controllers);
++}
+ 
+-		if (type == CGROUP_SUPER_MAGIC)
+-			base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
+-		else
+-			base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
+-		if (!base_cgroup) {
+-			WARN("Failed to find current cgroup");
+-			continue;
+-		}
++#define CGFSNG_LAYOUT_LEGACY	BIT(0)
++#define CGFSNG_LAYOUT_UNIFIED	BIT(1)
+ 
+-		trim(base_cgroup);
+-		prune_init_scope(base_cgroup);
++static int __initialize_cgroups(struct cgroup_ops *ops, bool relative,
++				bool unprivileged, struct lxc_conf *conf)
++{
++	__do_free char *cgroup_info = NULL;
++	unsigned int layout_mask = 0;
++	int ret;
++	char *it;
+ 
+-		/* isulad: do not test writeable, if we run isulad in docker without cgroup namespace.
+-		 * the base_cgroup will be docker/XXX.., mountpoint+base_cgroup may be not exist */
++	ret = unpriv_systemd_create_scope(ops, conf);
++	if (ret < 0)
++		return ret_set_errno(false, ret);
++	else if (ret == 0)
++		TRACE("Entered an unpriv systemd scope");
+ 
+-		/*
+-		 * reason:base cgroup may be started with /system.slice when cg_hybrid_init
+-		 *	read /proc/1/cgroup on host, and cgroup init will set all containers
+-		 *	cgroup path under /sys/fs/cgroup/<controller>/system.slice/xxx/lxc
+-		 *	directory, this is not consistent with docker. The default cgroup path
+-		 *	should be under /sys/fs/cgroup/<controller>/lxc directory.
+-		 */
++	/*
++	 * Root spawned containers escape the current cgroup, so use init's
++	 * cgroups as our base in that case.
++	 */
++	if (!relative && (geteuid() == 0))
++		cgroup_info = read_file_at(-EBADF, "/proc/1/cgroup", PROTECT_OPEN, 0);
++	else
++		cgroup_info = read_file_at(-EBADF, "/proc/self/cgroup", PROTECT_OPEN, 0);
++	if (!cgroup_info)
++		return ret_errno(ENOMEM);
++
++	lxc_iterate_parts(it, cgroup_info, "\n") {
++		__do_close int dfd_base = -EBADF, dfd_mnt = -EBADF;
++		__do_free char *controllers = NULL, *current_cgroup = NULL;
++		__do_free_string_list char **controller_list = NULL,
++					   **delegate = NULL;
++		char *line;
++		int dfd, type;
++
++		/* Handle the unified cgroup hierarchy. */
++		line = it;
++		if (unified_cgroup(line)) {
++			char *unified_mnt;
++
++			type = UNIFIED_HIERARCHY;
++			layout_mask |= CGFSNG_LAYOUT_UNIFIED;
++
++			if (conf->cgroup_meta.systemd_scope)
++				current_cgroup = cgroup_relpath(conf->cgroup_meta.systemd_scope);
++			if (IS_ERR_OR_NULL(current_cgroup))
++				current_cgroup = current_unified_cgroup(relative, line);
++			if (IS_ERR(current_cgroup))
++				return PTR_ERR(current_cgroup);
++
++			if (unified_cgroup_fd(ops->dfd_mnt)) {
++				dfd_mnt = dup_cloexec(ops->dfd_mnt);
++				unified_mnt = "";
++			} else {
++				dfd_mnt = open_at(ops->dfd_mnt,
++						  "unified",
++						  PROTECT_OPATH_DIRECTORY,
++						  PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
++				unified_mnt = "unified";
++			}
++			if (dfd_mnt < 0) {
++				if (errno != ENOENT)
++					return syserror("Failed to open %d/unified", ops->dfd_mnt);
+ 
+-		if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') {
+-			base_cgroup[1] = '\0';
+-		}
++				SYSTRACE("Unified cgroup not mounted");
++				continue;
++			}
++
++			if (!fhas_fs_type(dfd_mnt, CGROUP2_SUPER_MAGIC)) {
++				SYSTRACE("Opened file descriptor %d is not a cgroup2 mountpoint", dfd_mnt);
++				continue;
++			}
+ 
+-		if (type == CGROUP2_SUPER_MAGIC) {
+-			char *cgv2_ctrl_path;
++			dfd = dfd_mnt;
++
++			if (!is_empty_string(current_cgroup)) {
++				dfd_base = open_at(dfd_mnt, current_cgroup,
++						   PROTECT_OPATH_DIRECTORY,
++						   PROTECT_LOOKUP_BENEATH_XDEV, 0);
++				if (dfd_base < 0) {
++					if (errno != ENOENT)
++						return syserror("Failed to open %d/%s",
++								dfd_mnt, current_cgroup);
++
++					SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)",
++						 dfd_mnt, current_cgroup);
++					continue;
++				}
++				dfd = dfd_base;
++			}
+ 
+-			cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
+-							"cgroup.controllers",
+-							NULL);
++			if (!unified_hierarchy_delegated(dfd, &delegate))
++				continue;
+ 
+-			controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
+-			free(cgv2_ctrl_path);
++			controller_list = unified_controllers(dfd, "cgroup.controllers");
+ 			if (!controller_list) {
+-				controller_list = cg_unified_make_empty_controller();
+-				TRACE("No controllers are enabled for "
+-				      "delegation in the unified hierarchy");
++				TRACE("No controllers are enabled for delegation in the unified hierarchy");
++				controller_list = list_new();
++				if (!controller_list)
++					return syserror_set(-ENOMEM, "Failed to create empty controller list");
+ 			}
+-		}
+ 
+-		/* Exclude all controllers that cgroup use does not want. */
+-		if (!cgroup_use_wants_controllers(ops, controller_list)) {
+-			TRACE("Skipping controller");
+-			continue;
+-		}
++			controllers = strdup(unified_mnt);
++			if (!controllers)
++				return ret_errno(ENOMEM);
++		} else {
++			char *__controllers, *__current_cgroup;
++
++			type = LEGACY_HIERARCHY;
++			layout_mask |= CGFSNG_LAYOUT_LEGACY;
++
++			__controllers = strchr(line, ':');
++			if (!__controllers)
++				return ret_errno(EINVAL);
++			__controllers++;
++
++			__current_cgroup = strchr(__controllers, ':');
++			if (!__current_cgroup)
++				return ret_errno(EINVAL);
++			*__current_cgroup = '\0';
++			__current_cgroup++;
++
++			controllers = strdup(stable_order(__controllers));
++			if (!controllers)
++				return ret_errno(ENOMEM);
++
++			dfd_mnt = open_at(ops->dfd_mnt,
++					  controllers,
++					  PROTECT_OPATH_DIRECTORY,
++					  PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
++			if (dfd_mnt < 0) {
++				if (errno != ENOENT)
++					return syserror("Failed to open %d/%s",
++							ops->dfd_mnt, controllers);
++
++				SYSTRACE("%s not mounted", controllers);
++				continue;
++			}
+ 
+-		new = add_hierarchy(&ops->hierarchies, move_ptr(controller_list), move_ptr(mountpoint), move_ptr(base_cgroup), type);
+-		if (type == CGROUP2_SUPER_MAGIC && !ops->unified) {
+-			if (unprivileged)
+-				cg_unified_delegate(&new->cgroup2_chown);
+-			ops->unified = new;
+-		}
+-	}
++			if (!fhas_fs_type(dfd_mnt, CGROUP_SUPER_MAGIC)) {
++				SYSTRACE("Opened file descriptor %d is not a cgroup mountpoint", dfd_mnt);
++				continue;
++			}
+ 
+-	TRACE("Writable cgroup hierarchies:");
+-	lxc_cgfsng_print_hierarchies(ops);
++			dfd = dfd_mnt;
+ 
+-	/* verify that all controllers in cgroup.use and all crucial
+-	 * controllers are accounted for
+-	 */
+-	if (!all_controllers_found(ops))
+-		return log_error_errno(-1, ENOENT, "Failed to find all required controllers");
++			if (!abspath(__current_cgroup))
++				return ret_errno(EINVAL);
+ 
+-	return 0;
+-}
++			/* remove init.scope */
++			if (!relative)
++				__current_cgroup = prune_init_scope(__current_cgroup);
+ 
+-/* Get current cgroup from /proc/self/cgroup for the cgroupfs v2 hierarchy. */
+-static char *cg_unified_get_current_cgroup(bool relative)
+-{
+-	__do_free char *basecginfo = NULL;
+-	char *copy;
+-	char *base_cgroup;
++			/* create a relative path */
++			__current_cgroup = deabs(__current_cgroup);
+ 
+-	if (!relative && (geteuid() == 0))
+-		basecginfo = read_file("/proc/1/cgroup");
+-	else
+-		basecginfo = read_file("/proc/self/cgroup");
+-	if (!basecginfo)
+-		return NULL;
++			current_cgroup = strdup(__current_cgroup);
++			if (!current_cgroup)
++				return ret_errno(ENOMEM);
+ 
+-	base_cgroup = strstr(basecginfo, "0::/");
+-	if (!base_cgroup)
+-		return NULL;
++			if (!is_empty_string(current_cgroup)) {
++				dfd_base = open_at(dfd_mnt, current_cgroup,
++						   PROTECT_OPATH_DIRECTORY,
++						   PROTECT_LOOKUP_BENEATH_XDEV, 0);
++				if (dfd_base < 0) {
++					if (errno != ENOENT)
++						return syserror("Failed to open %d/%s",
++								dfd_mnt, current_cgroup);
+ 
+-	base_cgroup = base_cgroup + 3;
+-	copy = copy_to_eol(base_cgroup);
+-	if (!copy)
+-		return NULL;
++					SYSTRACE("Current cgroup %d/%s does not exist (funky cgroup layout?)",
++						 dfd_mnt, current_cgroup);
++					continue;
++				}
++				dfd = dfd_base;
++			}
+ 
+-	return trim(copy);
+-}
++			if (!legacy_hierarchy_delegated(dfd))
++				continue;
+ 
+-static int cg_unified_init(struct cgroup_ops *ops, bool relative,
+-			   bool unprivileged)
+-{
+-	__do_free char *subtree_path = NULL;
+-	int ret;
+-	char *mountpoint;
+-	char **delegatable;
+-	struct hierarchy *new;
+-	char *base_cgroup = NULL;
++			/*
++			 * We intentionally pass __controllers here and not
++			 * controllers because we would otherwise chop the
++			 * mountpoint.
++			 */
++			controller_list = list_add_controllers(__controllers);
++			if (!controller_list)
++				return syserror_set(-ENOMEM, "Failed to create controller list from %s", __controllers);
+ 
+-	ret = unified_cgroup_hierarchy();
+-	if (ret == -ENOMEDIUM)
+-		return ret_errno(ENOMEDIUM);
++			if (skip_hierarchy(ops, controller_list))
++				continue;
+ 
+-	if (ret != CGROUP2_SUPER_MAGIC)
+-		return 0;
++			ops->cgroup_layout = CGROUP_LAYOUT_LEGACY;
++		}
+ 
+-	base_cgroup = cg_unified_get_current_cgroup(relative);
+-	if (!base_cgroup)
+-		return ret_errno(EINVAL);
+-	if (!relative)
+-		prune_init_scope(base_cgroup);
++		ret = cgroup_hierarchy_add(ops, dfd_mnt, controllers, dfd,
++					   current_cgroup, controller_list, type);
++		if (ret < 0)
++			return syserror_ret(ret, "Failed to add %s hierarchy", controllers);
++
++		/* Transfer ownership. */
++		move_fd(dfd_mnt);
++		move_fd(dfd_base);
++		move_ptr(current_cgroup);
++		move_ptr(controllers);
++		move_ptr(controller_list);
++		if (type == UNIFIED_HIERARCHY)
++			ops->unified->delegate = move_ptr(delegate);
++	}
+ 
+-	/*
+-	 * We assume that the cgroup we're currently in has been delegated to
+-	 * us and we are free to further delege all of the controllers listed
+-	 * in cgroup.controllers further down the hierarchy.
+-	 */
+-	mountpoint = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT);
+-	subtree_path = must_make_path(mountpoint, base_cgroup, "cgroup.controllers", NULL);
+-	delegatable = cg_unified_get_controllers(subtree_path);
+-	if (!delegatable)
+-		delegatable = cg_unified_make_empty_controller();
+-	if (!delegatable[0]) {
+-		TRACE("No controllers are enabled for delegation");
+-#ifdef HAVE_ISULAD
+-		ops->no_controller = true;
+-#endif
++	/* determine cgroup layout */
++	if (ops->unified) {
++		if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) {
++			ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
++		} else {
++			if (bpf_devices_cgroup_supported())
++				ops->unified->utilities |= DEVICES_CONTROLLER;
++			ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
++		}
+ 	}
+ 
+-	/* TODO: If the user requested specific controllers via lxc.cgroup.use
+-	 * we should verify here. The reason I'm not doing it right is that I'm
+-	 * not convinced that lxc.cgroup.use will be the future since it is a
+-	 * global property. I much rather have an option that lets you request
+-	 * controllers per container.
++	/*
++	 * If we still don't know the cgroup layout at this point it means we
++	 * have not found any writable cgroup hierarchies. Infer the layout
++	 * from the layout bitmask we created when parsing the cgroups.
++	 *
++	 * Keep the ordering in the switch otherwise the bitmask-based
++	 * matching won't work.
+ 	 */
++	if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
++		switch (layout_mask) {
++		case (CGFSNG_LAYOUT_LEGACY | CGFSNG_LAYOUT_UNIFIED):
++			ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
++			break;
++		case CGFSNG_LAYOUT_LEGACY:
++			ops->cgroup_layout = CGROUP_LAYOUT_LEGACY;
++			break;
++		case CGFSNG_LAYOUT_UNIFIED:
++			ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
++			break;
++		}
++	}
+ 
+-	new = add_hierarchy(&ops->hierarchies, delegatable, mountpoint, base_cgroup, CGROUP2_SUPER_MAGIC);
+-	if (unprivileged)
+-		cg_unified_delegate(&new->cgroup2_chown);
+-
+-	if (bpf_devices_cgroup_supported())
+-		new->bpf_device_controller = 1;
+-
+-	ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+-	ops->unified = new;
++	if (!controllers_available(ops))
++		return syserror_set(-ENOENT, "One or more requested controllers unavailable or not delegated");
+ 
+-	return CGROUP2_SUPER_MAGIC;
++	return 0;
+ }
+ 
+-static int isulad_cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
++static int isulad_initialize_cgroups(struct cgroup_ops *ops, struct lxc_conf *conf)
+ {
++	__do_close int dfd = -EBADF;
+ 	int ret;
+-	const char *tmp;
+-	bool relative = conf->cgroup_meta.relative;
++	const char *controllers_use;
+ 
+-	tmp = lxc_global_config_value("lxc.cgroup.use");
+-	if (tmp) {
+-		__do_free char *pin = NULL;
+-		char *chop, *cur;
++	if (ops->dfd_mnt >= 0)
++		return ret_errno(EBUSY);
++
++	/*
++	 * I don't see the need for allowing symlinks here. If users want to
++	 * have their hierarchy available in different locations I strongly
++	 * suggest bind-mounts.
++	 */
++	dfd = open_at(-EBADF, DEFAULT_CGROUP_MOUNTPOINT,
++			PROTECT_OPATH_DIRECTORY, PROTECT_LOOKUP_ABSOLUTE_XDEV, 0);
++	if (dfd < 0)
++		return syserror("Failed to open " DEFAULT_CGROUP_MOUNTPOINT);
++
++	controllers_use = lxc_global_config_value("lxc.cgroup.use");
++	if (controllers_use) {
++		__do_free char *dup = NULL;
++		char *it;
+ 
+-		pin = must_copy_string(tmp);
+-		chop = pin;
++		dup = strdup(controllers_use);
++		if (!dup)
++			return -errno;
+ 
+-		lxc_iterate_parts(cur, chop, ",")
+-			must_append_string(&ops->cgroup_use, cur);
++		lxc_iterate_parts(it, dup, ",") {
++			ret = list_add_string(&ops->cgroup_use, it);
++			if (ret < 0)
++				return ret;
++		}
+ 	}
+ 
+-	ret = cg_unified_init(ops, relative, !lxc_list_empty(&conf->id_map));
+-	if (ret < 0)
+-		return -1;
++	/*
++	 * Keep dfd referenced by the cleanup function and actually move the fd
++	 * once we know the initialization succeeded. So if we fail we clean up
++	 * the dfd.
++	 */
++	ops->dfd_mnt = dfd;
+ 
+-	if (ret == CGROUP2_SUPER_MAGIC)
+-		return 0;
++	ret = __initialize_cgroups(ops, conf->cgroup_meta.relative, !list_empty(&conf->id_map), conf);
++	if (ret < 0)
++		return syserror_ret(ret, "Failed to initialize cgroups");
+ 
+-	return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
++	/* Transfer ownership to cgroup_ops. */
++	move_fd(dfd);
++	return 0;
+ }
+ 
+ __cgfsng_ops static int isulad_cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
+ {
+ 	const char *cgroup_pattern;
++#ifdef HAVE_ISULAD
+ 	const char *cgroup_tree;
+ 	__do_free char *container_cgroup = NULL, *__cgroup_tree = NULL;
+ 	size_t len;
++#endif
+ 
+ 	if (!ops)
+ 		return ret_set_errno(-1, ENOENT);
+ 
+ 	/* copy system-wide cgroup information */
+ 	cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
+-	if (cgroup_pattern && strcmp(cgroup_pattern, "") != 0)
+-		ops->cgroup_pattern = must_copy_string(cgroup_pattern);
++	if (cgroup_pattern && !strequal(cgroup_pattern, "")) {
++		ops->cgroup_pattern = strdup(cgroup_pattern);
++		if (!ops->cgroup_pattern)
++			return ret_errno(ENOMEM);
++	}
+ 
++#ifdef HAVE_ISULAD
+ 	if (conf->cgroup_meta.dir) {
+ 		cgroup_tree = conf->cgroup_meta.dir;
+ 		container_cgroup = must_concat(&len, cgroup_tree, "/", conf->name, NULL);
+@@ -3181,22 +3782,23 @@ __cgfsng_ops static int isulad_cgfsng_data_init(struct cgroup_ops *ops, struct l
+ 		return ret_set_errno(-1, ENOMEM);
+ 
+ 	ops->container_cgroup = move_ptr(container_cgroup);
++#endif
+ 
+ 	return 0;
+ }
+ 
+-struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
++struct cgroup_ops *cgroup_ops_init(struct lxc_conf *conf)
+ {
+-	__do_free struct cgroup_ops *cgfsng_ops = NULL;
++	__cleanup_cgroup_ops struct cgroup_ops *cgfsng_ops = NULL;
+ 
+-	cgfsng_ops = malloc(sizeof(struct cgroup_ops));
++	cgfsng_ops = zalloc(sizeof(struct cgroup_ops));
+ 	if (!cgfsng_ops)
+ 		return ret_set_errno(NULL, ENOMEM);
+ 
+-	memset(cgfsng_ops, 0, sizeof(struct cgroup_ops));
+-	cgfsng_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
++	cgfsng_ops->cgroup_layout	= CGROUP_LAYOUT_UNKNOWN;
++	cgfsng_ops->dfd_mnt		= -EBADF;
+ 
+-	if (isulad_cg_init(cgfsng_ops, conf))
++	if (isulad_initialize_cgroups(cgfsng_ops, conf))
+ 		return NULL;
+ 
+ 	cgfsng_ops->data_init = isulad_cgfsng_data_init;
+@@ -3211,10 +3813,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
+ 	cgfsng_ops->payload_delegate_controllers = isulad_cgfsng_payload_delegate_controllers;
+ 	cgfsng_ops->payload_create = isulad_cgfsng_payload_create;
+ 	cgfsng_ops->payload_enter = isulad_cgfsng_payload_enter;
+-	cgfsng_ops->payload_finalize = isulad_cgfsng_payload_finalize;
+-	cgfsng_ops->escape = isulad_cgfsng_escape;
+-	cgfsng_ops->num_hierarchies = isulad_cgfsng_num_hierarchies;
+-	cgfsng_ops->get_hierarchies = isulad_cgfsng_get_hierarchies;
++	cgfsng_ops->finalize = isulad_cgfsng_finalize;
+ 	cgfsng_ops->get_cgroup = isulad_cgfsng_get_cgroup;
+ 	cgfsng_ops->get = isulad_cgfsng_get;
+ 	cgfsng_ops->set = isulad_cgfsng_set;
+@@ -3229,5 +3828,310 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
+ 	cgfsng_ops->mount = isulad_cgfsng_mount;
+ 	cgfsng_ops->devices_activate = isulad_cgfsng_devices_activate;
+ 
++	cgfsng_ops->criu_escape = isulad_cgfsng_criu_escape;
++	cgfsng_ops->criu_num_hierarchies = isulad_cgfsng_criu_num_hierarchies;
++	cgfsng_ops->criu_get_hierarchies = isulad_cgfsng_criu_get_hierarchies;
++
+ 	return move_ptr(cgfsng_ops);
+ }
++
++static int __unified_attach_fd(const struct lxc_conf *conf, const char *lxcpath, int fd_unified, pid_t pid)
++{
++	int ret;
++
++	if (!list_empty(&conf->id_map)) {
++		struct userns_exec_unified_attach_data args = {
++			.conf		= conf,
++			.unified_fd	= fd_unified,
++			.pid		= pid,
++			.unprivileged	= am_guest_unpriv(),
++			.lxcpath	= lxcpath,
++		};
++
++		ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair);
++		if (ret < 0)
++			return -errno;
++
++		ret = userns_exec_minimal(conf,
++					  cgroup_unified_attach_parent_wrapper,
++					  &args,
++					  cgroup_unified_attach_child_wrapper,
++					  &args);
++	} else {
++		ret = cgroup_attach_leaf(conf, fd_unified, pid);
++	}
++
++	return ret;
++}
++
++static int __cgroup_attach_many(const struct lxc_conf *conf, const char *name,
++				const char *lxcpath, pid_t pid)
++{
++	call_cleaner(put_cgroup_ctx) struct cgroup_ctx *ctx = &(struct cgroup_ctx){};
++	int ret;
++	size_t idx;
++	ssize_t pidstr_len;
++	char pidstr[INTTYPE_TO_STRLEN(pid_t)];
++
++	ret = lxc_cmd_get_cgroup_ctx(name, lxcpath, sizeof(struct cgroup_ctx), ctx);
++	if (ret < 0)
++		return ret_errno(ENOSYS);
++
++	if (ctx->fd_len == 0)
++		return log_trace(0, "Container runs with unwritable %s cgroup layout",
++				 cgroup_layout_name(ctx->layout));
++
++	pidstr_len = strnprintf(pidstr, sizeof(pidstr), "%d", pid);
++	if (pidstr_len < 0)
++		return pidstr_len;
++
++	for (idx = 0; idx < ctx->fd_len; idx++) {
++		int dfd_con = ctx->fd[idx];
++
++		if (unified_cgroup_fd(dfd_con))
++			ret = __unified_attach_fd(conf, lxcpath, dfd_con, pid);
++		else
++			ret = lxc_writeat(dfd_con, "cgroup.procs", pidstr, pidstr_len);
++		if (ret)
++			return syserror_ret(ret, "Failed to attach to cgroup fd %d", dfd_con);
++		else
++			TRACE("Attached to cgroup fd %d", dfd_con);
++	}
++
++	TRACE("Attached to %s cgroup layout", cgroup_layout_name(ctx->layout));
++	return 0;
++}
++
++static int __cgroup_attach_unified(const struct lxc_conf *conf, const char *name,
++				   const char *lxcpath, pid_t pid)
++{
++	__do_close int dfd_unified = -EBADF;
++
++	if (!conf || is_empty_string(name) || is_empty_string(lxcpath) || pid <= 0)
++		return ret_errno(EINVAL);
++
++	dfd_unified = lxc_cmd_get_cgroup2_fd(name, lxcpath);
++	if (dfd_unified < 0)
++		return ret_errno(ENOSYS);
++
++	return __unified_attach_fd(conf, lxcpath, dfd_unified, pid);
++}
++
++int cgroup_attach(const struct lxc_conf *conf, const char *name,
++		  const char *lxcpath, pid_t pid)
++{
++	int ret;
++
++	ret = __cgroup_attach_many(conf, name, lxcpath, pid);
++	if (ret < 0) {
++		if (!ERRNO_IS_NOT_SUPPORTED(ret))
++			return ret;
++
++		ret = __cgroup_attach_unified(conf, name, lxcpath, pid);
++		if (ret < 0 && ERRNO_IS_NOT_SUPPORTED(ret))
++			return ret_errno(ENOSYS);
++	}
++
++	return ret;
++}
++
++/* Connects to command socket therefore isn't callable from command handler. */
++int cgroup_get(const char *name, const char *lxcpath, const char *key, char *buf, size_t len)
++{
++	__do_close int dfd = -EBADF;
++	struct cgroup_fd fd = {
++		.fd = -EBADF,
++	};
++	size_t len_controller;
++	int ret;
++
++	if (is_empty_string(name) || is_empty_string(lxcpath) ||
++	    is_empty_string(key))
++		return ret_errno(EINVAL);
++
++	if ((buf && !len) || (len && !buf))
++		return ret_errno(EINVAL);
++
++	len_controller = strcspn(key, ".");
++	len_controller++; /* Don't forget the \0 byte. */
++	if (len_controller >= MAX_CGROUP_ROOT_NAMELEN)
++		return ret_errno(EINVAL);
++	(void)strlcpy(fd.controller, key, len_controller);
++
++	ret = lxc_cmd_get_limit_cgroup_fd(name, lxcpath, sizeof(struct cgroup_fd), &fd);
++	if (ret < 0) {
++		if (!ERRNO_IS_NOT_SUPPORTED(ret))
++			return ret;
++
++		dfd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
++		if (dfd < 0) {
++			if (!ERRNO_IS_NOT_SUPPORTED(ret))
++				return ret;
++
++			return ret_errno(ENOSYS);
++		}
++		fd.type = UNIFIED_HIERARCHY;
++		fd.fd = move_fd(dfd);
++	}
++	dfd = move_fd(fd.fd);
++
++	TRACE("Reading %s from %s cgroup hierarchy", key, cgroup_hierarchy_name(fd.type));
++
++	if (fd.type == UNIFIED_HIERARCHY && strequal(fd.controller, "devices"))
++		return ret_errno(EOPNOTSUPP);
++	else
++		ret = lxc_read_try_buf_at(dfd, key, buf, len);
++
++	return ret;
++}
++
++/* Connects to command socket therefore isn't callable from command handler. */
++int cgroup_set(const char *name, const char *lxcpath, const char *key, const char *value)
++{
++	__do_close int dfd = -EBADF;
++	struct cgroup_fd fd = {
++		.fd = -EBADF,
++	};
++	size_t len_controller;
++	int ret;
++
++	if (is_empty_string(name) || is_empty_string(lxcpath) ||
++	    is_empty_string(key) || is_empty_string(value))
++		return ret_errno(EINVAL);
++
++	len_controller = strcspn(key, ".");
++	len_controller++; /* Don't forget the \0 byte. */
++	if (len_controller >= MAX_CGROUP_ROOT_NAMELEN)
++		return ret_errno(EINVAL);
++	(void)strlcpy(fd.controller, key, len_controller);
++
++	ret = lxc_cmd_get_limit_cgroup_fd(name, lxcpath, sizeof(struct cgroup_fd), &fd);
++	if (ret < 0) {
++		if (!ERRNO_IS_NOT_SUPPORTED(ret))
++			return ret;
++
++		dfd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
++		if (dfd < 0) {
++			if (!ERRNO_IS_NOT_SUPPORTED(ret))
++				return ret;
++
++			return ret_errno(ENOSYS);
++		}
++		fd.type = UNIFIED_HIERARCHY;
++		fd.fd = move_fd(dfd);
++	}
++	dfd = move_fd(fd.fd);
++
++	TRACE("Setting %s to %s in %s cgroup hierarchy", key, value, cgroup_hierarchy_name(fd.type));
++
++	if (fd.type == UNIFIED_HIERARCHY && strequal(fd.controller, "devices")) {
++		struct device_item device = {};
++
++		ret = device_cgroup_rule_parse(&device, key, value);
++		if (ret < 0)
++			return log_error_errno(-1, EINVAL, "Failed to parse device string %s=%s",
++					       key, value);
++
++		ret = lxc_cmd_add_bpf_device_cgroup(name, lxcpath, &device);
++	} else {
++		ret = lxc_writeat(dfd, key, value, strlen(value));
++	}
++
++	return ret;
++}
++
++static int do_cgroup_freeze(int unified_fd,
++			    const char *state_string,
++			    int state_num,
++			    int timeout,
++			    const char *epoll_error,
++			    const char *wait_error)
++{
++	__do_close int events_fd = -EBADF;
++	call_cleaner(lxc_mainloop_close) struct lxc_async_descr *descr_ptr = NULL;
++	int ret;
++	struct lxc_async_descr descr = {};
++
++	if (timeout != 0) {
++		ret = lxc_mainloop_open(&descr);
++		if (ret)
++			return log_error_errno(-1, errno, "%s", epoll_error);
++
++		/* automatically cleaned up now */
++		descr_ptr = &descr;
++
++		events_fd = open_at(unified_fd, "cgroup.events", PROTECT_OPEN, PROTECT_LOOKUP_BENEATH, 0);
++		if (events_fd < 0)
++			return log_error_errno(-errno, errno, "Failed to open cgroup.events file");
++
++		ret = lxc_mainloop_add_handler_events(&descr, events_fd, EPOLLPRI,
++						      freezer_cgroup_events_cb,
++						      default_cleanup_handler,
++						      INT_TO_PTR(state_num),
++						      "freezer_cgroup_events_cb");
++		if (ret < 0)
++			return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
++	}
++
++	ret = lxc_writeat(unified_fd, "cgroup.freeze", state_string, 1);
++	if (ret < 0)
++		return log_error_errno(-1, errno, "Failed to open cgroup.freeze file");
++
++	if (timeout != 0) {
++		ret = lxc_mainloop(&descr, timeout);
++		if (ret)
++			return log_error_errno(-1, errno, "%s", wait_error);
++	}
++
++	return log_trace(0, "Container now %s", (state_num == 1) ? "frozen" : "unfrozen");
++}
++
++static inline int __cgroup_freeze(int unified_fd, int timeout)
++{
++	return do_cgroup_freeze(unified_fd, "1", 1, timeout,
++			        "Failed to create epoll instance to wait for container freeze",
++			        "Failed to wait for container to be frozen");
++}
++
++int cgroup_freeze(const char *name, const char *lxcpath, int timeout)
++{
++	__do_close int unified_fd = -EBADF;
++	int ret;
++
++	if (is_empty_string(name) || is_empty_string(lxcpath))
++		return ret_errno(EINVAL);
++
++	unified_fd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
++	if (unified_fd < 0)
++		return ret_errno(ENOCGROUP2);
++
++	lxc_cmd_notify_state_listeners(name, lxcpath, FREEZING);
++	ret = __cgroup_freeze(unified_fd, timeout);
++	lxc_cmd_notify_state_listeners(name, lxcpath, !ret ? FROZEN : RUNNING);
++	return ret;
++}
++
++int __cgroup_unfreeze(int unified_fd, int timeout)
++{
++	return do_cgroup_freeze(unified_fd, "0", 0, timeout,
++			        "Failed to create epoll instance to wait for container unfreeze",
++			        "Failed to wait for container to be unfrozen");
++}
++
++int cgroup_unfreeze(const char *name, const char *lxcpath, int timeout)
++{
++	__do_close int unified_fd = -EBADF;
++	int ret;
++
++	if (is_empty_string(name) || is_empty_string(lxcpath))
++		return ret_errno(EINVAL);
++
++	unified_fd = lxc_cmd_get_limit_cgroup2_fd(name, lxcpath);
++	if (unified_fd < 0)
++		return ret_errno(ENOCGROUP2);
++
++	lxc_cmd_notify_state_listeners(name, lxcpath, THAWED);
++	ret = __cgroup_unfreeze(unified_fd, timeout);
++	lxc_cmd_notify_state_listeners(name, lxcpath, !ret ? RUNNING : FROZEN);
++	return ret;
++}
+diff --git a/src/lxc/commands.c b/src/lxc/commands.c
+index 2188b31..bf63cac 100644
+--- a/src/lxc/commands.c
++++ b/src/lxc/commands.c
+@@ -1991,7 +1991,7 @@ int lxc_cmd_set_terminal_fifos(const char *name, const char *lxcpath, const char
+ }
+ 
+ static int lxc_cmd_set_terminal_fifos_callback(int fd, struct lxc_cmd_req *req,
+-					struct lxc_handler *handler, struct lxc_epoll_descr *descr)
++					struct lxc_handler *handler, struct lxc_async_descr *descr)
+ {
+ 	struct lxc_cmd_rsp rsp;
+ 	memset(&rsp, 0, sizeof(rsp));
+@@ -2037,7 +2037,7 @@ int lxc_cmd_set_terminal_winch(const char *name, const char *lxcpath, unsigned i
+ }
+ 
+ static int lxc_cmd_set_terminal_winch_callback(int fd, struct lxc_cmd_req *req,
+-					struct lxc_handler *handler, struct lxc_epoll_descr *descr)
++					struct lxc_handler *handler, struct lxc_async_descr *descr)
+ {
+ 	struct lxc_cmd_rsp rsp;
+ 	struct lxc_cmd_set_terminal_winch_request *data = (struct lxc_cmd_set_terminal_winch_request *)(req->data);
+diff --git a/src/lxc/conf.c b/src/lxc/conf.c
+index 187e60e..34cf90a 100644
+--- a/src/lxc/conf.c
++++ b/src/lxc/conf.c
+@@ -299,15 +299,15 @@ static struct limit_opt limit_opt[] = {
+ static int rootfs_parent_mount_private(char *rootfs);
+ static int setup_rootfs_ropaths(struct lxc_list *ropaths);
+ static int setup_rootfs_maskedpaths(struct lxc_list *maskedpaths);
+-static int remount_proc_sys_mount_entries(struct lxc_list *mount_list, bool lsm_aa_allow_nesting);
++static int remount_proc_sys_mount_entries(struct list_head *mount_entries, bool lsm_aa_allow_nesting);
+ static int check_mount_destination(const char *rootfs, const char *dest, const char *src);
+ static int mount_entry_with_loop_dev(const char *src, const char *dest, const char *fstype,
+ 		char *mnt_opts, const char *rootfs);
+-static bool need_setup_proc(const struct lxc_conf *conf, struct lxc_list *mount);
+-static bool need_setup_dev(const struct lxc_conf *conf, struct lxc_list *mount);
++static bool need_setup_proc(const struct lxc_conf *conf, struct list_head *mount);
++static bool need_setup_dev(const struct lxc_conf *conf, struct list_head *mount);
+ static int setup_populate_devs(const struct lxc_rootfs *rootfs, struct lxc_list *devs, const char *mount_label);
+ static int setup_rootfs_mountopts(const struct lxc_rootfs *rootfs);
+-static int create_mtab_link();
++static int create_mtab_link(void);
+ #endif
+ 
+ static int run_buffer(char *buffer)
+@@ -1252,8 +1252,13 @@ static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
+ /* Just create a path for /dev under $lxcpath/$name and in rootfs If we hit an
+  * error, log it but don't fail yet.
+  */
++#ifdef HAVE_ISULAD
++static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs,
++			 int autodevtmpfssize, const char *lxcpath, char *systemd, const char *mount_label)
++#else
+ static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs,
+ 			 int autodevtmpfssize, const char *lxcpath)
++#endif
+ {
+ #ifndef HAVE_ISULAD
+ 	__do_close int fd_fs = -EBADF;
+@@ -1905,18 +1910,21 @@ static int lxc_setup_devpts_child(struct lxc_handler *handler)
+ 		 */
+ #ifdef HAVE_ISULAD
+ 		if (rootfs->lsm_se_mount_context != NULL) {
+-			ret = strnprintf(devpts_mntopts, sizeof(devpts_mntopts), "%s,max=%zu,context=\"%s\"",
+-					default_devpts_mntopts, pty_max, rootfs->lsm_se_mount_context);
++			if (asprintf(&devpts_mntopts, "%s,max=%zu,context=\"%s\"",
++				default_devpts_mntopts, conf->pty_max, conf->rootfs.lsm_se_mount_context) < 0) {
++				return -1;
++			}
+ 		} else {
++			if (asprintf(&devpts_mntopts, "%s,max=%zu", default_devpts_mntopts, conf->pty_max) < 0) {
++				return -1;
++			}
++		}
+ #else
+ 		ret = strnprintf(devpts_mntopts, sizeof(devpts_mntopts), "%s,max=%zu",
+ 				default_devpts_mntopts, pty_max);
+-#endif
+-#ifdef HAVE_ISULAD
+-		}
+-#endif
+ 		if (ret < 0)
+ 			return -1;
++#endif
+ 
+ 		/* Create mountpoint for devpts instance. */
+ 		ret = mkdirat(rootfs->dfd_dev, "pts", 0755);
+@@ -2079,7 +2087,7 @@ static int bind_mount_console(int fd_devpts, struct lxc_rootfs *rootfs,
+ 	__do_free char *mnt_opts = NULL;
+ 
+ 	if (rootfs->lsm_se_mount_context != NULL) {
+-		if (asprintf(mnt_opts, "context=\"%s\"", rootfs->lsm_se_mount_context) < 0) {
++		if (asprintf(&mnt_opts, "context=\"%s\"", rootfs->lsm_se_mount_context) < 0) {
+ 			return syserror("Out of memory");
+ 		}
+ 	}
+@@ -2181,7 +2189,7 @@ static int lxc_setup_ttydir_console(int fd_devpts, struct lxc_rootfs *rootfs,
+ 	__do_free char *mnt_opts = NULL;
+ 
+ 	if (rootfs->lsm_se_mount_context != NULL) {
+-		if (asprintf(mnt_opts, "context=\"%s\"", rootfs->lsm_se_mount_context) < 0) {
++		if (asprintf(&mnt_opts, "context=\"%s\"", rootfs->lsm_se_mount_context) < 0) {
+ 			return syserror("Out of memory");
+ 		}
+ 	}
+@@ -2968,8 +2976,13 @@ static int mount_entry_on_relative_rootfs(struct mntent *mntent,
+ 	return mount_entry_on_generic(mntent, rootfs->buf, rootfs, lxc_name, lxc_path);
+ }
+ 
++#ifdef HAVE_ISULAD
++static int mount_file_entries(const struct lxc_conf *conf, struct lxc_rootfs *rootfs, FILE *file,
++			      const char *lxc_name, const char *lxc_path)
++#else
+ static int mount_file_entries(struct lxc_rootfs *rootfs, FILE *file,
+ 			      const char *lxc_name, const char *lxc_path)
++#endif
+ {
+ 	char buf[PATH_MAX];
+ 	struct mntent mntent;
+@@ -3030,8 +3043,13 @@ static inline void __auto_endmntent__(FILE **f)
+ 
+ #define __do_endmntent __attribute__((__cleanup__(__auto_endmntent__)))
+ 
++#ifdef HAVE_ISULAD
++static int setup_mount_fstab(const struct lxc_conf *conf, struct lxc_rootfs *rootfs, const char *fstab,
++			     const char *lxc_name, const char *lxc_path)
++#else
+ static int setup_mount_fstab(struct lxc_rootfs *rootfs, const char *fstab,
+ 			     const char *lxc_name, const char *lxc_path)
++#endif
+ {
+ 	__do_endmntent FILE *f = NULL;
+ 	int ret;
+@@ -3043,7 +3061,11 @@ static int setup_mount_fstab(struct lxc_rootfs *rootfs, const char *fstab,
+ 	if (!f)
+ 		return log_error_errno(-1, errno, "Failed to open \"%s\"", fstab);
+ 
++#ifdef HAVE_ISULAD
++	ret = mount_file_entries(conf, rootfs, f, lxc_name, lxc_path);
++#else
+ 	ret = mount_file_entries(rootfs, f, lxc_name, lxc_path);
++#endif
+ 	if (ret < 0)
+ 		ERROR("Failed to set up mount entries");
+ 
+@@ -3126,8 +3148,11 @@ static int setup_mount_entries(const struct lxc_conf *conf,
+ 	f = make_anonymous_mount_file(&conf->mount_entries, conf->lsm_aa_allow_nesting);
+ 	if (!f)
+ 		return -1;
+-
++#ifdef HAVE_ISULAD
++	return mount_file_entries(conf, rootfs, f, lxc_name, lxc_path);
++#else
+ 	return mount_file_entries(rootfs, f, lxc_name, lxc_path);
++#endif
+ }
+ 
+ static int __lxc_idmapped_mounts_child(struct lxc_handler *handler, FILE *f)
+@@ -3540,7 +3565,11 @@ static int parse_resource(const char *res)
+ 	return resid;
+ }
+ 
++#ifdef HAVE_ISULAD
++int setup_resource_limits(struct lxc_conf *conf, pid_t pid, int errfd)
++#else
+ int setup_resource_limits(struct lxc_conf *conf, pid_t pid)
++#endif
+ {
+ 	int resid;
+ 	struct lxc_limit *lim;
+@@ -3554,8 +3583,17 @@ int setup_resource_limits(struct lxc_conf *conf, pid_t pid)
+ 			return log_error(-1, "Unknown resource %s", lim->resource);
+ 
+ #if HAVE_PRLIMIT || HAVE_PRLIMIT64
++#ifdef HAVE_ISULAD
++		if (prlimit(pid, resid, &lim->limit, NULL) != 0) {
++			lxc_write_error_message(errfd, "%s:%d: Failed to set limit %s %lu %lu: %s.",
++					__FILE__, __LINE__, lim->resource,
++					lim->limit.rlim_cur, lim->limit.rlim_max, strerror(errno));
++			return log_error_errno(-1, errno, "Failed to set limit %s", lim->resource);
++		}
++#else
+ 		if (prlimit(pid, resid, &lim->limit, NULL) != 0)
+ 			return log_error_errno(-1, errno, "Failed to set limit %s", lim->resource);
++#endif
+ 
+ 		TRACE("Setup \"%s\" limit", lim->resource);
+ #else
+@@ -4099,8 +4137,11 @@ domount:
+ 			ret = strnprintf(rootfs->buf, sizeof(rootfs->buf), "%s/proc", rootfs->path ? rootfs->mount : "");
+ 			if (ret < 0)
+ 				return ret_errno(EIO);
+-
++#ifdef HAVE_ISULAD
++			ret = safe_mount("proc", rootfs->buf, "proc", 0, NULL, rootfs->mount, NULL);
++#else
+ 			ret = safe_mount("proc", rootfs->buf, "proc", 0, NULL, rootfs->mount);
++#endif
+ 		}
+ 	}
+ 	if (ret < 0)
+@@ -4675,7 +4716,12 @@ int lxc_setup(struct lxc_handler *handler)
+ 	}
+ 
+ 	if (lxc_conf->autodev > 0) {
++#ifdef HAVE_ISULAD
++		ret = mount_autodev(name, &lxc_conf->rootfs, lxc_conf->autodevtmpfssize, lxcpath,
++							lxc_conf->systemd, lxc_conf->rootfs.lsm_se_mount_context);
++#else
+ 		ret = mount_autodev(name, &lxc_conf->rootfs, lxc_conf->autodevtmpfssize, lxcpath);
++#endif
+ 		if (ret < 0)
+ 			return log_error(-1, "Failed to mount \"/dev\"");
+ 	}
+@@ -4697,7 +4743,11 @@ int lxc_setup(struct lxc_handler *handler)
+ 		return log_error(-1, "Failed to setup remaining automatic mounts");
+ #endif
+ 
++#ifdef HAVE_ISULAD
++	ret = setup_mount_fstab(lxc_conf, &lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath);
++#else
+ 	ret = setup_mount_fstab(&lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath);
++#endif
+ 	if (ret < 0)
+ 		return log_error(-1, "Failed to setup mounts");
+ 
+@@ -4750,6 +4800,15 @@ int lxc_setup(struct lxc_handler *handler)
+ 			return log_error(-1, "Failed to populate \"/dev\"");
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	/* isulad: setup devices which will be populated in the container. */
++	if (!lxc_list_empty(&lxc_conf->populate_devs) && setup_dev) {
++		if (setup_populate_devs(&lxc_conf->rootfs, &lxc_conf->populate_devs, lxc_conf->rootfs.lsm_se_mount_context) != 0) {
++			return log_error(-1, "Failed to setup devices in the container");
++		}
++	}
++#endif
++
+ 	/* Make sure any start hooks are in the container */
+ 	if (!verify_start_hooks(lxc_conf))
+ 		return log_error(-1, "Failed to verify start hooks");
+@@ -4796,7 +4855,7 @@ int lxc_setup(struct lxc_handler *handler)
+ 
+ #ifdef HAVE_ISULAD
+ 	/* Ask father to run oci prestart hooks and wait for him to finish. */
+-	if (lxc_sync_wait_parent(handler, LXC_SYNC_OCI_PRESTART_HOOK)) {
++	if (lxc_sync_barrier_parent(handler, START_SYNC_OCI_PRESTART_HOOK)) {
+ 		return log_error(-1, "Failed to sync parent to start host hook");
+ 	}
+ #endif
+@@ -4845,10 +4904,10 @@ int lxc_setup(struct lxc_handler *handler)
+ 		}
+ 	}
+ 
+-	//isulad: system container, remount /proc/sys/xxx by mount_list
++	//isulad: system container, remount /proc/sys/xxx by mount_entries
+ 	if (lxc_conf->systemd != NULL && strcmp(lxc_conf->systemd, "true") == 0) {
+-		if (!lxc_list_empty(&lxc_conf->mount_list)) {
+-			if (remount_proc_sys_mount_entries(&lxc_conf->mount_list,
++		if (!list_empty(&lxc_conf->mount_entries)) {
++			if (remount_proc_sys_mount_entries(&lxc_conf->mount_entries,
+ 							   lxc_conf->lsm_aa_allow_nesting)) {
+ 				return log_error(-1, "failed to remount /proc/sys");
+ 			}
+@@ -5250,7 +5309,7 @@ void lxc_conf_free(struct lxc_conf *conf)
+ 	if (conf->ocihooks) {
+ 		free_oci_runtime_spec_hooks(conf->ocihooks);
+ 	}
+-	free(conf->lsm_se_mount_context);
++	free(conf->rootfs.lsm_se_mount_context);
+ 	free(conf->lsm_se_keyring_context);
+ #endif
+ 
+@@ -6184,19 +6243,22 @@ int lxc_drop_caps(struct lxc_conf *conf)
+ #define __DEF_CAP_TO_MASK(x) (1U << ((x) & 31))
+ #if HAVE_LIBCAP
+ 	int ret = 0;
+-	struct lxc_list *iterator = NULL;
+-	char *keep_entry = NULL;
++	int nret = 0;
+ 	size_t i = 0;
+-	int capid;
+-	size_t numcaps = (size_t)lxc_caps_last_cap() + 1;
+-	struct lxc_list *caps = NULL;
++	__u32 capid;
++	__u32 last_cap;
++	size_t numcaps;
++	struct cap_entry *cap_entry;
+ 	int *caplist = NULL;
+ 
+-	if (lxc_list_empty(&conf->keepcaps))
++	if (!conf->caps.keep)
+ 		return 0;
+ 
+-	caps = &conf->keepcaps;
++	ret = lxc_caps_last_cap(&last_cap);
++	if (ret)
++		return -1;
+ 
++	numcaps = (size_t)last_cap + 1;
+ 	if (numcaps <= 0 || numcaps > 200)
+ 		return -1;
+ 
+@@ -6208,11 +6270,9 @@ int lxc_drop_caps(struct lxc_conf *conf)
+ 	}
+ 	(void)memset(caplist, 0, numcaps * sizeof(int));
+ 
+-	lxc_list_for_each(iterator, caps) {
+-
+-		keep_entry = iterator->elem;
++	list_for_each_entry(cap_entry, &conf->caps.list, head) {
+ 		/* isulad: Do not keep any cap*/
+-		if (strcmp(keep_entry, "ISULAD_KEEP_NONE") == 0) {
++		if (strcmp(cap_entry->cap_name, "ISULAD_KEEP_NONE") == 0) {
+ 			DEBUG("Do not keep any capability");
+ 			for(i = 0; i < numcaps; i++) {
+ 				caplist[i] = 0;
+@@ -6220,18 +6280,17 @@ int lxc_drop_caps(struct lxc_conf *conf)
+ 			break;
+ 		}
+ 
+-		capid = parse_cap(keep_entry);
+-
+-		if (capid == -2)
++		nret = parse_cap(cap_entry->cap_name, &capid);
++		if (nret == -2)
+ 			continue;
+ 
+-		if (capid < 0) {
+-			ERROR("unknown capability %s", keep_entry);
++		if (nret < 0) {
++			ERROR("unknown capability %s", cap_entry->cap_name);
+ 			ret = -1;
+ 			goto out;
+ 		}
+ 
+-		DEBUG("keep capability '%s' (%d)", keep_entry, capid);
++		DEBUG("keep capability '%s' (%d)", cap_entry->cap_name, capid);
+ 
+ 		caplist[capid] = 1;
+ 	}
+@@ -6299,7 +6358,7 @@ static bool have_dev_bind_mount_entry(FILE *file)
+ }
+ 
+ // returns true if /dev needs to be set up.
+-static bool need_setup_dev(const struct lxc_conf *conf, struct lxc_list *mount)
++static bool need_setup_dev(const struct lxc_conf *conf, struct list_head *mount)
+ {
+ 	__do_fclose FILE *f = NULL;
+ 
+@@ -6344,7 +6403,7 @@ static bool have_proc_bind_mount_entry(FILE *file)
+ }
+ 
+ // returns true if /proc needs to be set up.
+-static bool need_setup_proc(const struct lxc_conf *conf, struct lxc_list *mount)
++static bool need_setup_proc(const struct lxc_conf *conf, struct list_head *mount)
+ {
+ 	__do_fclose FILE *f = NULL;
+ 
+@@ -6378,7 +6437,7 @@ static int mount_entry_with_loop_dev(const char *src, const char *dest, const ch
+ 		if (srcfd < 0)
+ 			return srcfd;
+ 		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
+-		if (ret < 0 || ret > sizeof(srcbuf)) {
++		if (ret < 0 || (size_t)ret > sizeof(srcbuf)) {
+ 			close(srcfd);
+ 			ERROR("Failed to print string");
+ 			return -EINVAL;
+@@ -6397,7 +6456,7 @@ static int mount_entry_with_loop_dev(const char *src, const char *dest, const ch
+ 	}
+ 
+ 	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
+-	if (ret < 0 || ret > sizeof(destbuf)) {
++	if (ret < 0 || (size_t)ret > sizeof(destbuf)) {
+ 		if (srcfd != -1)
+ 			close(srcfd);
+ 		close(destfd);
+@@ -6584,13 +6643,13 @@ on_error:
+ 	return false;
+ }
+ 
+-static int remount_proc_sys_mount_entries(struct lxc_list *mount_list, bool lsm_aa_allow_nesting)
++static int remount_proc_sys_mount_entries(struct list_head *mount_entries, bool lsm_aa_allow_nesting)
+ {
+ 	char buf[4096];
+ 	FILE *file;
+ 	struct mntent mntent;
+ 
+-	file = make_anonymous_mount_file(mount_list, lsm_aa_allow_nesting);
++	file = make_anonymous_mount_file(mount_entries, lsm_aa_allow_nesting);
+ 	if (!file)
+ 		return -1;
+ 
+@@ -6824,21 +6883,57 @@ reset_umask:
+ 	return ret;
+ }
+ 
++static void parse_propagationopt(char *opt, unsigned long *flags) /* apply one mount option token to *flags if it names a propagation option */
++{
++	struct mount_opt *mo;
++
++	/* Walk propagation_opt up to its NULL-name sentinel; a matching entry sets or clears its flag in *flags. */
++	for (mo = &propagation_opt[0]; mo->name != NULL; mo++) {
++		if (strncmp(opt, mo->name, strlen(mo->name)) != 0) /* prefix comparison: only strlen(mo->name) bytes are checked */
++			continue;
++
++		if (mo->clear)
++			*flags &= ~mo->flag;
++		else
++			*flags |= mo->flag;
++
++		return; /* first matching entry wins; an unmatched opt leaves *flags unchanged */
++	}
++}
++
++int parse_propagationopts(const char *mntopts, unsigned long *pflags) /* fold every ","-separated token of mntopts into *pflags; 0 on success, log_error_errno(-ENOMEM, ...) result on allocation failure */
++{
++	__do_free char *s = NULL; /* NOTE(review): __do_free presumably frees s at scope exit - confirm */
++	char *p;
++
++	if (!mntopts) /* NOTE: returns success without writing *pflags; callers must not read it on this path */
++		return 0;
++
++	s = strdup(mntopts); /* work on a private copy; presumably lxc_iterate_parts() tokenizes in place - confirm */
++	if (!s)
++		return log_error_errno(-ENOMEM, errno, "Failed to allocate memory");
++
++	*pflags = 0L; /* reset only once mntopts is known non-NULL and the copy succeeded */
++	lxc_iterate_parts(p, s, ",")
++		parse_propagationopt(p, pflags);
++
++	return 0;
++}
++
+ // isulad: setup rootfs mountopts
+ static int setup_rootfs_mountopts(const struct lxc_rootfs *rootfs)
+ {
+ 	unsigned long mflags, mntflags, pflags;
+ 	__do_free char *mntdata = NULL;
+ 
+-	if(!rootfs || !rootfs->options)
++	if(!rootfs || !rootfs->mnt_opts.raw_options)
+ 		return 0;
+ 
+-	if (parse_mntopts_legacy(rootfs->options, &mntflags, &mntdata) < 0) {
++	if (parse_mntopts_legacy(rootfs->mnt_opts.raw_options, &mntflags, &mntdata) < 0) {
+ 		return -1;
+ 	}
+ 
+-	ret = parse_propagationopts(rootfs->options, &pflags);
+-	if (ret < 0) {
++	if (parse_propagationopts(rootfs->mnt_opts.raw_options, &pflags) < 0) {
+ 		return -EINVAL;
+ 	}
+ 
+@@ -6853,7 +6948,7 @@ static int setup_rootfs_mountopts(const struct lxc_rootfs *rootfs)
+ 	return 0;
+ }
+ 
+-static int create_mtab_link()
++static int create_mtab_link(void)
+ {
+ 	ssize_t ret;
+ 	int mret;
+@@ -6935,7 +7030,7 @@ static char* generate_json_str(const char *name, const char *lxcpath, const char
+ 	rc = snprintf(inmsg, size,
+ 	              "{\"ociVersion\":\"\",\"id\":\"%s\",\"pid\":%s,\"root\":\"%s\",\"bundle\":\"%s/%s\"}",
+ 	              name, cpid, rootfs, lxcpath, name);
+-	if (rc < 0 || rc >= size) {
++	if (rc < 0 || (size_t)rc >= size) {
+ 		ERROR("Create json string failed");
+ 		ret = -1;
+ 	}
+@@ -7090,8 +7185,8 @@ static struct lxc_popen_FILE *lxc_popen_ocihook(const char *commandpath, char **
+ 	close(pipe_msg[0]);
+ 	pipe_msg[0]= -1;
+ 	if (instr) {
+-		size_t len = strlen(instr);
+-		if (lxc_write_nointr(pipe_msg[1], instr, len) != len) {
++		int len = lxc_write_nointr(pipe_msg[1], instr, strlen(instr));
++		if (len < 0 || (size_t)len != strlen(instr)) {
+ 			WARN("Write instr: %s failed", instr);
+ 		}
+ 	}
+@@ -7413,7 +7508,7 @@ int run_oci_hooks(const char *name, const char *hookname, struct lxc_conf *conf,
+ /*isulad clear init args*/
+ int lxc_clear_init_args(struct lxc_conf *lxc_conf)
+ {
+-	int i;
++	size_t i;
+ 
+ 	for (i = 0; i < lxc_conf->init_argc; i++) {
+ 		free(lxc_conf->init_argv[i]);
+diff --git a/src/lxc/conf.h b/src/lxc/conf.h
+index 108e05b..ef4bb05 100644
+--- a/src/lxc/conf.h
++++ b/src/lxc/conf.h
+@@ -677,7 +677,11 @@ __hidden extern int lxc_setup_rootfs_prepare_root(struct lxc_conf *conf, const c
+ 						  const char *lxcpath);
+ __hidden extern int lxc_setup(struct lxc_handler *handler);
+ __hidden extern int lxc_setup_parent(struct lxc_handler *handler);
++#ifdef HAVE_ISULAD
++__hidden extern int setup_resource_limits(struct lxc_conf *conf, pid_t pid, int errfd);
++#else
+ __hidden extern int setup_resource_limits(struct lxc_conf *conf, pid_t pid);
++#endif
+ __hidden extern int find_unmapped_nsid(const struct lxc_conf *conf, enum idtype idtype);
+ __hidden extern int mapped_hostid(unsigned id, const struct lxc_conf *conf, enum idtype idtype);
+ __hidden extern int userns_exec_1(const struct lxc_conf *conf, int (*fn)(void *), void *data,
+diff --git a/src/lxc/confile.c b/src/lxc/confile.c
+index 1492776..0d0d66c 100644
+--- a/src/lxc/confile.c
++++ b/src/lxc/confile.c
+@@ -287,16 +287,16 @@ static struct lxc_config_t config_jump_table[] = {
+ 	{ "lxc.sysctl",                     false, set_config_sysctl,                     get_config_sysctl,                     clr_config_sysctl,                     },
+ 	{ "lxc.proc",                       false, set_config_proc,                       get_config_proc,                       clr_config_proc,                       },
+ #ifdef HAVE_ISULAD
+-	{ "lxc.isulad.init.args",          set_config_init_args,                   get_config_init_args,                   clr_config_init_args,                 },
+-	{ "lxc.isulad.populate.device",    set_config_populate_device,             get_config_populate_device,             clr_config_populate_device,           },
+-	{ "lxc.isulad.umask",              set_config_umask,                       get_config_umask,                       clr_config_umask,                     },
+-	{ "lxc.isulad.rootfs.maskedpaths", set_config_rootfs_masked_paths,         get_config_rootfs_masked_paths,         clr_config_rootfs_masked_paths,       },
+-	{ "lxc.isulad.rootfs.ropaths",     set_config_rootfs_ro_paths,             get_config_rootfs_ro_paths,             clr_config_rootfs_ro_paths,           },
+-	{ "lxc.isulad.systemd",            set_config_systemd,                     get_config_systemd,                     clr_config_systemd,                   },
+-	{ "lxc.console.logdriver",         set_config_console_log_driver,          get_config_console_log_driver,          clr_config_console_log_driver,        },
+-	{ "lxc.console.syslog_tag",        set_config_console_syslog_tag,          get_config_console_syslog_tag,          clr_config_console_syslog_tag,        },
+-	{ "lxc.console.syslog_facility",   set_config_console_syslog_facility,     get_config_console_syslog_facility,     clr_config_console_syslog_facility,   },
+-	{ "lxc.selinux.mount_context",     set_config_selinux_mount_context,       get_config_selinux_mount_context,       clr_config_selinux_mount_context,     },
++	{ "lxc.isulad.init.args",          	true,	set_config_init_args,                   get_config_init_args,                   clr_config_init_args,                 },
++	{ "lxc.isulad.populate.device",    	true,	set_config_populate_device,             get_config_populate_device,             clr_config_populate_device,           },
++	{ "lxc.isulad.umask",              	true,	set_config_umask,                       get_config_umask,                       clr_config_umask,                     },
++	{ "lxc.isulad.rootfs.maskedpaths", 	true,	set_config_rootfs_masked_paths,         get_config_rootfs_masked_paths,         clr_config_rootfs_masked_paths,       },
++	{ "lxc.isulad.rootfs.ropaths",     	true,	set_config_rootfs_ro_paths,             get_config_rootfs_ro_paths,             clr_config_rootfs_ro_paths,           },
++	{ "lxc.isulad.systemd",            	true,	set_config_systemd,                     get_config_systemd,                     clr_config_systemd,                   },
++	{ "lxc.console.logdriver",         	true,	set_config_console_log_driver,          get_config_console_log_driver,          clr_config_console_log_driver,        },
++	{ "lxc.console.syslog_tag",        	true,	set_config_console_syslog_tag,          get_config_console_syslog_tag,          clr_config_console_syslog_tag,        },
++	{ "lxc.console.syslog_facility",   	true,	set_config_console_syslog_facility,     get_config_console_syslog_facility,     clr_config_console_syslog_facility,   },
++	{ "lxc.selinux.mount_context",     	true,	set_config_selinux_mount_context,       get_config_selinux_mount_context,       clr_config_selinux_mount_context,     },
+ #endif
+ };
+ 
+@@ -3206,7 +3206,7 @@ static int parse_line(char *buffer, void *data)
+ 	if (value_decode == NULL) {
+ 		ERROR("Value %s decode failed", value);
+ 	}
+-	ret = config->set(key, value_decode ? value_decode: value, plc->conf, NULL);
++	return config->set(key, value_decode ? value_decode: value, plc->conf, NULL);
+ #else
+ 	return config->set(key, value, plc->conf, NULL);
+ #endif
+@@ -6895,7 +6895,8 @@ static int set_config_init_args(const char *key, const char *value,
+ static int get_config_init_args(const char *key, char *retv, int inlen,
+ 				  struct lxc_conf *c, void *data)
+ {
+-	int i, len, fulllen = 0;
++	size_t i;
++	int len, fulllen = 0;
+ 
+ 	if (!retv)
+ 		inlen = 0;
+@@ -7261,10 +7262,10 @@ static int set_config_selinux_mount_context(const char *key, const char *value,
+     struct lxc_conf *lxc_conf, void *data)
+ {
+ 	if (value != NULL && strcmp(value, "unconfined_t") == 0) {
+-		return set_config_string_item(&lxc_conf->lsm_se_mount_context, NULL);
++		return set_config_string_item(&lxc_conf->rootfs.lsm_se_mount_context, NULL);
+ 	}
+ 
+-	return set_config_string_item(&lxc_conf->lsm_se_mount_context, value);
++	return set_config_string_item(&lxc_conf->rootfs.lsm_se_mount_context, value);
+ }
+ 
+ static int get_config_console_log_driver(const char *key, char *retv, int inlen,
+@@ -7288,7 +7289,7 @@ static int get_config_console_syslog_facility(const char *key, char *retv, int i
+ static int get_config_selinux_mount_context(const char *key, char *retv, int inlen,
+     struct lxc_conf *c, void *data)
+ {
+-	return lxc_get_conf_str(retv, inlen, c->lsm_se_mount_context);
++	return lxc_get_conf_str(retv, inlen, c->rootfs.lsm_se_mount_context);
+ }
+ 
+ static inline int clr_config_console_log_driver(const char *key,
+@@ -7317,8 +7318,8 @@ static inline int clr_config_console_syslog_facility(const char *key,
+ static inline int clr_config_selinux_mount_context(const char *key,
+     struct lxc_conf *c, void *data)
+ {
+-	free(c->lsm_se_mount_context);
+-	c->lsm_se_mount_context = NULL;
++	free(c->rootfs.lsm_se_mount_context);
++	c->rootfs.lsm_se_mount_context = NULL;
+ 	return 0;
+ }
+ #endif
+diff --git a/src/lxc/exec_commands.c b/src/lxc/exec_commands.c
+index bd81d66..5612109 100644
+--- a/src/lxc/exec_commands.c
++++ b/src/lxc/exec_commands.c
+@@ -37,6 +37,7 @@
+ 
+ #include "af_unix.h"
+ #include "cgroup.h"
++#include "string_utils.h"
+ #include "exec_commands.h"
+ #include "commands_utils.h"
+ #include "conf.h"
+@@ -47,8 +48,6 @@
+ #include "lxclock.h"
+ #include "mainloop.h"
+ #include "monitor.h"
+-#include "string_utils.h"
+-#include "terminal.h"
+ #include "utils.h"
+ 
+ lxc_log_define(commands_exec, lxc);
+@@ -70,12 +69,7 @@ static int lxc_exec_cmd_rsp_recv(int sock, struct lxc_exec_cmd_rr *cmd)
+ 	int ret, rspfd;
+ 	struct lxc_exec_cmd_rsp *rsp = &cmd->rsp;
+ 
+-	/*isulad: add timeout 1s to avoid long block due to [lxc monitor] error*/
+-	if (lxc_socket_set_timeout(sock, 1, 1) != 0) {
+-		return syserror_ret(-1, "Failed to set timeout");
+-	}
+-
+-	ret = lxc_cmd_rsp_recv_fds(sock, &rspfd, 1, rsp, sizeof(*rsp));
++	ret = lxc_abstract_unix_recv_one_fd_timeout(sock, &rspfd, rsp, sizeof(*rsp), 1000 * 1000);
+ 	if (ret < 0) {
+ 		SYSERROR("Failed to receive response for command \"%s\"",
+ 		        lxc_exec_cmd_str(cmd->req.cmd));
+@@ -256,7 +250,7 @@ static int lxc_exec_cmd_process(int fd, struct lxc_exec_cmd_req *req,
+ 	return cb[req->cmd](fd, req, handler);
+ }
+ 
+-static void lxc_exec_cmd_fd_cleanup(int fd, struct lxc_epoll_descr *descr)
++static void lxc_exec_cmd_fd_cleanup(int fd, struct lxc_async_descr *descr)
+ {
+ 	lxc_mainloop_del_handler(descr, fd);
+ 	close(fd);
+@@ -264,7 +258,7 @@ static void lxc_exec_cmd_fd_cleanup(int fd, struct lxc_epoll_descr *descr)
+ }
+ 
+ static int lxc_exec_cmd_handler(int fd, uint32_t events, void *data,
+-			   struct lxc_epoll_descr *descr)
++			   struct lxc_async_descr *descr)
+ {
+ 	int ret;
+ 	struct lxc_exec_cmd_req req;
+@@ -341,7 +335,7 @@ out_close:
+ }
+ 
+ static int lxc_exec_cmd_accept(int fd, uint32_t events, void *data,
+-			  struct lxc_epoll_descr *descr)
++			  struct lxc_async_descr *descr)
+ {
+ 	int connection = -1;
+ 	int opt = 1, ret = -1;
+@@ -364,7 +358,8 @@ static int lxc_exec_cmd_accept(int fd, uint32_t events, void *data,
+ 		goto out_close;
+ 	}
+ 
+-	ret = lxc_mainloop_add_handler(descr, connection, lxc_exec_cmd_handler, data);
++	ret = lxc_mainloop_add_handler(descr, connection, lxc_exec_cmd_handler, default_cleanup_handler, data,
++								   "exec_cmd_handler");
+ 	if (ret) {
+ 		ERROR("Failed to add command handler");
+ 		goto out_close;
+@@ -462,12 +457,12 @@ int lxc_exec_cmd_init(const char *name, const char *lxcpath, const char *suffix)
+ }
+ #endif
+ 
+-int lxc_exec_cmd_mainloop_add(struct lxc_epoll_descr *descr, struct lxc_exec_command_handler *handler)
++int lxc_exec_cmd_mainloop_add(struct lxc_async_descr *descr, struct lxc_exec_command_handler *handler)
+ {
+ 	int ret;
+ 	int fd = handler->maincmd_fd;
+ 
+-	ret = lxc_mainloop_add_handler(descr, fd, lxc_exec_cmd_accept, handler);
++	ret = lxc_mainloop_add_handler(descr, fd, lxc_exec_cmd_accept, default_cleanup_handler, handler, "exec_cmd_accept");
+ 	if (ret < 0) {
+ 		ERROR("Failed to add handler for command socket");
+ 		close(fd);
+diff --git a/src/lxc/exec_commands.h b/src/lxc/exec_commands.h
+index 3ec2a22..ca3a4d6 100644
+--- a/src/lxc/exec_commands.h
++++ b/src/lxc/exec_commands.h
+@@ -63,11 +63,11 @@ struct lxc_exec_cmd_set_terminal_winch_request {
+ 	unsigned int width;
+ };
+ 
+-struct lxc_epoll_descr;
++struct lxc_async_descr;
+ struct lxc_handler;
+ 
+ extern int lxc_exec_cmd_init(const char *name, const char *lxcpath, const char *suffix);
+-extern int lxc_exec_cmd_mainloop_add(struct lxc_epoll_descr *descr, struct lxc_exec_command_handler *handler);
++extern int lxc_exec_cmd_mainloop_add(struct lxc_async_descr *descr, struct lxc_exec_command_handler *handler);
+ extern int lxc_exec_cmd_set_terminal_winch(const char *name, const char *lxcpath, const char *suffix, unsigned int height, unsigned int width);
+ 
+ #ifdef HAVE_ISULAD
+diff --git a/src/lxc/execute.c b/src/lxc/execute.c
+index 6a7ae39..2960664 100644
+--- a/src/lxc/execute.c
++++ b/src/lxc/execute.c
+@@ -18,7 +18,11 @@
+ 
+ lxc_log_define(execute, start);
+ 
++#ifdef HAVE_ISULAD
++static int execute_start(struct lxc_handler *handler, void* data, int fd)
++#else
+ static int execute_start(struct lxc_handler *handler, void* data)
++#endif
+ {
+ 	int argc = 0;
+ 	struct execute_args *my_args = data;
+@@ -40,14 +44,25 @@ static struct lxc_operations execute_start_ops = {
+ 	.post_start = execute_post_start
+ };
+ 
++#ifdef HAVE_ISULAD /* HAVE_ISULAD builds append a start_timeout parameter to lxc_execute() */
++int lxc_execute(const char *name, char *const argv[], int quiet,
++		struct lxc_handler *handler, const char *lxcpath,
++		bool daemonize, int *error_num, unsigned int start_timeout)
++#else
+ int lxc_execute(const char *name, char *const argv[], int quiet,
+ 		struct lxc_handler *handler, const char *lxcpath,
+ 		bool daemonize, int *error_num)
++#endif
+ {
+ 	struct execute_args args = {.argv = argv, .quiet = quiet};
+ 
+ 	TRACE("Doing lxc_execute");
+ 	handler->conf->is_execute = true;
++#ifdef HAVE_ISULAD
++	return __lxc_start(handler, &execute_start_ops, &args, lxcpath,
++			   daemonize, error_num, start_timeout); /* start_timeout is handed to __lxc_start() unchanged */
++#else
+ 	return __lxc_start(handler, &execute_start_ops, &args, lxcpath,
+ 			   daemonize, error_num);
++#endif
+ }
+diff --git a/src/lxc/isulad_utils.c b/src/lxc/isulad_utils.c
+index 889d912..38dbe2a 100644
+--- a/src/lxc/isulad_utils.c
++++ b/src/lxc/isulad_utils.c
+@@ -233,7 +233,7 @@ unsigned long long lxc_get_process_startat(pid_t pid)
+     char sbuf[1024] = {0}; /* bufs for stat */
+ 
+     sret = snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
+-    if (sret < 0 || sret >= sizeof(filename)) {
++    if (sret < 0 || (size_t)sret >= sizeof(filename)) {
+         ERROR("Failed to sprintf filename");
+         goto out;
+     }
+@@ -317,7 +317,7 @@ bool lxc_process_alive(pid_t pid, unsigned long long start_time)
+ 		return false;
+ 
+ 	sret = snprintf(filename, sizeof(filename), "/proc/%d/stat", pid);
+-	if (sret < 0 || sret >= sizeof(filename)) {
++	if (sret < 0 || (size_t)sret >= sizeof(filename)) {
+ 		ERROR("Failed to sprintf filename");
+ 		goto out;
+ 	}
+@@ -537,7 +537,7 @@ out:
+ ssize_t lxc_write_nointr_for_fifo(int fd, const char *buf, size_t count)
+ {
+     ssize_t nret = 0;
+-    ssize_t nwritten;
++    size_t nwritten;
+ 
+     if (buf == NULL) {
+         return -1;
+diff --git a/src/lxc/isulad_utils.h b/src/lxc/isulad_utils.h
+index 93174ae..3dfa9f7 100644
+--- a/src/lxc/isulad_utils.h
++++ b/src/lxc/isulad_utils.h
+@@ -5,13 +5,15 @@
+  * Author: lifeng
+  * Create: 2020-04-11
+ ******************************************************************************/
+-#ifndef __iSULAD_UTILS_H
+-#define __iSULAD_UTILS_H
++#ifndef __ISULAD_UTILS_H
++#define __ISULAD_UTILS_H
+ 
+ #include <stdio.h>
+ #include <stdbool.h>
+ #include <pwd.h>
+ 
++#include "compiler.h"
++
+ /* isulad: replace space with SPACE_MAGIC_STR */
+ #define SPACE_MAGIC_STR "[#)"
+ 
+@@ -97,7 +99,7 @@ __hidden extern bool lxc_process_alive(pid_t pid, unsigned long long start_time)
+ 
+ __hidden extern bool is_non_negative_num(const char *s);
+ 
+-__hidden int util_getpwent_r(FILE *stream, struct passwd *resbuf, char *buffer, size_t buflen, struct passwd **result);
++__hidden extern int util_getpwent_r(FILE *stream, struct passwd *resbuf, char *buffer, size_t buflen, struct passwd **result);
+ 
+ __hidden extern ssize_t lxc_write_nointr_for_fifo(int fd, const char *buf, size_t count);
+ 
+diff --git a/src/lxc/lsm/lsm.c b/src/lxc/lsm/lsm.c
+index d9380c4..db4bb0c 100644
+--- a/src/lxc/lsm/lsm.c
++++ b/src/lxc/lsm/lsm.c
+@@ -19,6 +19,10 @@ __hidden extern struct lsm_ops *lsm_apparmor_ops_init(void);
+ __hidden extern struct lsm_ops *lsm_selinux_ops_init(void);
+ __hidden extern struct lsm_ops *lsm_nop_ops_init(void);
+ 
++#ifdef HAVE_ISULAD
++static struct lsm_ops *ops_instance = NULL;
++#endif
++
+ struct lsm_ops *lsm_init_static(void)
+ {
+ 	struct lsm_ops *ops = NULL;
+@@ -35,6 +39,30 @@ struct lsm_ops *lsm_init_static(void)
+ 	if (!ops)
+ 		ops = lsm_nop_ops_init();
+ 
++#ifdef HAVE_ISULAD
++	ops_instance = ops;
++#endif
++
+ 	INFO("Initialized LSM security driver %s", ops->name);
+ 	return ops;
+ }
++
++#ifdef HAVE_ISULAD
++int lsm_file_label_set(const char *path, const char *label)
++{
++	if (!ops_instance) {
++		ERROR("LSM driver not inited");
++		return -1;
++	}
++	return ops_instance->file_label_set(path, label);
++}
++
++int lsm_relabel(const char *path, const char *label, bool share)
++{
++	if (!ops_instance) {
++		ERROR("LSM driver not inited");
++		return -1;
++	}
++	return ops_instance->relabel(path, label, share);
++}
++#endif
+diff --git a/src/lxc/lsm/lsm.h b/src/lxc/lsm/lsm.h
+index 93e1a99..571a92d 100644
+--- a/src/lxc/lsm/lsm.h
++++ b/src/lxc/lsm/lsm.h
+@@ -42,4 +42,9 @@ struct lsm_ops {
+ 
+ __hidden extern struct lsm_ops *lsm_init_static(void);
+ 
++#ifdef HAVE_ISULAD
++__hidden extern int lsm_file_label_set(const char *path, const char *label);
++__hidden extern int lsm_relabel(const char *path, const char *label, bool share);
++#endif
++
+ #endif /* __LXC_LSM_H */
+diff --git a/src/lxc/lsm/selinux.c b/src/lxc/lsm/selinux.c
+index 5190110..0bdfcff 100644
+--- a/src/lxc/lsm/selinux.c
++++ b/src/lxc/lsm/selinux.c
+@@ -272,7 +272,7 @@ static int recurse_set_file_label(const char *basePath, const char *label)
+ 			continue;
+ 		} else {
+ 			int nret = snprintf(base, sizeof(base), "%s/%s", basePath, ptr->d_name);
+-			if (nret < 0 || nret >= sizeof(base)) {
++			if (nret < 0 || (size_t)nret >= sizeof(base)) {
+ 				ERROR("Failed to get path");
+ 				return -1;
+ 			}
+diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h
+index 879e899..74c8aa8 100644
+--- a/src/lxc/lxc.h
++++ b/src/lxc/lxc.h
+@@ -39,8 +39,13 @@ struct lxc_handler;
+  * @daemonize    : whether or not the container is daemonized
+  * Returns 0 on success, < 0 otherwise
+  */
++#ifdef HAVE_ISULAD
++__hidden extern int lxc_start(char *const argv[], struct lxc_handler *handler, const char *lxcpath,
++			      bool daemonize, int *error_num, unsigned int start_timeout);
++#else
+ __hidden extern int lxc_start(char *const argv[], struct lxc_handler *handler, const char *lxcpath,
+ 			      bool daemonize, int *error_num);
++#endif
+ 
+ /*
+  * Start the specified command inside an application container
+@@ -51,9 +56,15 @@ __hidden extern int lxc_start(char *const argv[], struct lxc_handler *handler, c
+  * @daemonize    : whether or not the container is daemonized
+  * Returns 0 on success, < 0 otherwise
+  */
++#ifdef HAVE_ISULAD
++__hidden extern int lxc_execute(const char *name, char *const argv[], int quiet,
++				struct lxc_handler *handler, const char *lxcpath, bool daemonize,
++				int *error_num, unsigned int start_timeout);
++#else
+ __hidden extern int lxc_execute(const char *name, char *const argv[], int quiet,
+ 				struct lxc_handler *handler, const char *lxcpath, bool daemonize,
+ 				int *error_num);
++#endif
+ 
+ /*
+  * Close the fd associated with the monitoring
+diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c
+index d4495f7..5720cf7 100644
+--- a/src/lxc/lxccontainer.c
++++ b/src/lxc/lxccontainer.c
+@@ -6100,7 +6100,11 @@ WRAP_API_1(bool, lxcapi_get_container_metrics, struct lxc_container_metrics *)
+ 
+ #endif
+ 
++#ifdef HAVE_ISULAD
++static struct lxc_container *do_lxc_container_new(const char *name, const char *configpath, bool load_config)
++#else
+ struct lxc_container *lxc_container_new(const char *name, const char *configpath)
++#endif
+ {
+ 	struct lxc_container *c;
+ 	size_t len;
+diff --git a/src/lxc/mainloop.c b/src/lxc/mainloop.c
+index 765240e..9522b7d 100644
+--- a/src/lxc/mainloop.c
++++ b/src/lxc/mainloop.c
+@@ -534,7 +534,7 @@ void lxc_mainloop_close(struct lxc_async_descr *descr)
+ }
+ 
+ #ifdef HAVE_ISULAD
+-int isulad_safe_mainloop(struct lxc_epoll_descr *descr, int timeout_ms)
++int isulad_safe_mainloop(struct lxc_async_descr *descr, int timeout_ms)
+ {
+     int ret;
+ 
+diff --git a/src/lxc/mainloop.h b/src/lxc/mainloop.h
+index e8ce082..f485a1f 100644
+--- a/src/lxc/mainloop.h
++++ b/src/lxc/mainloop.h
+@@ -66,7 +66,7 @@ __hidden extern void lxc_mainloop_close(struct lxc_async_descr *descr);
+ define_cleanup_function(struct lxc_async_descr *, lxc_mainloop_close);
+ 
+ #ifdef HAVE_ISULAD
+-__hidden extern int isulad_safe_mainloop(struct lxc_epoll_descr *descr, int timeout_ms);
++__hidden extern int isulad_safe_mainloop(struct lxc_async_descr *descr, int timeout_ms);
+ #endif
+ 
+ #endif
+diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c
+index f0fa297..ecba248 100644
+--- a/src/lxc/seccomp.c
++++ b/src/lxc/seccomp.c
+@@ -699,21 +699,21 @@ static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_c
+ 		ctx.architectures[0] = SCMP_ARCH_X86;
+ 		ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_i386,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[0]);
++					      &ctx.architectures[0]);
+ 		if (!ctx.contexts[0])
+ 			goto bad;
+ 
+ 		ctx.architectures[1] = SCMP_ARCH_X32;
+ 		ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_x32,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[1]);
++					      &ctx.architectures[1]);
+ 		if (!ctx.contexts[1])
+ 			goto bad;
+ 
+ 		ctx.architectures[2] = SCMP_ARCH_X86_64;
+ 		ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_amd64,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[2]);
++					      &ctx.architectures[2]);
+ 		if (!ctx.contexts[2])
+ 			goto bad;
+ #ifdef SCMP_ARCH_PPC
+@@ -723,14 +723,14 @@ static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_c
+ 		ctx.architectures[0] = SCMP_ARCH_PPC;
+ 		ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_ppc,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[0]);
++					      &ctx.architectures[0]);
+ 		if (!ctx.contexts[0])
+ 			goto bad;
+ 
+ 		ctx.architectures[2] = SCMP_ARCH_PPC64;
+ 		ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_ppc64,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[2]);
++					      &ctx.architectures[2]);
+ 		if (!ctx.contexts[2])
+ 			goto bad;
+ #endif
+@@ -741,7 +741,7 @@ static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_c
+ 		ctx.architectures[0] = SCMP_ARCH_ARM;
+ 		ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_arm,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[0]);
++					      &ctx.architectures[0]);
+ 		if (!ctx.contexts[0])
+ 			goto bad;
+ 
+@@ -749,7 +749,7 @@ static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_c
+ 		ctx.architectures[2] = SCMP_ARCH_AARCH64;
+ 		ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_arm64,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[2]);
++					      &ctx.architectures[2]);
+ 		if (!ctx.contexts[2])
+ 			goto bad;
+ #endif
+@@ -761,21 +761,21 @@ static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_c
+ 		ctx.architectures[0] = SCMP_ARCH_MIPS;
+ 		ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mips,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[0]);
++					      &ctx.architectures[0]);
+ 		if (!ctx.contexts[0])
+ 			goto bad;
+ 
+ 		ctx.architectures[1] = SCMP_ARCH_MIPS64N32;
+ 		ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mips64n32,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[1]);
++					      &ctx.architectures[1]);
+ 		if (!ctx.contexts[1])
+ 			goto bad;
+ 
+ 		ctx.architectures[2] = SCMP_ARCH_MIPS64;
+ 		ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mips64,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[2]);
++					      &ctx.architectures[2]);
+ 		if (!ctx.contexts[2])
+ 			goto bad;
+ 	} else if (native_arch == lxc_seccomp_arch_mipsel64) {
+@@ -784,21 +784,21 @@ static int parse_config_v2(FILE *f, char *line, size_t *line_bufsz, struct lxc_c
+ 		ctx.architectures[0] = SCMP_ARCH_MIPSEL;
+ 		ctx.contexts[0] = get_new_ctx(lxc_seccomp_arch_mipsel,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[0]);
++					      &ctx.architectures[0]);
+ 		if (!ctx.contexts[0])
+ 			goto bad;
+ 
+ 		ctx.architectures[1] = SCMP_ARCH_MIPSEL64N32;
+ 		ctx.contexts[1] = get_new_ctx(lxc_seccomp_arch_mipsel64n32,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[1]);
++					      &ctx.architectures[1]);
+ 		if (!ctx.contexts[1])
+ 			goto bad;
+ 
+ 		ctx.architectures[2] = SCMP_ARCH_MIPSEL64;
+ 		ctx.contexts[2] = get_new_ctx(lxc_seccomp_arch_mipsel64,
+ 					      default_policy_action,
+-					      &ctx.needs_merge[2]);
++					      &ctx.architectures[2]);
+ 		if (!ctx.contexts[2])
+ 			goto bad;
+ #endif
+diff --git a/src/lxc/start.c b/src/lxc/start.c
+index 70af128..ff9a3fa 100644
+--- a/src/lxc/start.c
++++ b/src/lxc/start.c
+@@ -2067,6 +2067,9 @@ static int lxc_spawn(struct lxc_handler *handler)
+ 	const char *name = handler->name;
+ 	struct lxc_conf *conf = handler->conf;
+ 	struct cgroup_ops *cgroup_ops = handler->cgroup_ops;
++#ifdef HAVE_ISULAD
++	const char *lxcpath = handler->lxcpath;
++#endif
+ 
+ 	id_map = &conf->id_map;
+ 	wants_to_map_ids = !list_empty(id_map);
+@@ -2364,6 +2367,30 @@ static int lxc_spawn(struct lxc_handler *handler)
+ 		goto out_delete_net;
+ 	}
+ 
++#ifdef HAVE_ISULAD
++	if (!lxc_sync_wait_child(handler, START_SYNC_OCI_PRESTART_HOOK))
++		goto out_delete_net;
++
++	/* isulad: Run oci prestart hook at here */
++	ret = run_oci_hooks(name, "oci-prestart", conf, lxcpath);
++	if (ret < 0) {
++		ERROR("Failed to run oci prestart hooks");
++		goto out_delete_net;
++	}
++
++	if (START_TIMEOUT == global_timeout_state) {
++		lxc_write_error_message(conf->errpipe[1], "Starting the container \"%s\" timeout.", name);
++		ERROR("Starting the container \"%s\" timeout.", name);
++		goto out_delete_net;
++	}
++
++	/* The oci prestart hooks have run: wake the child with
++	 * START_SYNC_POST_OCI_PRESTART_HOOK so it continues its initialization.
++	 */
++	if (!lxc_sync_wake_child(handler, START_SYNC_POST_OCI_PRESTART_HOOK))
++		goto out_delete_net;
++#endif
++
+ 	if (!lxc_sync_wait_child(handler, START_SYNC_CGROUP_LIMITS))
+ 		goto out_delete_net;
+ 
+@@ -2394,27 +2421,6 @@ static int lxc_spawn(struct lxc_handler *handler)
+ 		goto out_delete_net;
+ 	}
+ 
+-#ifdef HAVE_ISULAD
+-	/* isulad: Run oci prestart hook at here */
+-	ret = run_oci_hooks(name, "oci-prestart", conf, lxcpath);
+-	if (ret < 0) {
+-		ERROR("Failed to run oci prestart hooks");
+-		goto out_delete_net;
+-	}
+-
+-	if (START_TIMEOUT == global_timeout_state) {
+-		lxc_write_error_message(conf->errpipe[1], "Starting the container \"%s\" timeout.", name);
+-		ERROR("Starting the container \"%s\" timeout.", name);
+-		goto out_delete_net;
+-	}
+-
+-       /* Tell the child to continue its initialization. We'll get
+-        * LXC_SYNC_POST_OCI_PRESTART_HOOK when it is ready for us to run oci prestart hooks.
+-        */
+-       if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_OCI_PRESTART_HOOK))
+-               goto out_delete_net;
+-#endif
+-
+ 	if (!lxc_sync_wake_child(handler, START_SYNC_FDS))
+ 		goto out_delete_net;
+ 
+@@ -2943,7 +2949,7 @@ static int clean_resource_set_env(struct lxc_handler *handler)
+ 	const char *name = handler->name;
+ 	struct lxc_conf *conf = handler->conf;
+ 	char bufstr[PATH_MAX + 1];
+-	int i = 0;
++	size_t i = 0;
+ 	int j = 0;
+ 	int len = 2; //set "LXC_PID" and "LXC_CGNS_AWARE"
+ 
+@@ -3039,7 +3045,6 @@ static struct lxc_handler *lxc_init_clean_handler(char *name, char *lxcpath, str
+ 	handler->data_sock[0] = handler->data_sock[1] = -1;
+ 	handler->conf = conf;
+ 	handler->lxcpath = lxcpath;
+-	handler->pinfd = -1;
+ 	handler->sigfd = -EBADF;
+ 	handler->pidfd = -EBADF;
+ 	handler->init_died = false;
+@@ -3047,7 +3052,7 @@ static struct lxc_handler *lxc_init_clean_handler(char *name, char *lxcpath, str
+ 	handler->pid = pid;
+ 	handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1;
+ 	if (handler->conf->reboot == REBOOT_NONE)
+-		lxc_list_init(&handler->conf->state_clients);
++		INIT_LIST_HEAD(&handler->conf->state_clients);
+ 
+ 	for (i = 0; i < LXC_NS_MAX; i++)
+ 		handler->nsfd[i] = -1;
+@@ -3091,14 +3096,13 @@ static struct lxc_handler *lxc_init_pids_handler(char *name, char *lxcpath, stru
+ 	handler->data_sock[0] = handler->data_sock[1] = -1;
+ 	handler->conf = conf;
+ 	handler->lxcpath = lxcpath;
+-	handler->pinfd = -1;
+ 	handler->sigfd = -EBADF;
+ 	handler->init_died = false;
+ 	handler->state_socket_pair[0] = handler->state_socket_pair[1] = -1;
+ 	handler->monitor_status_fd = -EBADF;
+ 	handler->pidfd = -EBADF;
+ 	if (handler->conf->reboot == REBOOT_NONE)
+-		lxc_list_init(&handler->conf->state_clients);
++		INIT_LIST_HEAD(&handler->conf->state_clients);
+ 
+ 	for (i = 0; i < LXC_NS_MAX; i++)
+ 		handler->nsfd[i] = -1;
+diff --git a/src/lxc/sync.c b/src/lxc/sync.c
+index 1075d98..f156809 100644
+--- a/src/lxc/sync.c
++++ b/src/lxc/sync.c
+@@ -70,6 +70,12 @@ static inline const char *start_sync_to_string(int state)
+ 		return "cgroup-limits";
+ 	case START_SYNC_IDMAPPED_MOUNTS:
+ 		return "idmapped-mounts";
++#ifdef HAVE_ISULAD
++	case START_SYNC_OCI_PRESTART_HOOK:
++		return "oci-prestart-hook";
++	case START_SYNC_POST_OCI_PRESTART_HOOK:
++		return "post-oci-prestart-hook";
++#endif /* HAVE_ISULAD */
+ 	case START_SYNC_FDS:
+ 		return "fds";
+ 	case START_SYNC_READY_START:
+diff --git a/src/lxc/sync.h b/src/lxc/sync.h
+index ef03e1e..6802d32 100644
+--- a/src/lxc/sync.h
++++ b/src/lxc/sync.h
+@@ -21,12 +21,13 @@ enum /* start */ {
+ 	START_SYNC_POST_CONFIGURE	=  2,
+ 	START_SYNC_IDMAPPED_MOUNTS	=  3,
+ #ifdef HAVE_ISULAD
+-	LXC_SYNC_OCI_PRESTART_HOOK = 4,
+-	START_SYNC_CGROUP_LIMITS	=  5,
+-	START_SYNC_FDS			=  6,
+-	START_SYNC_READY_START		=  7,
+-	START_SYNC_RESTART		=  8,
+-	START_SYNC_POST_RESTART		=  9,
++	START_SYNC_OCI_PRESTART_HOOK = 4,
++	START_SYNC_POST_OCI_PRESTART_HOOK = 5,
++	START_SYNC_CGROUP_LIMITS	=  6,
++	START_SYNC_FDS			=  7,
++	START_SYNC_READY_START		=  8,
++	START_SYNC_RESTART		=  9,
++	START_SYNC_POST_RESTART		=  10,
+ #else
+ 	START_SYNC_CGROUP_LIMITS	=  4,
+ 	START_SYNC_FDS			=  5,
+diff --git a/src/lxc/terminal.c b/src/lxc/terminal.c
+index 8da00a9..de7ea4f 100644
+--- a/src/lxc/terminal.c
++++ b/src/lxc/terminal.c
+@@ -204,11 +204,11 @@ int lxc_set_terminal_winsz(struct lxc_terminal *terminal, unsigned int height, u
+ 	int ret = 0;
+ 	struct winsize wsz;
+ 
+-	if (terminal->ptmx < 0) {
++	if (terminal->ptx < 0) {
+ 		return 0;
+ 	}
+ 
+-	ret = ioctl(terminal->ptmx, TIOCGWINSZ, &wsz);
++	ret = ioctl(terminal->ptx, TIOCGWINSZ, &wsz);
+ 	if (ret < 0) {
+ 		WARN("Failed to get window size");
+ 		return -1;
+@@ -216,7 +216,7 @@ int lxc_set_terminal_winsz(struct lxc_terminal *terminal, unsigned int height, u
+ 	wsz.ws_col = width;
+ 	wsz.ws_row = height;
+ 
+-	ret = ioctl(terminal->ptmx, TIOCSWINSZ, &wsz);
++	ret = ioctl(terminal->ptx, TIOCSWINSZ, &wsz);
+ 	if (ret < 0)
+ 		WARN("Failed to set window size");
+ 	else
+@@ -299,6 +299,359 @@ static int lxc_terminal_rotate_log_file(struct lxc_terminal *terminal)
+ }
+ 
+ #ifdef HAVE_ISULAD
++/* get time buffer */
++static bool get_time_buffer(struct timespec *timestamp, char *timebuffer,
++                            size_t maxsize)
++{
++	struct tm tm_utc = { 0 };
++	int32_t nanos = 0;
++	time_t seconds;
++	size_t len = 0;
++	int ret = 0;
++
++	if (!timebuffer || !maxsize) {
++		return false;
++	}
++
++	seconds = (time_t)timestamp->tv_sec;
++	gmtime_r(&seconds, &tm_utc);
++	strftime(timebuffer, maxsize, "%Y-%m-%dT%H:%M:%S", &tm_utc);
++
++	nanos = (int32_t)timestamp->tv_nsec;
++	len = strlen(timebuffer);
++	ret = snprintf(timebuffer + len, (maxsize - len), ".%09dZ", nanos);
++	if (ret < 0 || (size_t)ret >= (maxsize - len)) {
++		return false;
++	}
++
++	return true;
++}
++
++/* get now time buffer */
++static bool get_now_time_buffer(char *timebuffer, size_t maxsize)
++{
++	int err = 0;
++	struct timespec ts;
++
++	err = clock_gettime(CLOCK_REALTIME, &ts);
++	if (err != 0) {
++		ERROR("failed to get time");
++		return false;
++	}
++
++	return get_time_buffer(&ts, timebuffer, maxsize);
++}
++
++static int isulad_lxc_terminal_rotate_write_data(struct lxc_terminal *terminal, const char *buf,
++                int bytes_read)
++{
++	int ret;
++	struct stat st;
++	int64_t space_left = -1;
++
++	if (terminal->log_fd < 0)
++		return 0;
++
++	/* A log size <= 0 means that there's no limit on the size of the log
++	 * file at which point we simply ignore whether the log is supposed to
++	 * be rotated or not.
++	 */
++	if (terminal->log_size <= 0)
++		return lxc_write_nointr(terminal->log_fd, buf, bytes_read);
++
++	/* Get current size of the log file. */
++	ret = fstat(terminal->log_fd, &st);
++	if (ret < 0) {
++		SYSERROR("Failed to stat the terminal log file descriptor");
++		return -1;
++	}
++
++	/* handle non-regular files */
++	if ((st.st_mode & S_IFMT) != S_IFREG) {
++		/* This isn't a regular file. so rotating the file seems a
++		 * dangerous thing to do, size limits are also very
++		 * questionable. Let's not risk anything and tell the user that
++		 * he's requesting us to do weird stuff.
++		 */
++		if (terminal->log_rotate > 0 || terminal->log_size > 0)
++			return -EINVAL;
++
++		/* I mean, sure log wherever you want to. */
++		return lxc_write_nointr(terminal->log_fd, buf, bytes_read);
++	}
++
++	space_left = terminal->log_size - st.st_size;
++
++	/* User doesn't want to rotate the log file and there's no more space
++	 * left so simply truncate it.
++	 */
++	if (space_left <= 0 && terminal->log_rotate <= 0) {
++		ret = lxc_terminal_truncate_log_file(terminal);
++		if (ret < 0)
++			return ret;
++
++		if ((uint64_t)bytes_read <= terminal->log_size)
++			return lxc_write_nointr(terminal->log_fd, buf, bytes_read);
++
++		/* Write as much as we can into the buffer and loose the rest. */
++		return lxc_write_nointr(terminal->log_fd, buf, terminal->log_size);
++	}
++
++	/* There's enough space left. */
++	if (bytes_read <= space_left)
++		return lxc_write_nointr(terminal->log_fd, buf, bytes_read);
++
++	/* There'd be more to write but we aren't instructed to rotate the log
++	 * file so simply return. There's no error on our side here.
++	 */
++	if (terminal->log_rotate > 0)
++		ret = lxc_terminal_rotate_log_file(terminal);
++	else
++		ret = lxc_terminal_truncate_log_file(terminal);
++	if (ret < 0)
++		return ret;
++
++	if (terminal->log_size < (uint64_t)bytes_read) {
++		/* Well, this is unfortunate because it means that there is more
++		 * to write than the user has granted us space. There are
++		 * multiple ways to handle this but let's use the simplest one:
++		 * write as much as we can, tell the user that there was more
++		 * stuff to write and move on.
++		 * Note that this scenario shouldn't actually happen with the
++		 * standard pty-based terminal that LXC allocates since it will
++		 * be switched into raw mode. In raw mode only 1 byte at a time
++		 * should be read and written.
++		 */
++		WARN("Size of terminal log file is smaller than the bytes to write");
++		ret = lxc_write_nointr(terminal->log_fd, buf, terminal->log_size);
++		if (ret < 0)
++			return -1;
++		bytes_read -= ret;
++		return bytes_read;
++	}
++
++	/* Yay, we made it. */
++	ret = lxc_write_nointr(terminal->log_fd, buf, bytes_read);
++	if (ret < 0)
++		return -1;
++	bytes_read -= ret;
++	return bytes_read;
++}
++
++static ssize_t isulad_logger_json_write(struct lxc_terminal *terminal, const char *type, const char *buf,
++                                int bytes_read)
++{
++	logger_json_file *msg = NULL;
++	ssize_t ret = -1;
++	size_t len;
++	char *json = NULL;
++	char timebuffer[64] = { 0 };
++	parser_error err = NULL;
++	struct parser_context ctx = { GEN_OPTIONS_SIMPLIFY | GEN_OPTIONS_NOT_VALIDATE_UTF8, stderr };
++
++	if (bytes_read < 0 || bytes_read >= INT_MAX) {
++		return -1;
++	}
++	msg = calloc(sizeof(logger_json_file), 1);
++	if (msg == NULL) {
++		return -errno;
++	}
++	msg->log = calloc(bytes_read, 1);
++	if (!msg->log) {
++		goto cleanup;
++	}
++	memcpy(msg->log, buf, bytes_read);
++	msg->log_len = bytes_read;
++	msg->stream = type ? safe_strdup(type) : safe_strdup("stdout");
++
++	get_now_time_buffer(timebuffer, sizeof(timebuffer));
++	msg->time = safe_strdup(timebuffer);
++
++	json = logger_json_file_generate_json(msg, &ctx, &err);
++	if (!json) {
++		ERROR("Failed to generate json: %s", err);
++		goto cleanup;
++	}
++	len = strlen(json);
++	json[len] = '\n';
++	ret = isulad_lxc_terminal_rotate_write_data(terminal, json, len + 1);
++cleanup:
++	free(json);
++	free_logger_json_file(msg);
++	free(err);
++	return ret;
++}
++
++static inline bool is_syslog(const char *driver)
++{
++	if (driver == NULL) {
++		return false;
++	}
++
++	return (strcmp("syslog", driver) == 0);
++}
++
++static ssize_t isulad_logger_syslog_write(struct lxc_terminal *terminal, const char *buf, int bytes_read)
++{
++        syslog(LOG_INFO, "%.*s", bytes_read, buf); /* buf is a length-delimited slice, not NUL-terminated */
++        return 0;
++}
++
++static inline ssize_t isulad_logger_write(struct lxc_terminal *terminal, const char *type, const char *buf,
++                                int bytes_read)
++{
++        if (is_syslog(terminal->log_driver)) {
++                return isulad_logger_syslog_write(terminal, buf, bytes_read);
++        }
++
++        return isulad_logger_json_write(terminal, type, buf, bytes_read);
++}
++
++static int isulad_lxc_terminal_write_log_file(struct lxc_terminal *terminal, const char *type, char *buf,
++                                       int bytes_read)
++{
++#define __BUF_CACHE_SIZE (16 * LXC_TERMINAL_BUFFER_SIZE)
++	static char cache[__BUF_CACHE_SIZE];
++	static int size = 0;
++	int upto, index;
++	int begin = 0, buf_readed = 0, buf_left = 0;
++	int ret;
++
++	if (buf != NULL && bytes_read > 0) {
++		/* Work out how much of the new data still fits into the cache. */
++		upto = size + bytes_read;
++		if (upto > __BUF_CACHE_SIZE) {
++			upto = __BUF_CACHE_SIZE;
++		}
++
++		if (upto > size) {
++			buf_readed = upto - size;
++			memcpy(cache + size, buf, buf_readed);
++			buf_left = bytes_read - buf_readed;
++			size += buf_readed;
++		}
++	}
++
++	// If we have no data to log, and there's no more coming, we're done.
++	if (size == 0)
++		return 0;
++
++	// Break up the data that we've buffered up into lines, and log each in turn.
++	for (index = 0; index < size; index++) {
++		if (cache[index] == '\n') {
++			ret = isulad_logger_write(terminal, type, cache + begin, index - begin + 1);
++			if (ret < 0) {
++				WARN("Failed to log msg");
++			}
++			begin = index + 1;
++		}
++	}
++	/* If there's no more coming (buf == NULL), or the cache is full but
++	 * has no newlines, log whatever we haven't logged yet as a partial
++	 * line, then mark the cache empty even if nothing was left to log. */
++	if (buf == NULL || (begin == 0 && size == __BUF_CACHE_SIZE)) {
++		if (begin < size) {
++			ret = isulad_logger_write(terminal, type, cache + begin, size - begin);
++			if (ret < 0) {
++				WARN("Failed to log msg");
++			}
++		}
++		begin = 0;
++		size = 0;
++		if (buf == NULL) {
++			return 0;
++		}
++	}
++	/* Move unlogged data to the front; the regions may overlap, so use memmove. */
++	if (begin > 0) {
++		memmove(cache, cache + begin, size - begin);
++		size -= begin;
++	}
++	/* Append the part of buf that did not fit into the cache earlier. */
++	if (buf_left > 0) {
++		memcpy(cache + size, buf + buf_readed, buf_left);
++		size += buf_left;
++	}
++	return 0;
++}
++
++/* isulad: forward data to all fifos */
++static void lxc_forward_data_to_fifo(struct lxc_list *list, bool is_err, const char *buf, int r)
++{
++	struct lxc_list *it  = NULL;
++	struct lxc_list *next = NULL;
++	struct lxc_fifos_fd *elem = NULL;
++	ssize_t w = 0;
++
++	lxc_list_for_each_safe(it, list, next) {
++		elem = it->elem;
++		if (is_err) {
++			if (elem->err_fd >= 0) {
++				w = lxc_write_nointr_for_fifo(elem->err_fd, buf, r);
++				if (w != r) {
++					WARN("Failed to write to fifo fd %d with error: %s", elem->err_fd, strerror(errno));
++				}
++			}
++		} else {
++			if (elem->out_fd >= 0) {
++				w = lxc_write_nointr_for_fifo(elem->out_fd, buf, r);
++				if (w != r) {
++					WARN("Failed to write to fifo fd %d with error: %s", elem->out_fd, strerror(errno));
++				}
++			}
++		}
++	}
++
++	return;
++}
++
++/* isulad: judge the fd whether is fifo */
++static bool lxc_terminal_is_fifo(int fd, struct lxc_list *list)
++{
++	struct lxc_list *it = NULL;
++	struct lxc_list *next = NULL;
++	struct lxc_fifos_fd *elem = NULL;
++
++	lxc_list_for_each_safe(it, list, next) {
++		elem = it->elem;
++		if (elem->in_fd == fd)
++			return true;
++	}
++
++	return false;
++}
++
++/* isulad: if fd == -1, means delete all the fifos*/
++int lxc_terminal_delete_fifo(int fd, struct lxc_list *list)
++{
++	struct lxc_list *it = NULL;
++	struct lxc_list *next = NULL;
++	struct lxc_fifos_fd *elem = NULL;
++
++	lxc_list_for_each_safe(it, list, next) {
++		elem = it->elem;
++		if (elem->in_fd == fd || -1 == fd) {
++			INFO("Delete fifo fd %d", fd);
++			lxc_list_del(it);
++			if (elem->in_fifo)
++				free(elem->in_fifo);
++			if (elem->out_fifo)
++				free(elem->out_fifo);
++			if (elem->err_fifo)
++				free(elem->err_fifo);
++			if (elem->in_fd >= 0)
++				close(elem->in_fd);
++			if (elem->out_fd >= 0)
++				close(elem->out_fd);
++			if (elem->err_fd >= 0)
++				close(elem->err_fd);
++			free(elem);
++		}
++	}
++
++	return 0;
++}
++
+ static int do_isulad_io(int fd, struct lxc_terminal *terminal)
+ {
+ 	char buf[LXC_TERMINAL_BUFFER_SIZE];
+@@ -373,7 +726,6 @@ static int do_isulad_io(int fd, struct lxc_terminal *terminal)
+ static int isulad_io_handler(int fd, uint32_t events, void *data,
+ 				       struct lxc_async_descr *descr)
+ {
+-	struct lxc_terminal *terminal = data;
+ 	int ret;
+ 
+ 	ret = do_isulad_io(fd, data);
+@@ -491,7 +843,11 @@ static int lxc_terminal_write_log_file(struct lxc_terminal *terminal, char *buf,
+ }
+ #endif
+ 
++#ifdef HAVE_ISULAD
++static int lxc_terminal_ptx_io(struct lxc_terminal *terminal, int fd)
++#else
+ static int lxc_terminal_ptx_io(struct lxc_terminal *terminal)
++#endif
+ {
+ 	char buf[LXC_TERMINAL_BUFFER_SIZE];
+ 	int r, w, w_log, w_rbuf;
+@@ -576,7 +932,11 @@ static int lxc_terminal_ptx_io_handler(int fd, uint32_t events, void *data,
+ 	struct lxc_terminal *terminal = data;
+ 	int ret;
+ 
++#ifdef HAVE_ISULAD
++	ret = lxc_terminal_ptx_io(data, fd);
++#else
+ 	ret = lxc_terminal_ptx_io(data);
++#endif
+ 	if (ret < 0)
+ 		return log_info(LXC_MAINLOOP_CLOSE,
+ 				"Terminal client on fd %d has exited",
+@@ -1408,7 +1768,7 @@ int lxc_terminal_add_fifos(struct lxc_conf *conf, const char *fifonames)
+ 	}
+ 
+ 	if (lxc_mainloop_add_handler(terminal->descr, fifofd_in,
+-	                             lxc_terminal_io_cb, terminal)) {
++	                             lxc_terminal_ptx_cb, default_cleanup_handler, terminal, "fifofd_in")) {
+ 		ERROR("console fifo not added to mainloop");
+ 		lxc_terminal_delete_fifo(fifofd_in, &terminal->fifos);
+ 		ret = -1;
+@@ -1599,6 +1959,7 @@ int lxc_terminal_parent(struct lxc_conf *conf)
+ 	return lxc_terminal_map_ids(conf, &conf->console);
+ }
+ 
++#ifndef HAVE_ISULAD
+ static int lxc_terminal_create_native(const char *name, const char *lxcpath,
+ 				      struct lxc_terminal *terminal)
+ {
+@@ -1627,6 +1988,7 @@ static int lxc_terminal_create_native(const char *name, const char *lxcpath,
+ 
+ 	return 0;
+ }
++#endif
+ 
+ int lxc_terminal_create(const char *name, const char *lxcpath,
+ 			struct lxc_conf *conf, struct lxc_terminal *terminal)
+@@ -1635,6 +1997,7 @@ int lxc_terminal_create(const char *name, const char *lxcpath,
+ 	if (!lxc_terminal_create_native(name, lxcpath, terminal))
+ 		return 0;
+ #else
++	int ret;
+ 	/* isulad: open default fifos */
+ 	ret = lxc_terminal_fifo_default(terminal);
+ 	if (ret < 0) {
+diff --git a/src/lxc/tools/lxc_ls.c b/src/lxc/tools/lxc_ls.c
+index 86a453d..505ed95 100644
+--- a/src/lxc/tools/lxc_ls.c
++++ b/src/lxc/tools/lxc_ls.c
+@@ -1004,7 +1004,7 @@ static int my_parser(struct lxc_arguments *args, int c, char *arg)
+ }
+ 
+ #ifdef HAVE_ISULAD
+-static int ls_get_wrapper(void *wrap, int msgfd);
++static int ls_get_wrapper(void *wrap, int msgfd)
+ #else
+ static int ls_get_wrapper(void *wrap)
+ #endif
+diff --git a/src/lxc/utils.c b/src/lxc/utils.c
+index 25cb0d1..397638e 100644
+--- a/src/lxc/utils.c
++++ b/src/lxc/utils.c
+@@ -37,6 +37,9 @@
+ #include "process_utils.h"
+ #include "syscall_wrappers.h"
+ #include "utils.h"
++#ifdef HAVE_ISULAD
++#include "lsm/lsm.h"
++#endif
+ 
+ #if !HAVE_STRLCPY
+ #include "strlcpy.h"
+diff --git a/src/tests/aa.c b/src/tests/aa.c
+index 417f3fc..f766640 100644
+--- a/src/tests/aa.c
++++ b/src/tests/aa.c
+@@ -40,7 +40,11 @@ static void try_to_remove(void)
+ 	}
+ }
+ 
++#ifdef HAVE_ISULAD
++static int test_attach_write_file(void* payload, int msg_fd)
++#else
+ static int test_attach_write_file(void* payload)
++#endif
+ {
+ 	char *fnam = payload;
+ 	FILE *f;
+diff --git a/src/tests/capabilities.c b/src/tests/capabilities.c
+index 5704942..c54a051 100644
+--- a/src/tests/capabilities.c
++++ b/src/tests/capabilities.c
+@@ -41,7 +41,11 @@
+ __u32 *cap_bset_bits = NULL;
+ __u32 last_cap = 0;
+ 
++#ifdef HAVE_ISULAD
++static int capabilities_allow(void *payload, int msg_fd)
++#else
+ static int capabilities_allow(void *payload)
++#endif
+ {
+ 	for (__u32 cap = 0; cap <= last_cap; cap++) {
+ 		bool bret;
+@@ -62,7 +66,11 @@ static int capabilities_allow(void *payload)
+ 	return EXIT_SUCCESS;
+ }
+ 
++#ifdef HAVE_ISULAD
++static int capabilities_deny(void *payload, int msg_fd)
++#else
+ static int capabilities_deny(void *payload)
++#endif
+ {
+ 	for (__u32 cap = 0; cap <= last_cap; cap++) {
+ 		bool bret;
+@@ -83,7 +91,11 @@ static int capabilities_deny(void *payload)
+ 	return EXIT_SUCCESS;
+ }
+ 
++#ifdef HAVE_ISULAD
++static int run(int (*test)(void *, int), bool allow)
++#else
+ static int run(int (*test)(void *), bool allow)
++#endif
+ {
+ 	int fd_log = -EBADF, fret = -1;
+ 	lxc_attach_options_t attach_options = LXC_ATTACH_OPTIONS_DEFAULT;
+diff --git a/src/tests/mount_injection.c b/src/tests/mount_injection.c
+index f98370b..5e852eb 100644
+--- a/src/tests/mount_injection.c
++++ b/src/tests/mount_injection.c
+@@ -70,7 +70,11 @@ static int comp_field(char *line, const char *str, int nfields)
+ 	return ret;
+ }
+ 
++#ifdef HAVE_ISULAD
++static int find_in_proc_mounts(void *data, int msg_fd)
++#else
+ static int find_in_proc_mounts(void *data)
++#endif
+ {
+ 	char buf[LXC_LINELEN];
+ 	FILE *f;
+diff --git a/src/tests/proc_pid.c b/src/tests/proc_pid.c
+index 9531ec2..56bbf52 100644
+--- a/src/tests/proc_pid.c
++++ b/src/tests/proc_pid.c
+@@ -15,7 +15,11 @@
+ #define PROC_INIT_PATH "/proc/1/oom_score_adj"
+ #define PROC_SELF_PATH "/proc/self/oom_score_adj"
+ 
++#ifdef HAVE_ISULAD
++static int check_oom_score_adj(void *payload, int msg_fd)
++#else
+ static int check_oom_score_adj(void *payload)
++#endif
+ {
+ 	__do_close int fd = -EBADF;
+ 	char buf[INTTYPE_TO_STRLEN(__s64)];
+diff --git a/src/tests/rootfs_options.c b/src/tests/rootfs_options.c
+index 55f86ab..73b88f9 100644
+--- a/src/tests/rootfs_options.c
++++ b/src/tests/rootfs_options.c
+@@ -60,7 +60,11 @@ static int has_mount_properties(const char *path, unsigned int flags)
+ #endif
+ }
+ 
++#ifdef HAVE_ISULAD
++static int rootfs_options(void *payload, int msg_fd)
++#else
+ static int rootfs_options(void *payload)
++#endif
+ {
+ 	int ret;
+ 
+diff --git a/src/tests/sys_mixed.c b/src/tests/sys_mixed.c
+index b51f28c..8a6ae53 100644
+--- a/src/tests/sys_mixed.c
++++ b/src/tests/sys_mixed.c
+@@ -56,7 +56,11 @@ static int is_read_only(const char *path)
+ #endif
+ }
+ 
++#ifdef HAVE_ISULAD
++static int sys_mixed(void *payload, int msg_fd)
++#else
+ static int sys_mixed(void *payload)
++#endif
+ {
+ 	int ret;
+ 
+diff --git a/src/tests/sysctls.c b/src/tests/sysctls.c
+index da4538f..6a715a3 100644
+--- a/src/tests/sysctls.c
++++ b/src/tests/sysctls.c
+@@ -16,7 +16,11 @@
+ #define SYSCTL_CONFIG_KEY "lxc.sysctl.net.ipv4.ip_forward"
+ #define SYSCTL_CONFIG_VALUE "1"
+ 
++#ifdef HAVE_ISULAD
++static int check_sysctls(void *payload, int msg_fd)
++#else
+ static int check_sysctls(void *payload)
++#endif
+ {
+ 	__do_close int fd = -EBADF;
+ 	char buf[INTTYPE_TO_STRLEN(__u64)];
+-- 
+2.25.1
+
diff --git a/lxc.spec b/lxc.spec
index e52a93740e2966c4dae703b64ae6eaf62196e5c9..33fc0464934cc2fdc6977980bab399cd7e8bd37a 100644
--- a/lxc.spec
+++ b/lxc.spec
@@ -1,4 +1,4 @@
-%global _release 1
+%global _release 2
 
 Name:           lxc
 Version:        5.0.2
@@ -9,6 +9,10 @@ URL:            https://github.com/lxc/lxc
 Source0:        https://linuxcontainers.org/downloads/lxc/lxc-5.0.2.tar.gz
 
 Patch0001:	0001-iSulad-add-json-files-and-adapt-to-meson.patch
+Patch0002:	0002-iSulad-adapt-security-conf-attach-cgroup-and-start.patch
+Patch0003:	0003-iSulad-adapt-conf-network-storage-and-termianl.patch
+Patch0004:	0004-iSulad-adapt-confile-lxccontainer-and-start.patch
+Patch0005:	0005-fix-compile-error.patch
 
 BuildRequires:  systemd-units git libtool graphviz docbook2X doxygen chrpath
 BuildRequires:  pkgconfig(libseccomp)
@@ -72,7 +76,7 @@ This package contains documentation for lxc for creating containers.
 %ifarch riscv64
 export LDFLAGS="%{build_ldflags} -latomic -pthread"
 %endif
-meson setup -Disulad=false -Dtests=true -Dprefix=/usr build
+meson setup -Disulad=true -Dtests=true -Dprefix=/usr build
 meson compile -C build
 
 %install
@@ -148,7 +152,6 @@ meson test -C build
 %{_sbindir}/init.%{name}
 %{_sharedstatedir}/%{name}
 %dir %{_sysconfdir}/%{name}
-%config(noreplace) %{_sysconfdir}/%{name}/default.conf
 %config(noreplace) %{_sysconfdir}/lxc/*
 %config(noreplace) %{_sysconfdir}/sysconfig/*
 
@@ -166,7 +169,6 @@ meson test -C build
 %{_includedir}/%{name}/*
 %{_libdir}/pkgconfig/%{name}.pc
 %dir %{_datadir}/%{name}
-%{_datadir}/%{name}/hooks
 %{_datadir}/%{name}/lxc-patch.py*
 %{_datadir}/%{name}/selinux
 %dir %{_datadir}/%{name}/templates
@@ -191,6 +193,12 @@ meson test -C build
 %endif
 
 %changelog
+* Tue Aug 01 2023 zhangxiaoyu<zhangxiaoyu58@huawei.com> - 5.0.2-2
+- Type:enhancement
+- ID:NA
+- SUG:NA
+- DESC: add isulad code and fix compile error
+
 * Thu Jul 13 2023 haozi007<liuhao27@huawei.com> - 5.0.2-1
 - Type:enhancement
 - ID:NA