From 1335abd444b135e4193d332cb905ff8c1e62c33c Mon Sep 17 00:00:00 2001 From: tl Date: Thu, 22 Aug 2024 14:41:34 +0800 Subject: [PATCH 001/116] =?UTF-8?q?sharefs=20revert=20=E8=A7=A3=E8=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: tl --- fs/sharefs/authentication.c | 4 +- fs/sharefs/authentication.h | 5 +- fs/sharefs/dentry.c | 106 +----------------------------------- fs/sharefs/file.c | 16 +----- fs/sharefs/inode.c | 99 ++++++++------------------------- fs/sharefs/lookup.c | 34 +++++------- fs/sharefs/sharefs.h | 21 +++---- fs/sharefs/super.c | 8 ++- 8 files changed, 61 insertions(+), 232 deletions(-) diff --git a/fs/sharefs/authentication.c b/fs/sharefs/authentication.c index 8986fcbcc894..bc6e825092fa 100644 --- a/fs/sharefs/authentication.c +++ b/fs/sharefs/authentication.c @@ -75,7 +75,8 @@ void sharefs_root_inode_perm_init(struct inode *root_inode) hii->perm = SHAREFS_PERM_FIX; } -const struct cred *sharefs_override_file_fsids(struct inode *dir) +#ifdef CONFIG_SHAREFS_SUPPORT_OVERRIDE +const struct cred *sharefs_override_file_fsids(struct inode *dir, __u16 *_perm) { struct cred *cred = NULL; cred = prepare_creds(); @@ -94,3 +95,4 @@ void sharefs_revert_fsids(const struct cred *old_cred) revert_creds(old_cred); put_cred(cur_cred); } +#endif diff --git a/fs/sharefs/authentication.h b/fs/sharefs/authentication.h index 0a096d1ee710..e598fa883c0a 100644 --- a/fs/sharefs/authentication.h +++ b/fs/sharefs/authentication.h @@ -40,8 +40,11 @@ extern void sharefs_exit_configfs(void); void sharefs_root_inode_perm_init(struct inode *root_inode); void fixup_perm_from_level(struct inode *dir, struct dentry *dentry); -const struct cred *sharefs_override_file_fsids(struct inode *dir); +#ifdef CONFIG_SHAREFS_SUPPORT_OVERRIDE +const struct cred *sharefs_override_file_fsids(struct inode *dir, + __u16 *_perm); void sharefs_revert_fsids(const struct cred *old_cred); +#endif static inline bool is_read_only_auth(__u16 perm) { diff --git a/fs/sharefs/dentry.c b/fs/sharefs/dentry.c index c1a4c30f30fe..dee29cace6b7 100644 --- a/fs/sharefs/dentry.c +++ b/fs/sharefs/dentry.c @@ -10,7 +10,6 @@ */ #include "sharefs.h" -#include "authentication.h" /* * returns: 0: tell VFS to invalidate dentry in share directory @@ -30,7 +29,7 @@ static void sharefs_d_release(struct dentry *dentry) */ if (SHAREFS_D(dentry)) { /* release and reset the lower paths */ - sharefs_reset_lower_path(dentry); + sharefs_put_reset_lower_path(dentry); free_dentry_private_data(dentry); } return; @@ -40,106 +39,3 @@ const struct dentry_operations sharefs_dops = { .d_revalidate = sharefs_d_revalidate, .d_release = sharefs_d_release, }; - -static int try_to_create_lower_path(struct dentry *d, struct path *lower_path) -{ - int err; - struct path lower_dir_path; - struct qstr this; - struct dentry *parent; - struct dentry *lower_dentry; - struct inode *lower_dir_inode; - - parent = dget_parent(d); - err = sharefs_get_lower_path(parent, &lower_dir_path, 0); - dput(parent); - if (err) - return err; - /* instantiate a new negative dentry */ - this.name = d->d_name.name; - this.len = strlen(d->d_name.name); - this.hash = full_name_hash(lower_dir_path.dentry, this.name, this.len); - lower_dentry = d_alloc(lower_dir_path.dentry, &this); - if (!lower_dentry) { - err = -ENOMEM; - goto out; - } - lower_dir_inode = d_inode(lower_dir_path.dentry); - if (lower_dir_inode->i_op && lower_dir_inode->i_op->lookup) - lower_dir_inode->i_op->lookup(lower_dir_inode, lower_dentry, 0); - - spin_lock(&SHAREFS_D(d)->lock); - lower_path->dentry = lower_dentry; - lower_path->mnt = lower_dir_path.mnt; - spin_unlock(&SHAREFS_D(d)->lock); - -out: - sharefs_put_lower_path(parent, &lower_dir_path); - return err; -} - -/* Returns struct path. Caller must path_put it. */ -int sharefs_get_lower_path(struct dentry *d, struct path *lower_path, - bool try_to_create) -{ - int err = -ENOMEM; - char *path_buf; - char *path_name = NULL; - struct path lower_root_path; - const struct cred *saved_cred = NULL; - - if (unlikely(!d)) - goto out; - - path_buf = kmalloc(PATH_MAX, GFP_KERNEL); - if (unlikely(!path_buf)) - goto out; - path_name = dentry_path_raw(d, path_buf, PATH_MAX); - if (IS_ERR(path_name)) { - err = PTR_ERR(path_name); - goto out_free; - } - - sharefs_get_lower_root_path(d, &lower_root_path); - saved_cred = sharefs_override_file_fsids(d_inode(lower_root_path.dentry)); - if (!saved_cred) { - sharefs_put_lower_path(d, &lower_root_path); - goto out_free; - } - spin_lock(&SHAREFS_D(d)->lock); - err = vfs_path_lookup(lower_root_path.dentry, lower_root_path.mnt, - path_name, LOOKUP_CREATE, lower_path); - spin_unlock(&SHAREFS_D(d)->lock); - sharefs_revert_fsids(saved_cred); - sharefs_put_lower_path(d, &lower_root_path); - - if (err != -ENOENT || !try_to_create) - goto out_free; - err = try_to_create_lower_path(d, lower_path); - -out_free: - kfree(path_buf); -out: - return err; -} - -int sharefs_get_lower_inode(struct dentry *d, struct inode **lower_inode) -{ - int err = 0; - struct path lower_path; - - err = sharefs_get_lower_path(d, &lower_path, 0); - if (err) - goto out; - - *lower_inode = d_inode(lower_path.dentry); - if ((*lower_inode)->i_flags & S_DEAD) { - err = -ENOENT; - } else if (!igrab(*lower_inode)) { - err = -ESTALE; - } - - sharefs_put_lower_path(d, &lower_path); -out: - return err; -} \ No newline at end of file diff --git a/fs/sharefs/file.c b/fs/sharefs/file.c index 1aa44baf67e5..f04963d57a4a 100644 --- a/fs/sharefs/file.c +++ b/fs/sharefs/file.c @@ -31,7 +31,6 @@ static int sharefs_open(struct inode *inode, struct file *file) int err = 0; struct file *lower_file = NULL; struct path lower_path; - struct inode *lower_inode = NULL; /* don't open unhashed/deleted files */ if (d_unhashed(file->f_path.dentry)) { @@ -47,9 +46,7 @@ static int sharefs_open(struct inode *inode, struct file *file) } /* open lower object and link sharefs's file struct to lower's */ - err = sharefs_get_lower_path(file->f_path.dentry, &lower_path, 0); - if (err) - goto out_free; + sharefs_get_lower_path(file->f_path.dentry, &lower_path); lower_file = dentry_open(&lower_path, file->f_flags, current_cred()); path_put(&lower_path); if (IS_ERR(lower_file)) { @@ -63,21 +60,16 @@ static int sharefs_open(struct inode *inode, struct file *file) sharefs_set_lower_file(file, lower_file); } -out_free: if (err) { kfree(SHAREFS_F(file)); } else { kuid_t uid = inode->i_uid; kgid_t gid = inode->i_gid; mode_t mode = inode->i_mode; - err = sharefs_get_lower_inode(file->f_path.dentry, &lower_inode); - if (err) - goto out_err; - fsstack_copy_attr_all(inode, lower_inode); + fsstack_copy_attr_all(inode, sharefs_lower_inode(inode)); inode->i_uid = uid; inode->i_gid = gid; inode->i_mode = mode; - iput(lower_inode); } out_err: return err; @@ -124,9 +116,7 @@ static int sharefs_fsync(struct file *file, loff_t start, loff_t end, if (err) goto out; lower_file = sharefs_lower_file(file); - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - goto out; + sharefs_get_lower_path(dentry, &lower_path); err = vfs_fsync_range(lower_file, start, end, datasync); sharefs_put_lower_path(dentry, &lower_path); out: diff --git a/fs/sharefs/inode.c b/fs/sharefs/inode.c index e5cb396263e2..7bd08abe79ea 100644 --- a/fs/sharefs/inode.c +++ b/fs/sharefs/inode.c @@ -20,9 +20,7 @@ static int sharefs_getattr(const struct path *path, struct kstat *stat, struct path lower_path; int ret; - ret = sharefs_get_lower_path(path->dentry, &lower_path, 0); - if (ret) - return ret; + sharefs_get_lower_path(path->dentry, &lower_path); ret = vfs_getattr(&lower_path, stat, request_mask, flags); stat->ino = d_inode(path->dentry)->i_ino; stat->uid = d_inode(path->dentry)->i_uid; @@ -41,14 +39,8 @@ static ssize_t sharefs_listxattr(struct dentry *dentry, char *buffer, size_t buf struct dentry *lower_dentry; struct path lower_path; - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - return err; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; - if (d_inode(lower_dentry)->i_flags & S_DEAD) { - err = -ENOENT; - goto out; - } if (!(d_inode(lower_dentry)->i_opflags & IOP_XATTR)) { err = -EOPNOTSUPP; goto out; @@ -93,23 +85,17 @@ static int sharefs_create(struct inode *dir, struct dentry *dentry, struct dentry *lower_parent_dentry = NULL; struct path lower_path; const struct cred *saved_cred = NULL; - struct inode *lower_inode = NULL; + __u16 child_perm; - saved_cred = sharefs_override_file_fsids(dir); + saved_cred = sharefs_override_file_fsids(dir, &child_perm); if (!saved_cred) { err = -ENOMEM; return err; } - err = sharefs_get_lower_path(dentry, &lower_path, 1); - if (err) - goto out_revert; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - if (unlikely(!lower_parent_dentry)) { - err = -ENOENT; - goto out; - } err = vfs_create(d_inode(lower_parent_dentry), lower_dentry, mode, want_excl); if (err) @@ -117,17 +103,12 @@ static int sharefs_create(struct inode *dir, struct dentry *dentry, err = sharefs_interpose(dentry, dir->i_sb, &lower_path); if (err) goto out; - err = sharefs_get_lower_inode(dentry, &lower_inode); - if (err) - goto out; - fsstack_copy_attr_times(dir, lower_inode); + fsstack_copy_attr_times(dir, sharefs_lower_inode(dir)); fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); - iput(lower_inode); out: unlock_dir(lower_parent_dentry); sharefs_put_lower_path(dentry, &lower_path); -out_revert: sharefs_revert_fsids(saved_cred); return err; } @@ -139,23 +120,17 @@ static int sharefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct dentry *lower_parent_dentry = NULL; struct path lower_path; const struct cred *saved_cred = NULL; - struct inode *lower_inode = NULL; + __u16 child_perm; - saved_cred = sharefs_override_file_fsids(dir); + saved_cred = sharefs_override_file_fsids(dir, &child_perm); if (!saved_cred) { err = -ENOMEM; return err; } - err = sharefs_get_lower_path(dentry, &lower_path, 1); - if (err) - goto out_revert; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; lower_parent_dentry = lock_parent(lower_dentry); - if (unlikely(!lower_parent_dentry)) { - err = -ENOENT; - goto out; - } err = vfs_mkdir(d_inode(lower_parent_dentry), lower_dentry, mode); if (err) goto out; @@ -164,19 +139,14 @@ static int sharefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (err) goto out; - err = sharefs_get_lower_inode(dentry, &lower_inode); - if (err) - goto out; - fsstack_copy_attr_times(dir, lower_inode); + fsstack_copy_attr_times(dir, sharefs_lower_inode(dir)); fsstack_copy_inode_size(dir, d_inode(lower_parent_dentry)); /* update number of links on parent directory */ - set_nlink(dir, lower_inode->i_nlink); - iput(lower_inode); + set_nlink(dir, sharefs_lower_inode(dir)->i_nlink); out: unlock_dir(lower_parent_dentry); sharefs_put_lower_path(dentry, &lower_path); -out_revert: sharefs_revert_fsids(saved_cred); return err; } @@ -185,20 +155,11 @@ static int sharefs_unlink(struct inode *dir, struct dentry *dentry) { int err; struct dentry *lower_dentry = NULL; - struct inode *lower_dir_inode = NULL; - struct inode *lower_inode = NULL; + struct inode *lower_dir_inode = sharefs_lower_inode(dir); struct dentry *lower_dir_dentry = NULL; struct path lower_path; - err = sharefs_get_lower_inode(dentry->d_parent, &lower_dir_inode); - if (err) - return err; - err = sharefs_get_lower_inode(dentry, &lower_inode); - if (err) - goto put; - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - goto put_dir; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); @@ -207,7 +168,8 @@ static int sharefs_unlink(struct inode *dir, struct dentry *dentry) goto out; fsstack_copy_attr_times(dir, lower_dir_inode); fsstack_copy_inode_size(dir, lower_dir_inode); - set_nlink(dentry->d_inode, lower_inode->i_nlink); + set_nlink(dentry->d_inode, + sharefs_lower_inode(dentry->d_inode)->i_nlink); dentry->d_inode->i_ctime = dir->i_ctime; d_drop(dentry); @@ -215,10 +177,6 @@ static int sharefs_unlink(struct inode *dir, struct dentry *dentry) unlock_dir(lower_dir_dentry); dput(lower_dentry); sharefs_put_lower_path(dentry, &lower_path); -put_dir: - iput(lower_inode); -put: - iput(lower_dir_inode); return err; } @@ -229,9 +187,7 @@ static int sharefs_rmdir(struct inode *dir, struct dentry *dentry) struct dentry *lower_dir_dentry; struct path lower_path; - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - return err; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; lower_dir_dentry = lock_parent(lower_dentry); err = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry); @@ -266,14 +222,8 @@ static int sharefs_rename(struct inode *old_dir, struct dentry *old_dentry, if (flags) return -EINVAL; - err = sharefs_get_lower_path(old_dentry, &lower_old_path, 0); - if (err) - return err; - err = sharefs_get_lower_path(new_dentry, &lower_new_path, 1); - if (err) { - sharefs_put_lower_path(old_dentry, &lower_old_path); - return err; - } + sharefs_get_lower_path(old_dentry, &lower_old_path); + sharefs_get_lower_path(new_dentry, &lower_new_path); lower_old_dentry = lower_old_path.dentry; lower_new_dentry = lower_new_path.dentry; lower_old_dir_dentry = dget_parent(lower_old_dentry); @@ -332,15 +282,11 @@ static int sharefs_setattr(struct dentry *dentry, struct iattr *ia) err = setattr_prepare(dentry, ia); if (err) - return err; + goto out_err; - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - return err; + sharefs_get_lower_path(dentry, &lower_path); lower_dentry = lower_path.dentry; - err = sharefs_get_lower_inode(dentry, &lower_inode); - if (err) - goto out_err; + lower_inode = sharefs_lower_inode(inode); /* prepare our own lower struct iattr (with the lower file) */ memcpy(&lower_ia, ia, sizeof(lower_ia)); @@ -393,9 +339,8 @@ static int sharefs_setattr(struct dentry *dentry, struct iattr *ia) */ out: - iput(lower_inode); -out_err: sharefs_put_lower_path(dentry, &lower_path); +out_err: return err; } #endif diff --git a/fs/sharefs/lookup.c b/fs/sharefs/lookup.c index 9d5536c83161..0b0c30ef46f1 100644 --- a/fs/sharefs/lookup.c +++ b/fs/sharefs/lookup.c @@ -141,7 +141,6 @@ struct inode *sharefs_iget(struct super_block *sb, struct inode *lower_inode) fsstack_copy_inode_size(inode, lower_inode); unlock_new_inode(inode); - iput(lower_inode); return inode; } @@ -200,14 +199,14 @@ int sharefs_interpose(struct dentry *dentry, struct super_block *sb, */ static struct dentry *__sharefs_lookup(struct dentry *dentry, unsigned int flags, - struct path *lower_parent_path, - struct path *lower_path) + struct path *lower_parent_path) { int err = 0; struct vfsmount *lower_dir_mnt; struct dentry *lower_dir_dentry = NULL; struct dentry *lower_dentry; const char *name; + struct path lower_path; struct qstr this; struct dentry *ret_dentry = NULL; @@ -215,7 +214,7 @@ static struct dentry *__sharefs_lookup(struct dentry *dentry, d_set_d_op(dentry, &sharefs_dops); if (IS_ROOT(dentry)) - return NULL; + goto out; name = dentry->d_name.name; @@ -225,12 +224,12 @@ static struct dentry *__sharefs_lookup(struct dentry *dentry, /* Use vfs_path_lookup to check if the dentry exists or not */ err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, - lower_path); + &lower_path); /* no error: handle positive dentries */ if (!err) { - sharefs_set_lower_path(dentry, lower_path); + sharefs_set_lower_path(dentry, &lower_path); ret_dentry = - __sharefs_interpose(dentry, dentry->d_sb, lower_path); + __sharefs_interpose(dentry, dentry->d_sb, &lower_path); if (IS_ERR(ret_dentry)) { err = PTR_ERR(ret_dentry); /* path_put underlying path on error */ @@ -268,9 +267,9 @@ static struct dentry *__sharefs_lookup(struct dentry *dentry, lower_dentry, flags); setup_lower: - lower_path->dentry = lower_dentry; - lower_path->mnt = mntget(lower_dir_mnt); - sharefs_set_lower_path(dentry, lower_path); + lower_path.dentry = lower_dentry; + lower_path.mnt = mntget(lower_dir_mnt); + sharefs_set_lower_path(dentry, &lower_path); /* * If the intent is to create a file, then don't return an error, so @@ -291,31 +290,29 @@ struct dentry *sharefs_lookup(struct inode *dir, struct dentry *dentry, { int err; struct dentry *ret, *parent; - struct path lower_path; struct path lower_parent_path; #ifdef CONFIG_SHAREFS_SUPPORT_OVERRIDE const struct cred *saved_cred = NULL; + __u16 permission; #endif + parent = dget_parent(dentry); - err = sharefs_get_lower_path(parent, &lower_parent_path, 0); - if (err) { - dput(parent); - return ERR_PTR(err); - } + sharefs_get_lower_path(parent, &lower_parent_path); #ifdef CONFIG_SHAREFS_SUPPORT_OVERRIDE - saved_cred = sharefs_override_file_fsids(dir); + saved_cred = sharefs_override_file_fsids(dir, &permission); if (!saved_cred) { ret = ERR_PTR(-ENOMEM); goto out_err; } #endif + /* allocate dentry private data. We free it in ->d_release */ err = new_dentry_private_data(dentry); if (err) { ret = ERR_PTR(err); goto out; } - ret = __sharefs_lookup(dentry, flags, &lower_parent_path, &lower_path); + ret = __sharefs_lookup(dentry, flags, &lower_parent_path); if (IS_ERR(ret)) { sharefs_err("sharefs_lookup error!"); goto out; @@ -330,7 +327,6 @@ struct dentry *sharefs_lookup(struct inode *dir, struct dentry *dentry, fsstack_copy_attr_atime(d_inode(parent), sharefs_lower_inode(d_inode(parent))); fixup_perm_from_level(d_inode(parent), dentry); - sharefs_put_lower_path(dentry, &lower_path); out: #ifdef CONFIG_SHAREFS_SUPPORT_OVERRIDE sharefs_revert_fsids(saved_cred); diff --git a/fs/sharefs/sharefs.h b/fs/sharefs/sharefs.h index 7b5540b09d33..143f047b235f 100644 --- a/fs/sharefs/sharefs.h +++ b/fs/sharefs/sharefs.h @@ -91,9 +91,6 @@ extern int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, struct path *path); extern int sharefs_parse_options(struct sharefs_sb_info *sbi, const char *data); -extern int sharefs_get_lower_inode(struct dentry *d, struct inode **lower_path); -extern int sharefs_get_lower_path(struct dentry *d, struct path *lower_path, - bool try_to_create); /* * inode to private data @@ -157,18 +154,16 @@ static inline void pathcpy(struct path *dst, const struct path *src) dst->dentry = src->dentry; dst->mnt = src->mnt; } - -static inline void sharefs_get_lower_root_path(const struct dentry *dent, - struct path *lower_root_path) +/* Returns struct path. Caller must path_put it. */ +static inline void sharefs_get_lower_path(const struct dentry *dent, + struct path *lower_path) { - struct super_block *sb = dent->d_sb; - - spin_lock(&SHAREFS_D(sb->s_root)->lock); - pathcpy(lower_root_path, &SHAREFS_D(sb->s_root)->lower_path); - path_get(lower_root_path); - spin_unlock(&SHAREFS_D(sb->s_root)->lock); + spin_lock(&SHAREFS_D(dent)->lock); + pathcpy(lower_path, &SHAREFS_D(dent)->lower_path); + path_get(lower_path); + spin_unlock(&SHAREFS_D(dent)->lock); + return; } - static inline void sharefs_put_lower_path(const struct dentry *dent, struct path *lower_path) { diff --git a/fs/sharefs/super.c b/fs/sharefs/super.c index b7cd611adcfd..bbe65944647f 100644 --- a/fs/sharefs/super.c +++ b/fs/sharefs/super.c @@ -99,9 +99,7 @@ static int sharefs_statfs(struct dentry *dentry, struct kstatfs *buf) int err; struct path lower_path; - err = sharefs_get_lower_path(dentry, &lower_path, 0); - if (err) - return err; + sharefs_get_lower_path(dentry, &lower_path); err = vfs_statfs(&lower_path, buf); sharefs_put_lower_path(dentry, &lower_path); @@ -119,13 +117,17 @@ static int sharefs_statfs(struct dentry *dentry, struct kstatfs *buf) */ static void sharefs_evict_inode(struct inode *inode) { + struct inode *lower_inode; + truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); /* * Decrement a reference to a lower_inode, which was incremented * by our read_inode when it was created initially. */ + lower_inode = sharefs_lower_inode(inode); sharefs_set_lower_inode(inode, NULL); + iput(lower_inode); } void __sharefs_log(const char *level, const bool ratelimited, -- Gitee From 3d0e9e0eb3abf54e25d8b55e7cdcc8cee4566286 Mon Sep 17 00:00:00 2001 From: jidong Date: Thu, 19 Sep 2024 15:35:28 +0800 Subject: [PATCH 002/116] cherry pick ac106d8 from https://gitee.com/JiDong-CS/kernel_linux_5.10_1/pulls/1614 fix the remove_node_by_token bug Signed-off-by: jidong Change-Id: I0d3c3dbfd28dcce853d242f4155e3bf785964b9c --- drivers/accesstokenid/access_tokenid.c | 27 ++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/accesstokenid/access_tokenid.c b/drivers/accesstokenid/access_tokenid.c index f82703cd2f39..1f0134a7e761 100644 --- a/drivers/accesstokenid/access_tokenid.c +++ b/drivers/accesstokenid/access_tokenid.c @@ -160,11 +160,15 @@ static void find_node_by_token(struct token_perm_node *root_node, uint32_t token } } -static int add_node_to_tree(struct token_perm_node *root_node, struct token_perm_node *node) +static int add_node_to_tree(struct token_perm_node **root_node, struct token_perm_node *node) { + if (root_node == NULL) { + pr_err("%s: invalid root_node.\n", __func__); + return -EINVAL; + } struct token_perm_node *target_node = NULL; struct token_perm_node *parent_node = NULL; - find_node_by_token(root_node, node->perm_data.token, &target_node, &parent_node); + find_node_by_token(*root_node, node->perm_data.token, &target_node, &parent_node); if (target_node != NULL) { target_node->perm_data = node->perm_data; return 0; @@ -174,7 +178,7 @@ static int add_node_to_tree(struct token_perm_node *root_node, struct token_perm return -EDQUOT; } if (parent_node == NULL) { - g_token_perm_root = node; + *root_node = node; } else if (parent_node->perm_data.token > node->perm_data.token) { parent_node->left = node; } else { @@ -184,11 +188,15 @@ static int add_node_to_tree(struct token_perm_node *root_node, struct token_perm return 1; } -static struct token_perm_node *remove_node_by_token(struct token_perm_node *root_node, uint32_t token) +static struct token_perm_node *remove_node_by_token(struct token_perm_node **root_node, uint32_t token) { + if (root_node == NULL) { + pr_err("%s: invalid root_node.\n", __func__); + return NULL; + } struct token_perm_node *target_node = NULL; struct token_perm_node *parent_node = NULL; - find_node_by_token(root_node, token, &target_node, &parent_node); + find_node_by_token(*root_node, token, &target_node, &parent_node); if (target_node == NULL) { pr_err("%s: target token to be removed not found.\n", __func__); return NULL; @@ -196,7 +204,7 @@ static struct token_perm_node *remove_node_by_token(struct token_perm_node *root struct token_perm_node **new_node_addr = NULL; if (parent_node == NULL) { - new_node_addr = &root_node; + new_node_addr = root_node; } else if (parent_node->perm_data.token > token) { new_node_addr = &(parent_node->left); } else { @@ -226,7 +234,7 @@ int access_tokenid_add_permission(struct file *file, void __user *uarg) } write_lock(&token_rwlock); - int ret = add_node_to_tree(g_token_perm_root, node); + int ret = add_node_to_tree(&g_token_perm_root, node); write_unlock(&token_rwlock); if (ret <= 0) { kmem_cache_free(g_cache, node); @@ -245,11 +253,10 @@ int access_tokenid_remove_permission(struct file *file, void __user *uarg) return -EFAULT; write_lock(&token_rwlock); - struct token_perm_node *target_node = remove_node_by_token(g_token_perm_root, token); - write_unlock(&token_rwlock); - + struct token_perm_node *target_node = remove_node_by_token(&g_token_perm_root, token); if (target_node != NULL) kmem_cache_free(g_cache, target_node); + write_unlock(&token_rwlock); return 0; } -- Gitee From 8d8373cb2449c1f4c7846385a340ac20fe6a1547 Mon Sep 17 00:00:00 2001 From: y30045862 Date: Wed, 18 Sep 2024 14:56:48 +0800 Subject: [PATCH 003/116] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcommand=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E4=BC=A0=E9=80=92=E9=94=99=E8=AF=AF=E9=97=AE=E9=A2=98?= =?UTF-8?q?=20=EF=BC=88cherry=20picked=20commit=20from=20=20Signed-off-by:=20yangjingbo10=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fs/hmdfs/comm/socket_adapter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c index 6ed430268d84..b9f35b9e1626 100644 --- a/fs/hmdfs/comm/socket_adapter.c +++ b/fs/hmdfs/comm/socket_adapter.c @@ -281,7 +281,7 @@ static struct hmdfs_msg_parasite *mp_alloc(struct hmdfs_peer *peer, return ERR_PTR(-ENOMEM); ret = hmdfs_alloc_msg_idr(peer, MSG_IDR_MESSAGE_ASYNC, mp, - mp->head.send_cmd_operations); + req->operations); if (unlikely(ret)) { kfree(mp); return ERR_PTR(ret); -- Gitee From 726edab7802b0c1c39f768883efc01cb9346b8cc Mon Sep 17 00:00:00 2001 From: tl Date: Thu, 31 Oct 2024 10:28:34 +0800 Subject: [PATCH 004/116] sharefs setattr not allow chmod or chown Signed-off-by: tl --- fs/sharefs/inode.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/sharefs/inode.c b/fs/sharefs/inode.c index 7bd08abe79ea..b5808830365f 100644 --- a/fs/sharefs/inode.c +++ b/fs/sharefs/inode.c @@ -308,12 +308,7 @@ static int sharefs_setattr(struct dentry *dentry, struct iattr *ia) truncate_setsize(inode, ia->ia_size); } - /* - * mode change is for clearing setuid/setgid bits. Allow lower fs - * to interpret this in its own way. - */ - if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) - lower_ia.ia_valid &= ~ATTR_MODE; + lower_ia.ia_valid &= ~(ATTR_MODE|ATTR_UID|ATTR_GID); /* notify the (possibly copied-up) lower inode */ /* -- Gitee From 9a7241b3f8d3a050bacf15680160d297f2ef03bc Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 8 Oct 2024 11:24:56 +0000 Subject: [PATCH 005/116] cgroup/bpf: use a dedicated workqueue for cgroup bpf destruction stable inclusion from stable-v6.6.60 commit 0d86cd70fc6a7ba18becb52ad8334d5ad3eca530 category: bugfix issue: #IBBMZ5 CVE: CVE-2024-53054 Signed-off-by: May27May --------------------------------------- [ Upstream commit 117932eea99b729ee5d12783601a4f7f5fd58a23 ] A hung_task problem shown below was found: INFO: task kworker/0:0:8 blocked for more than 327 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. Workqueue: events cgroup_bpf_release Call Trace: __schedule+0x5a2/0x2050 ? find_held_lock+0x33/0x100 ? wq_worker_sleeping+0x9e/0xe0 schedule+0x9f/0x180 schedule_preempt_disabled+0x25/0x50 __mutex_lock+0x512/0x740 ? cgroup_bpf_release+0x1e/0x4d0 ? cgroup_bpf_release+0xcf/0x4d0 ? process_scheduled_works+0x161/0x8a0 ? cgroup_bpf_release+0x1e/0x4d0 ? mutex_lock_nested+0x2b/0x40 ? __pfx_delay_tsc+0x10/0x10 mutex_lock_nested+0x2b/0x40 cgroup_bpf_release+0xcf/0x4d0 ? process_scheduled_works+0x161/0x8a0 ? trace_event_raw_event_workqueue_execute_start+0x64/0xd0 ? process_scheduled_works+0x161/0x8a0 process_scheduled_works+0x23a/0x8a0 worker_thread+0x231/0x5b0 ? __pfx_worker_thread+0x10/0x10 kthread+0x14d/0x1c0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x59/0x70 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 This issue can be reproduced by the following pressuse test: 1. A large number of cpuset cgroups are deleted. 2. Set cpu on and off repeatly. 3. Set watchdog_thresh repeatly. The scripts can be obtained at LINK mentioned above the signature. The reason for this issue is cgroup_mutex and cpu_hotplug_lock are acquired in different tasks, which may lead to deadlock. It can lead to a deadlock through the following steps: 1. A large number of cpusets are deleted asynchronously, which puts a large number of cgroup_bpf_release works into system_wq. The max_active of system_wq is WQ_DFL_ACTIVE(256). Consequently, all active works are cgroup_bpf_release works, and many cgroup_bpf_release works will be put into inactive queue. As illustrated in the diagram, there are 256 (in the acvtive queue) + n (in the inactive queue) works. 2. Setting watchdog_thresh will hold cpu_hotplug_lock.read and put smp_call_on_cpu work into system_wq. However step 1 has already filled system_wq, 'sscs.work' is put into inactive queue. 'sscs.work' has to wait until the works that were put into the inacvtive queue earlier have executed (n cgroup_bpf_release), so it will be blocked for a while. 3. Cpu offline requires cpu_hotplug_lock.write, which is blocked by step 2. 4. Cpusets that were deleted at step 1 put cgroup_release works into cgroup_destroy_wq. They are competing to get cgroup_mutex all the time. When cgroup_metux is acqured by work at css_killed_work_fn, it will call cpuset_css_offline, which needs to acqure cpu_hotplug_lock.read. However, cpuset_css_offline will be blocked for step 3. 5. At this moment, there are 256 works in active queue that are cgroup_bpf_release, they are attempting to acquire cgroup_mutex, and as a result, all of them are blocked. Consequently, sscs.work can not be executed. Ultimately, this situation leads to four processes being blocked, forming a deadlock. system_wq(step1) WatchDog(step2) cpu offline(step3) cgroup_destroy_wq(step4) ... 2000+ cgroups deleted asyn 256 actives + n inactives __lockup_detector_reconfigure P(cpu_hotplug_lock.read) put sscs.work into system_wq 256 + n + 1(sscs.work) sscs.work wait to be executed warting sscs.work finish percpu_down_write P(cpu_hotplug_lock.write) ...blocking... css_killed_work_fn P(cgroup_mutex) cpuset_css_offline P(cpu_hotplug_lock.read) ...blocking... 256 cgroup_bpf_release mutex_lock(&cgroup_mutex); ..blocking... To fix the problem, place cgroup_bpf_release works on a dedicated workqueue which can break the loop and solve the problem. System wqs are for misc things which shouldn't create a large number of concurrent work items. If something is going to generate >WQ_DFL_ACTIVE(256) concurrent work items, it should use its own dedicated workqueue. Fixes: 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself") Cc: stable@vger.kernel.org # v5.3+ Link: https://lore.kernel.org/cgroups/e90c32d2-2a85-4f28-9154-09c7d320cb60@huawei.com/T/#t Tested-by: Vishal Chourasia Signed-off-by: Chen Ridong Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: May27May Conflicts: kernel/bpf/cgroup.c --- kernel/bpf/cgroup.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 54321df6cfac..8de3366fdb70 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -22,6 +22,23 @@ DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); EXPORT_SYMBOL(cgroup_bpf_enabled_key); +/* + * cgroup bpf destruction makes heavy use of work items and there can be a lot + * of concurrent destructions. Use a separate workqueue so that cgroup bpf + * destruction work items don't end up filling up max_active of system_wq + * which may lead to deadlock. + */ +static struct workqueue_struct *cgroup_bpf_destroy_wq; + +static int __init cgroup_bpf_wq_init(void) +{ + cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1); + if (!cgroup_bpf_destroy_wq) + panic("Failed to alloc workqueue for cgroup bpf destroy.\n"); + return 0; +} +core_initcall(cgroup_bpf_wq_init); + void cgroup_bpf_offline(struct cgroup *cgrp) { cgroup_get(cgrp); -- Gitee From 9cd4f9507ae05f1d1f10c40ed582c1ec04cc259a Mon Sep 17 00:00:00 2001 From: Xinqi Zhang Date: Wed, 16 Oct 2024 14:13:38 +0800 Subject: [PATCH 006/116] firmware: arm_scmi: Fix slab-use-after-free in scmi_bus_notifier() mainline inclusion from mainline-v6.12-rc7 commit 295416091e44806760ccf753aeafdafc0ae268f3 category: bugfix issue: #IBBMZ5 CVE: CVE-2024-53068 Signed-off-by: May27May --------------------------------------- The scmi_dev->name is released prematurely in __scmi_device_destroy(), which causes slab-use-after-free when accessing scmi_dev->name in scmi_bus_notifier(). So move the release of scmi_dev->name to scmi_device_release() to avoid slab-use-after-free. | BUG: KASAN: slab-use-after-free in strncmp+0xe4/0xec | Read of size 1 at addr ffffff80a482bcc0 by task swapper/0/1 | | CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.6.38-debug #1 | Hardware name: Qualcomm Technologies, Inc. SA8775P Ride (DT) | Call trace: | dump_backtrace+0x94/0x114 | show_stack+0x18/0x24 | dump_stack_lvl+0x48/0x60 | print_report+0xf4/0x5b0 | kasan_report+0xa4/0xec | __asan_report_load1_noabort+0x20/0x2c | strncmp+0xe4/0xec | scmi_bus_notifier+0x5c/0x54c | notifier_call_chain+0xb4/0x31c | blocking_notifier_call_chain+0x68/0x9c | bus_notify+0x54/0x78 | device_del+0x1bc/0x840 | device_unregister+0x20/0xb4 | __scmi_device_destroy+0xac/0x280 | scmi_device_destroy+0x94/0xd0 | scmi_chan_setup+0x524/0x750 | scmi_probe+0x7fc/0x1508 | platform_probe+0xc4/0x19c | really_probe+0x32c/0x99c | __driver_probe_device+0x15c/0x3c4 | driver_probe_device+0x5c/0x170 | __driver_attach+0x1c8/0x440 | bus_for_each_dev+0xf4/0x178 | driver_attach+0x3c/0x58 | bus_add_driver+0x234/0x4d4 | driver_register+0xf4/0x3c0 | __platform_driver_register+0x60/0x88 | scmi_driver_init+0xb0/0x104 | do_one_initcall+0xb4/0x664 | kernel_init_freeable+0x3c8/0x894 | kernel_init+0x24/0x1e8 | ret_from_fork+0x10/0x20 | | Allocated by task 1: | kasan_save_stack+0x2c/0x54 | kasan_set_track+0x2c/0x40 | kasan_save_alloc_info+0x24/0x34 | __kasan_kmalloc+0xa0/0xb8 | __kmalloc_node_track_caller+0x6c/0x104 | kstrdup+0x48/0x84 | kstrdup_const+0x34/0x40 | __scmi_device_create.part.0+0x8c/0x408 | scmi_device_create+0x104/0x370 | scmi_chan_setup+0x2a0/0x750 | scmi_probe+0x7fc/0x1508 | platform_probe+0xc4/0x19c | really_probe+0x32c/0x99c | __driver_probe_device+0x15c/0x3c4 | driver_probe_device+0x5c/0x170 | __driver_attach+0x1c8/0x440 | bus_for_each_dev+0xf4/0x178 | driver_attach+0x3c/0x58 | bus_add_driver+0x234/0x4d4 | driver_register+0xf4/0x3c0 | __platform_driver_register+0x60/0x88 | scmi_driver_init+0xb0/0x104 | do_one_initcall+0xb4/0x664 | kernel_init_freeable+0x3c8/0x894 | kernel_init+0x24/0x1e8 | ret_from_fork+0x10/0x20 | | Freed by task 1: | kasan_save_stack+0x2c/0x54 | kasan_set_track+0x2c/0x40 | kasan_save_free_info+0x38/0x5c | __kasan_slab_free+0xe8/0x164 | __kmem_cache_free+0x11c/0x230 | kfree+0x70/0x130 | kfree_const+0x20/0x40 | __scmi_device_destroy+0x70/0x280 | scmi_device_destroy+0x94/0xd0 | scmi_chan_setup+0x524/0x750 | scmi_probe+0x7fc/0x1508 | platform_probe+0xc4/0x19c | really_probe+0x32c/0x99c | __driver_probe_device+0x15c/0x3c4 | driver_probe_device+0x5c/0x170 | __driver_attach+0x1c8/0x440 | bus_for_each_dev+0xf4/0x178 | driver_attach+0x3c/0x58 | bus_add_driver+0x234/0x4d4 | driver_register+0xf4/0x3c0 | __platform_driver_register+0x60/0x88 | scmi_driver_init+0xb0/0x104 | do_one_initcall+0xb4/0x664 | kernel_init_freeable+0x3c8/0x894 | kernel_init+0x24/0x1e8 | ret_from_fork+0x10/0x20 Fixes: ee7a9c9f67c5 ("firmware: arm_scmi: Add support for multiple device per protocol") Signed-off-by: Xinqi Zhang Reviewed-by: Cristian Marussi Reviewed-by: Bjorn Andersson Message-Id: <20241016-fix-arm-scmi-slab-use-after-free-v2-1-1783685ef90d@quicinc.com> Signed-off-by: Sudeep Holla Signed-off-by: May27May Conflicts: drivers/firmware/arm_scmi/bus.c --- drivers/firmware/arm_scmi/bus.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/arm_scmi/bus.c b/drivers/firmware/arm_scmi/bus.c index def8a84d1611..14e253f50f3b 100644 --- a/drivers/firmware/arm_scmi/bus.c +++ b/drivers/firmware/arm_scmi/bus.c @@ -137,7 +137,10 @@ EXPORT_SYMBOL_GPL(scmi_driver_unregister); static void scmi_device_release(struct device *dev) { - kfree(to_scmi_dev(dev)); + struct scmi_device *scmi_dev = to_scmi_dev(dev); + + kfree_const(scmi_dev->name); + kfree(scmi_dev); } struct scmi_device * @@ -178,7 +181,6 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol, return scmi_dev; put_dev: - kfree_const(scmi_dev->name); put_device(&scmi_dev->dev); ida_simple_remove(&scmi_bus_id, id); return NULL; @@ -186,7 +188,6 @@ scmi_device_create(struct device_node *np, struct device *parent, int protocol, void scmi_device_destroy(struct scmi_device *scmi_dev) { - kfree_const(scmi_dev->name); scmi_handle_put(scmi_dev->handle); ida_simple_remove(&scmi_bus_id, scmi_dev->id); device_unregister(&scmi_dev->dev); -- Gitee From 7352ca99d854d611576ade5da0bdbcf3e486302c Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 29 Oct 2024 15:44:35 +0100 Subject: [PATCH 007/116] HID: core: zero-initialize the report buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.230 commit d7dc68d82ab3fcfc3f65322465da3d7031d4ab46 category: bugfix issue: #IBBMZ5 CVE: CVE-2024-50302 Signed-off-by: May27May --------------------------------------- [ Upstream commit 177f25d1292c7e16e1199b39c85480f7f8815552 ] Since the report buffer is used by all kinds of drivers in various ways, let's zero-initialize it during allocation to make sure that it can't be ever used to leak kernel memory via specially-crafted report. Fixes: 27ce405039bf ("HID: fix data access in implement()") Reported-by: Benoît Sevens Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: May27May --- drivers/hid/hid-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 476967ab6294..fefbd0cc05ea 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1665,7 +1665,7 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags) u32 len = hid_report_len(report) + 7; - return kmalloc(len, flags); + return kzalloc(len, flags); } EXPORT_SYMBOL_GPL(hid_alloc_report_buf); -- Gitee From 297b070301e2ce78f54c604767c272a193ac5026 Mon Sep 17 00:00:00 2001 From: Yin Fengwei Date: Sat, 29 Apr 2023 16:27:58 +0800 Subject: [PATCH 008/116] THP: avoid lock when check whether THP is in deferred list mainline inclusion from mainline-v6.5-rc1 commit deedad80f660af8199ea3b3f70939f2d226b9154 category: bugfix issue: #IBBMZ5 CVE: CVE-2024-53079 Signed-off-by: May27May --------------------------------------- free_transhuge_page() acquires split queue lock then check whether the THP was added to deferred list or not. It brings high deferred queue lock contention. It's safe to check whether the THP is in deferred list or not without holding the deferred queue lock in free_transhuge_page() because when code hit free_transhuge_page(), there is no one tries to add the folio to _deferred_list. Running page_fault1 of will-it-scale + order 2 folio for anonymous mapping with 96 processes on an Ice Lake 48C/96T test box, we could see the 61% split_queue_lock contention: - 63.02% 0.01% page_fault1_pro [kernel.kallsyms] [k] free_transhuge_page - 63.01% free_transhuge_page + 62.91% _raw_spin_lock_irqsave With this patch applied, the split_queue_lock contention is less than 1%. Link: https://lkml.kernel.org/r/20230429082759.1600796-2-fengwei.yin@intel.com Signed-off-by: Yin Fengwei Acked-by: Kirill A. Shutemov Reviewed-by: "Huang, Ying" Cc: Matthew Wilcox Cc: Ryan Roberts Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: May27May Conflicts: mm/huge_memory.c --- mm/huge_memory.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e4c690c21fc9..9dc57f5c4f36 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2773,12 +2773,19 @@ void free_transhuge_page(struct page *page) struct deferred_split *ds_queue = get_deferred_split_queue(page); unsigned long flags; - spin_lock_irqsave(&ds_queue->split_queue_lock, flags); - if (!list_empty(page_deferred_list(page))) { - ds_queue->split_queue_len--; - list_del(page_deferred_list(page)); + /* + * At this point, there is no one trying to add the folio to + * deferred_list. If folio is not in deferred_list, it's safe + * to check without acquiring the split_queue_lock. + */ + if (data_race(!list_empty(page_deferred_list(page))) { + spin_lock_irqsave(&ds_queue->split_queue_lock, flags); + if (!list_empty(page_deferred_list(page))) { + ds_queue->split_queue_len--; + list_del(page_deferred_list(page)); + } + spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); } - spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); free_compound_page(page); } -- Gitee From c7cfcf8851cb70a1fdaf8b09f9bc4ef356e6cb88 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 27 Oct 2024 13:02:13 -0700 Subject: [PATCH 009/116] mm/thp: fix deferred split unqueue naming and locking mainline inclusion from mainline-v6.12-rc7 commit f8f931bba0f92052cf842b7e30917b1afcc77d5a category: bugfix issue: #IBBMZ5 CVE: CVE-2024-53079 Signed-off-by: May27May --------------------------------------- Recent changes are putting more pressure on THP deferred split queues: under load revealing long-standing races, causing list_del corruptions, "Bad page state"s and worse (I keep BUGs in both of those, so usually don't get to see how badly they end up without). The relevant recent changes being 6.8's mTHP, 6.10's mTHP swapout, and 6.12's mTHP swapin, improved swap allocation, and underused THP splitting. Before fixing locking: rename misleading folio_undo_large_rmappable(), which does not undo large_rmappable, to folio_unqueue_deferred_split(), which is what it does. But that and its out-of-line __callee are mm internals of very limited usability: add comment and WARN_ON_ONCEs to check usage; and return a bool to say if a deferred split was unqueued, which can then be used in WARN_ON_ONCEs around safety checks (sparing callers the arcane conditionals in __folio_unqueue_deferred_split()). Just omit the folio_unqueue_deferred_split() from free_unref_folios(), all of whose callers now call it beforehand (and if any forget then bad_page() will tell) - except for its caller put_pages_list(), which itself no longer has any callers (and will be deleted separately). Swapout: mem_cgroup_swapout() has been resetting folio->memcg_data 0 without checking and unqueueing a THP folio from deferred split list; which is unfortunate, since the split_queue_lock depends on the memcg (when memcg is enabled); so swapout has been unqueueing such THPs later, when freeing the folio, using the pgdat's lock instead: potentially corrupting the memcg's list. __remove_mapping() has frozen refcount to 0 here, so no problem with calling folio_unqueue_deferred_split() before resetting memcg_data. That goes back to 5.4 commit 87eaceb3faa5 ("mm: thp: make deferred split shrinker memcg aware"): which included a check on swapcache before adding to deferred queue, but no check on deferred queue before adding THP to swapcache. That worked fine with the usual sequence of events in reclaim (though there were a couple of rare ways in which a THP on deferred queue could have been swapped out), but 6.12 commit dafff3f4c850 ("mm: split underused THPs") avoids splitting underused THPs in reclaim, which makes swapcache THPs on deferred queue commonplace. Keep the check on swapcache before adding to deferred queue? Yes: it is no longer essential, but preserves the existing behaviour, and is likely to be a worthwhile optimization (vmstat showed much more traffic on the queue under swapping load if the check was removed); update its comment. Memcg-v1 move (deprecated): mem_cgroup_move_account() has been changing folio->memcg_data without checking and unqueueing a THP folio from the deferred list, sometimes corrupting "from" memcg's list, like swapout. Refcount is non-zero here, so folio_unqueue_deferred_split() can only be used in a WARN_ON_ONCE to validate the fix, which must be done earlier: mem_cgroup_move_charge_pte_range() first try to split the THP (splitting of course unqueues), or skip it if that fails. Not ideal, but moving charge has been requested, and khugepaged should repair the THP later: nobody wants new custom unqueueing code just for this deprecated case. The 87eaceb3faa5 commit did have the code to move from one deferred list to another (but was not conscious of its unsafety while refcount non-0); but that was removed by 5.6 commit fac0516b5534 ("mm: thp: don't need care deferred split queue in memcg charge move path"), which argued that the existence of a PMD mapping guarantees that the THP cannot be on a deferred list. As above, false in rare cases, and now commonly false. Backport to 6.11 should be straightforward. Earlier backports must take care that other _deferred_list fixes and dependencies are included. There is not a strong case for backports, but they can fix cornercases. Link: https://lkml.kernel.org/r/8dc111ae-f6db-2da7-b25c-7a20b1effe3b@google.com Fixes: 87eaceb3faa5 ("mm: thp: make deferred split shrinker memcg aware") Fixes: dafff3f4c850 ("mm: split underused THPs") Signed-off-by: Hugh Dickins Acked-by: David Hildenbrand Reviewed-by: Yang Shi Cc: Baolin Wang Cc: Barry Song Cc: Chris Li Cc: Johannes Weiner Cc: Kefeng Wang Cc: Kirill A. Shutemov Cc: Matthew Wilcox (Oracle) Cc: Nhat Pham Cc: Ryan Roberts Cc: Shakeel Butt Cc: Usama Arif Cc: Wei Yang Cc: Zi Yan Cc: Signed-off-by: Andrew Morton Signed-off-by: May27May Conflicts: mm/huge_memory.c mm/internal.h mm/memcontrol-v1.c mm/memcontrol.c mm/migrate.c mm/page_alloc.c mm/swap.c mm/vmscan.c --- mm/huge_memory.c | 9 +++++++-- mm/internal.h | 14 ++++++++++++++ mm/memcontrol.c | 1 + 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9dc57f5c4f36..97a42d154111 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2768,7 +2768,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) return ret; } -void free_transhuge_page(struct page *page) +void __page_unqueue_deferred_split(struct page *page) { struct deferred_split *ds_queue = get_deferred_split_queue(page); unsigned long flags; @@ -2782,10 +2782,15 @@ void free_transhuge_page(struct page *page) spin_lock_irqsave(&ds_queue->split_queue_lock, flags); if (!list_empty(page_deferred_list(page))) { ds_queue->split_queue_len--; - list_del(page_deferred_list(page)); + list_del_init(page_deferred_list(page)); } spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags); } +} + +void free_transhuge_page(struct page *page) +{ + __page_unqueue_deferred_split(page); free_compound_page(page); } diff --git a/mm/internal.h b/mm/internal.h index 9fc8978e62f3..e85e249a4d9b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -801,4 +801,18 @@ void reclaimacct_collect_data(void); void reclaimacct_collect_reclaim_efficiency(void); #endif +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void __page_unqueue_deferred_split(struct page *page); +static inline void page_unqueue_deferred_split(struct page *page) +{ + if (!PageTransCompound(page)) + return; + + __page_unqueue_deferred_split(page); +} +#else +static inline void page_unqueue_deferred_split(struct page *page) +{ +} +#endif #endif /* __MM_INTERNAL_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ead591002b0c..dc3e442cd911 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -7320,6 +7320,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) VM_BUG_ON_PAGE(oldid, page); mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); + page_unqueue_deferred_split(page); page->mem_cgroup = NULL; if (!mem_cgroup_is_root(memcg)) -- Gitee From 6ff2b8748925bd92b82503be47d8b9e45c6fff01 Mon Sep 17 00:00:00 2001 From: junhua huang Date: Fri, 2 Dec 2022 15:11:10 +0800 Subject: [PATCH 010/116] arm64:uprobe fix the uprobe SWBP_INSN in big-endian MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.229 commit 7f1ef59185d23450d77e94671c0abb03f8978c01 category: bugfix issue: #IBBMZ5 CVE: CVE-2024-50194 Signed-off-by: May27May --------------------------------------- [ Upstream commit 60f07e22a73d318cddaafa5ef41a10476807cc07 ] We use uprobe in aarch64_be, which we found the tracee task would exit due to SIGILL when we enable the uprobe trace. We can see the replace inst from uprobe is not correct in aarch big-endian. As in Armv8-A, instruction fetches are always treated as little-endian, we should treat the UPROBE_SWBP_INSN as little-endian。 The test case is as following。 bash-4.4# ./mqueue_test_aarchbe 1 1 2 1 10 > /dev/null & bash-4.4# cd /sys/kernel/debug/tracing/ bash-4.4# echo 'p:test /mqueue_test_aarchbe:0xc30 %x0 %x1' > uprobe_events bash-4.4# echo 1 > events/uprobes/enable bash-4.4# bash-4.4# ps PID TTY TIME CMD 140 ? 00:00:01 bash 237 ? 00:00:00 ps [1]+ Illegal instruction ./mqueue_test_aarchbe 1 1 2 1 100 > /dev/null which we debug use gdb as following: bash-4.4# gdb attach 155 (gdb) disassemble send Dump of assembler code for function send: 0x0000000000400c30 <+0>: .inst 0xa00020d4 ; undefined 0x0000000000400c34 <+4>: mov x29, sp 0x0000000000400c38 <+8>: str w0, [sp, #28] 0x0000000000400c3c <+12>: strb w1, [sp, #27] 0x0000000000400c40 <+16>: str xzr, [sp, #40] 0x0000000000400c44 <+20>: str xzr, [sp, #48] 0x0000000000400c48 <+24>: add x0, sp, #0x1b 0x0000000000400c4c <+28>: mov w3, #0x0 // #0 0x0000000000400c50 <+32>: mov x2, #0x1 // #1 0x0000000000400c54 <+36>: mov x1, x0 0x0000000000400c58 <+40>: ldr w0, [sp, #28] 0x0000000000400c5c <+44>: bl 0x405e10 0x0000000000400c60 <+48>: str w0, [sp, #60] 0x0000000000400c64 <+52>: ldr w0, [sp, #60] 0x0000000000400c68 <+56>: ldp x29, x30, [sp], #64 0x0000000000400c6c <+60>: ret End of assembler dump. (gdb) info b No breakpoints or watchpoints. (gdb) c Continuing. Program received signal SIGILL, Illegal instruction. 0x0000000000400c30 in send () (gdb) x/10x 0x400c30 0x400c30 : 0xd42000a0 0xfd030091 0xe01f00b9 0xe16f0039 0x400c40 : 0xff1700f9 0xff1b00f9 0xe06f0091 0x03008052 0x400c50 : 0x220080d2 0xe10300aa (gdb) disassemble 0x400c30 Dump of assembler code for function send: => 0x0000000000400c30 <+0>: .inst 0xa00020d4 ; undefined 0x0000000000400c34 <+4>: mov x29, sp 0x0000000000400c38 <+8>: str w0, [sp, #28] 0x0000000000400c3c <+12>: strb w1, [sp, #27] 0x0000000000400c40 <+16>: str xzr, [sp, #40] Signed-off-by: junhua huang Link: https://lore.kernel.org/r/202212021511106844809@zte.com.cn Signed-off-by: Will Deacon Stable-dep-of: 13f8f1e05f1d ("arm64: probes: Fix uprobes for big-endian kernels") Signed-off-by: Sasha Levin Signed-off-by: May27May --- arch/arm64/include/asm/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 315eef654e39..ba4bff5ca674 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -12,7 +12,7 @@ #define MAX_UINSN_BYTES AARCH64_INSN_SIZE -#define UPROBE_SWBP_INSN BRK64_OPCODE_UPROBES +#define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE #define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES -- Gitee From 6cfb5f6dc92950fad54ca9fd8a2408feef20cdd8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 8 Oct 2024 16:58:48 +0100 Subject: [PATCH 011/116] arm64: probes: Fix uprobes for big-endian kernels stable inclusion from stable-v5.10.229 commit cf9ddf9ed94c15564a05bbf6e9f18dffa0c7df80 category: bugfix issue: #IBBMZ5 CVE: CVE-2024-50194 Signed-off-by: May27May --------------------------------------- [ Upstream commit 13f8f1e05f1dc36dbba6cba0ae03354c0dafcde7 ] The arm64 uprobes code is broken for big-endian kernels as it doesn't convert the in-memory instruction encoding (which is always little-endian) into the kernel's native endianness before analyzing and simulating instructions. This may result in a few distinct problems: * The kernel may may erroneously reject probing an instruction which can safely be probed. * The kernel may erroneously erroneously permit stepping an instruction out-of-line when that instruction cannot be stepped out-of-line safely. * The kernel may erroneously simulate instruction incorrectly dur to interpretting the byte-swapped encoding. The endianness mismatch isn't caught by the compiler or sparse because: * The arch_uprobe::{insn,ixol} fields are encoded as arrays of u8, so the compiler and sparse have no idea these contain a little-endian 32-bit value. The core uprobes code populates these with a memcpy() which similarly does not handle endianness. * While the uprobe_opcode_t type is an alias for __le32, both arch_uprobe_analyze_insn() and arch_uprobe_skip_sstep() cast from u8[] to the similarly-named probe_opcode_t, which is an alias for u32. Hence there is no endianness conversion warning. Fix this by changing the arch_uprobe::{insn,ixol} fields to __le32 and adding the appropriate __le32_to_cpu() conversions prior to consuming the instruction encoding. The core uprobes copies these fields as opaque ranges of bytes, and so is unaffected by this change. At the same time, remove MAX_UINSN_BYTES and consistently use AARCH64_INSN_SIZE for clarity. Tested with the following: | #include | #include | | #define noinline __attribute__((noinline)) | | static noinline void *adrp_self(void) | { | void *addr; | | asm volatile( | " adrp %x0, adrp_self\n" | " add %x0, %x0, :lo12:adrp_self\n" | : "=r" (addr)); | } | | | int main(int argc, char *argv) | { | void *ptr = adrp_self(); | bool equal = (ptr == adrp_self); | | printf("adrp_self => %p\n" | "adrp_self() => %p\n" | "%s\n", | adrp_self, ptr, equal ? "EQUAL" : "NOT EQUAL"); | | return 0; | } .... where the adrp_self() function was compiled to: | 00000000004007e0 : | 4007e0: 90000000 adrp x0, 400000 <__ehdr_start> | 4007e4: 911f8000 add x0, x0, #0x7e0 | 4007e8: d65f03c0 ret Before this patch, the ADRP is not recognized, and is assumed to be steppable, resulting in corruption of the result: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0xffffffffff7e0 | NOT EQUAL After this patch, the ADRP is correctly recognized and simulated: | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL | # | # echo 'p /root/adrp-self:0x007e0' > /sys/kernel/tracing/uprobe_events | # echo 1 > /sys/kernel/tracing/events/uprobes/enable | # ./adrp-self | adrp_self => 0x4007e0 | adrp_self() => 0x4007e0 | EQUAL Fixes: 9842ceae9fa8 ("arm64: Add uprobe support") Cc: stable@vger.kernel.org Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Link: https://lore.kernel.org/r/20241008155851.801546-4-mark.rutland@arm.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: May27May --- arch/arm64/include/asm/uprobes.h | 8 +++----- arch/arm64/kernel/probes/uprobes.c | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index ba4bff5ca674..98f29a43bfe8 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -10,11 +10,9 @@ #include #include -#define MAX_UINSN_BYTES AARCH64_INSN_SIZE - #define UPROBE_SWBP_INSN cpu_to_le32(BRK64_OPCODE_UPROBES) #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE -#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES +#define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE typedef u32 uprobe_opcode_t; @@ -23,8 +21,8 @@ struct arch_uprobe_task { struct arch_uprobe { union { - u8 insn[MAX_UINSN_BYTES]; - u8 ixol[MAX_UINSN_BYTES]; + __le32 insn; + __le32 ixol; }; struct arch_probe_insn api; bool simulate; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 2c247634552b..8a02c549e57f 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -42,7 +42,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); switch (arm_probe_decode_insn(insn, &auprobe->api)) { case INSN_REJECTED: @@ -108,7 +108,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (!auprobe->simulate) return false; - insn = *(probe_opcode_t *)(&auprobe->insn[0]); + insn = le32_to_cpu(auprobe->insn); addr = instruction_pointer(regs); if (auprobe->api.handler) -- Gitee From 6662a84c81d106e1c5c0fb79757141fa07d205cc Mon Sep 17 00:00:00 2001 From: junhua huang Date: Wed, 28 Dec 2022 09:54:12 +0800 Subject: [PATCH 012/116] arm64/uprobes: change the uprobe_opcode_t typedef to fix the sparse warning stable inclusion from stable-v5.10.229 commit 4abbba7105831a4402b2c983b24503b908f39396 category: bugfix issue: #IBBMZ5 CVE: CVE-2024-50194 Signed-off-by: May27May --------------------------------------- commit ef08c0fadd8a17ebe429b85e23952dac3263ad34 upstream. After we fixed the uprobe inst endian in aarch_be, the sparse check report the following warning info: sparse warnings: (new ones prefixed by >>) >> kernel/events/uprobes.c:223:25: sparse: sparse: restricted __le32 degrades to integer >> kernel/events/uprobes.c:574:56: sparse: sparse: incorrect type in argument 4 (different base types) @@ expected unsigned int [addressable] [usertype] opcode @@ got restricted __le32 [usertype] @@ kernel/events/uprobes.c:574:56: sparse: expected unsigned int [addressable] [usertype] opcode kernel/events/uprobes.c:574:56: sparse: got restricted __le32 [usertype] >> kernel/events/uprobes.c:1483:32: sparse: sparse: incorrect type in initializer (different base types) @@ expected unsigned int [usertype] insn @@ got restricted __le32 [usertype] @@ kernel/events/uprobes.c:1483:32: sparse: expected unsigned int [usertype] insn kernel/events/uprobes.c:1483:32: sparse: got restricted __le32 [usertype] use the __le32 to u32 for uprobe_opcode_t, to keep the same. Fixes: 60f07e22a73d ("arm64:uprobe fix the uprobe SWBP_INSN in big-endian") Reported-by: kernel test robot Signed-off-by: junhua huang Link: https://lore.kernel.org/r/202212280954121197626@zte.com.cn Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman Signed-off-by: May27May --- arch/arm64/include/asm/uprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h index 98f29a43bfe8..014b02897f8e 100644 --- a/arch/arm64/include/asm/uprobes.h +++ b/arch/arm64/include/asm/uprobes.h @@ -14,7 +14,7 @@ #define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE #define UPROBE_XOL_SLOT_BYTES AARCH64_INSN_SIZE -typedef u32 uprobe_opcode_t; +typedef __le32 uprobe_opcode_t; struct arch_uprobe_task { }; -- Gitee From e131e0dbbf84c8b1dc2f5717d193cb569254d8f2 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Tue, 24 Sep 2024 14:08:43 -0700 Subject: [PATCH 013/116] bpf: sync_linked_regs() must preserve subreg_def mainline inclusion from mainline-v6.12-rc4 commit e9bd9c498cb0f5843996dbe5cbce7a1836a83c70 category: bugfix issue: #IBBMZ5 CVE: CVE-2024-53125 Signed-off-by: May27May --------------------------------------- Range propagation must not affect subreg_def marks, otherwise the following example is rewritten by verifier incorrectly when BPF_F_TEST_RND_HI32 flag is set: 0: call bpf_ktime_get_ns call bpf_ktime_get_ns 1: r0 &= 0x7fffffff after verifier r0 &= 0x7fffffff 2: w1 = w0 rewrites w1 = w0 3: if w0 < 10 goto +0 --------------> r11 = 0x2f5674a6 (r) 4: r1 >>= 32 r11 <<= 32 (r) 5: r0 = r1 r1 |= r11 (r) 6: exit; if w0 < 0xa goto pc+0 r1 >>= 32 r0 = r1 exit (or zero extension of w1 at (2) is missing for architectures that require zero extension for upper register half). The following happens w/o this patch: - r0 is marked as not a subreg at (0); - w1 is marked as subreg at (2); - w1 subreg_def is overridden at (3) by copy_register_state(); - w1 is read at (5) but mark_insn_zext() does not mark (2) for zero extension, because w1 subreg_def is not set; - because of BPF_F_TEST_RND_HI32 flag verifier inserts random value for hi32 bits of (2) (marked (r)); - this random value is read at (5). Fixes: 75748837b7e5 ("bpf: Propagate scalar ranges through register assignments.") Reported-by: Lonial Con Signed-off-by: Lonial Con Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Closes: https://lore.kernel.org/bpf/7e2aa30a62d740db182c170fdd8f81c596df280d.camel@gmail.com Link: https://lore.kernel.org/bpf/20240924210844.1758441-1-eddyz87@gmail.com Signed-off-by: May27May Conflicts: kernel/bpf/verifier.c --- kernel/bpf/verifier.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6fc79a7babd5..f907883627a8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8206,8 +8206,12 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate, struct bpf_reg_state *reg; bpf_for_each_reg_in_vstate(vstate, state, reg, ({ - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) { + s32 saved_subreg_def = reg->subreg_def; + copy_register_state(reg, known_reg); + reg->subreg_def = saved_subreg_def; + } })); } -- Gitee From 5bcd66e23b8b9335257036d534485921280aa80f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Oct 2024 08:02:29 +0000 Subject: [PATCH 014/116] netfilter: nf_reject_ipv6: fix potential crash in nf_send_reset6() mainline inclusion from mainline-v6.12-rc6 commit 4ed234fe793f27a3b151c43d2106df2ff0d81aac category: bugfix issue: #IBDSFS CVE: CVE-2024-50256 Signed-off-by: wukuan --------------------------------------- I got a syzbot report without a repro [1] crashing in nf_send_reset6() I think the issue is that dev->hard_header_len is zero, and we attempt later to push an Ethernet header. Use LL_MAX_HEADER, as other functions in net/ipv6/netfilter/nf_reject_ipv6.c. [1] skbuff: skb_under_panic: text:ffffffff89b1d008 len:74 put:14 head:ffff88803123aa00 data:ffff88803123a9f2 tail:0x3c end:0x140 dev:syz_tun kernel BUG at net/core/skbuff.c:206 ! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 0 UID: 0 PID: 7373 Comm: syz.1.568 Not tainted 6.12.0-rc2-syzkaller-00631-g6d858708d465 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:skb_panic net/core/skbuff.c:206 [inline] RIP: 0010:skb_under_panic+0x14b/0x150 net/core/skbuff.c:216 Code: 0d 8d 48 c7 c6 60 a6 29 8e 48 8b 54 24 08 8b 0c 24 44 8b 44 24 04 4d 89 e9 50 41 54 41 57 41 56 e8 ba 30 38 02 48 83 c4 20 90 <0f> 0b 0f 1f 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 RSP: 0018:ffffc900045269b0 EFLAGS: 00010282 RAX: 0000000000000088 RBX: dffffc0000000000 RCX: cd66dacdc5d8e800 RDX: 0000000000000000 RSI: 0000000000000200 RDI: 0000000000000000 RBP: ffff88802d39a3d0 R08: ffffffff8174afec R09: 1ffff920008a4ccc R10: dffffc0000000000 R11: fffff520008a4ccd R12: 0000000000000140 R13: ffff88803123aa00 R14: ffff88803123a9f2 R15: 000000000000003c FS: 00007fdbee5ff6c0(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000005d322000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: skb_push+0xe5/0x100 net/core/skbuff.c:2636 eth_header+0x38/0x1f0 net/ethernet/eth.c:83 dev_hard_header include/linux/netdevice.h:3208 [inline] nf_send_reset6+0xce6/0x1270 net/ipv6/netfilter/nf_reject_ipv6.c:358 nft_reject_inet_eval+0x3b9/0x690 net/netfilter/nft_reject_inet.c:48 expr_call_ops_eval net/netfilter/nf_tables_core.c:240 [inline] nft_do_chain+0x4ad/0x1da0 net/netfilter/nf_tables_core.c:288 nft_do_chain_inet+0x418/0x6b0 net/netfilter/nft_chain_filter.c:161 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xc3/0x220 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] NF_HOOK include/linux/netfilter.h:312 [inline] br_nf_pre_routing_ipv6+0x63e/0x770 net/bridge/br_netfilter_ipv6.c:184 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_bridge_pre net/bridge/br_input.c:277 [inline] br_handle_frame+0x9fd/0x1530 net/bridge/br_input.c:424 __netif_receive_skb_core+0x13e8/0x4570 net/core/dev.c:5562 __netif_receive_skb_one_core net/core/dev.c:5666 [inline] __netif_receive_skb+0x12f/0x650 net/core/dev.c:5781 netif_receive_skb_internal net/core/dev.c:5867 [inline] netif_receive_skb+0x1e8/0x890 net/core/dev.c:5926 tun_rx_batched+0x1b7/0x8f0 drivers/net/tun.c:1550 tun_get_user+0x3056/0x47e0 drivers/net/tun.c:2007 tun_chr_write_iter+0x10d/0x1f0 drivers/net/tun.c:2053 new_sync_write fs/read_write.c:590 [inline] vfs_write+0xa6d/0xc90 fs/read_write.c:683 ksys_write+0x183/0x2b0 fs/read_write.c:736 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fdbeeb7d1ff Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 c9 8d 02 00 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 1c 8e 02 00 48 RSP: 002b:00007fdbee5ff000 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00007fdbeed36058 RCX: 00007fdbeeb7d1ff RDX: 000000000000008e RSI: 0000000020000040 RDI: 00000000000000c8 RBP: 00007fdbeebf12be R08: 0000000000000000 R09: 0000000000000000 R10: 000000000000008e R11: 0000000000000293 R12: 0000000000000000 R13: 0000000000000000 R14: 00007fdbeed36058 R15: 00007ffc38de06e8 Fixes: c8d7b98bec43 ("netfilter: move nf_send_resetX() code to nf_reject_ipvX modules") Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso Signed-off-by: wukuan --- net/ipv6/netfilter/nf_reject_ipv6.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 832d9f9cd10a..2b76f5b2118e 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -144,14 +144,12 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in) void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, int hook) { - struct net_device *br_indev __maybe_unused; - struct sk_buff *nskb; - struct tcphdr _otcph; - const struct tcphdr *otcph; - unsigned int otcplen, hh_len; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; struct dst_entry *dst = NULL; + const struct tcphdr *otcph; + struct sk_buff *nskb; + struct tcphdr _otcph; + unsigned int otcplen; struct flowi6 fl6; if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || @@ -190,9 +188,8 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, if (IS_ERR(dst)) return; - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, + nskb = alloc_skb(LL_MAX_HEADER + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, GFP_ATOMIC); if (!nskb) { @@ -205,9 +202,8 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, nskb->mark = fl6.flowi6_mark; - skb_reserve(nskb, hh_len + dst->header_len); - ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, - ip6_dst_hoplimit(dst)); + skb_reserve(nskb, LL_MAX_HEADER); + nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, ip6_dst_hoplimit(dst)); nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); -- Gitee From 36cfa32b91be6134b593601ad00a133fc9b1e96e Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 30 Oct 2024 03:55:10 +0000 Subject: [PATCH 015/116] initramfs: avoid filename buffer overrun mainline inclusion from mainline-v6.13-rc1 commit e017671f534dd3f568db9e47b0583e853d2da9b5 category: bugfix issue: NA CVE: CVE-2024-53142 Signed-off-by: wukuan --------------------------------------- The initramfs filename field is defined in Documentation/driver-api/early-userspace/buffer-format.rst as: 37 cpio_file := ALGN(4) + cpio_header + filename + "\0" + ALGN(4) + data ... 55 ============= ================== ========================= 56 Field name Field size Meaning 57 ============= ================== ========================= ... 70 c_namesize 8 bytes Length of filename, including final \0 When extracting an initramfs cpio archive, the kernel's do_name() path handler assumes a zero-terminated path at @collected, passing it directly to filp_open() / init_mkdir() / init_mknod(). If a specially crafted cpio entry carries a non-zero-terminated filename and is followed by uninitialized memory, then a file may be created with trailing characters that represent the uninitialized memory. The ability to create an initramfs entry would imply already having full control of the system, so the buffer overrun shouldn't be considered a security vulnerability. Append the output of the following bash script to an existing initramfs and observe any created /initramfs_test_fname_overrunAA* path. E.g. ./reproducer.sh | gzip >> /myinitramfs It's easiest to observe non-zero uninitialized memory when the output is gzipped, as it'll overflow the heap allocated @out_buf in __gunzip(), rather than the initrd_start+initrd_size block. ---- reproducer.sh ---- nilchar="A" # change to "\0" to properly zero terminate / pad magic="070701" ino=1 mode=$(( 0100777 )) uid=0 gid=0 nlink=1 mtime=1 filesize=0 devmajor=0 devminor=1 rdevmajor=0 rdevminor=0 csum=0 fname="initramfs_test_fname_overrun" namelen=$(( ${#fname} + 1 )) # plus one to account for terminator printf "%s%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%s" \ $magic $ino $mode $uid $gid $nlink $mtime $filesize \ $devmajor $devminor $rdevmajor $rdevminor $namelen $csum $fname termpadlen=$(( 1 + ((4 - ((110 + $namelen) & 3)) % 4) )) printf "%.s${nilchar}" $(seq 1 $termpadlen) ---- reproducer.sh ---- Symlink filename fields handled in do_symlink() won't overrun past the data segment, due to the explicit zero-termination of the symlink target. Fix filename buffer overrun by aborting the initramfs FSM if any cpio entry doesn't carry a zero-terminator at the expected (name_len - 1) offset. Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") Signed-off-by: David Disseldorp Link: https://lore.kernel.org/r/20241030035509.20194-2-ddiss@suse.de Signed-off-by: Christian Brauner Signed-off-by: wukuan --- init/initramfs.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/init/initramfs.c b/init/initramfs.c index 55b74d7e5260..4534c6611fa5 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -325,6 +325,15 @@ static int __init do_name(void) { state = SkipIt; next_state = Reset; + + /* name_len > 0 && name_len <= PATH_MAX checked in do_header */ + if (collected[name_len - 1] != '\0') { + pr_err("initramfs name without nulterm: %.*s\n", + (int)name_len, collected); + error("malformed archive"); + return 1; + } + if (strcmp(collected, "TRAILER!!!") == 0) { free_hash(); return 0; @@ -390,6 +399,12 @@ static int __init do_copy(void) static int __init do_symlink(void) { + if (collected[name_len - 1] != '\0') { + pr_err("initramfs symlink without nulterm: %.*s\n", + (int)name_len, collected); + error("malformed archive"); + return 1; + } collected[N_ALIGN(name_len) + body_len] = '\0'; clean_path(collected, 0); init_symlink(collected + N_ALIGN(name_len), collected); -- Gitee From bf951fe9782d07ae4d0b9567c757a7a78a741904 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Thu, 7 Nov 2024 10:34:05 +0800 Subject: [PATCH 016/116] net: fix data-races around sk->sk_forward_alloc mainline inclusion from mainline-v6.12 commit 073d89808c065ac4c672c0a613a71b27a80691cb category: bugfix issue: NA CVE: CVE-2024-53124 Signed-off-by: wukuan --------------------------------------- Syzkaller reported this warning: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 16 at net/ipv4/af_inet.c:156 inet_sock_destruct+0x1c5/0x1e0 Modules linked in: CPU: 0 UID: 0 PID: 16 Comm: ksoftirqd/0 Not tainted 6.12.0-rc5 #26 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:inet_sock_destruct+0x1c5/0x1e0 Code: 24 12 4c 89 e2 5b 48 c7 c7 98 ec bb 82 41 5c e9 d1 18 17 ff 4c 89 e6 5b 48 c7 c7 d0 ec bb 82 41 5c e9 bf 18 17 ff 0f 0b eb 83 <0f> 0b eb 97 0f 0b eb 87 0f 0b e9 68 ff ff ff 66 66 2e 0f 1f 84 00 RSP: 0018:ffffc9000008bd90 EFLAGS: 00010206 RAX: 0000000000000300 RBX: ffff88810b172a90 RCX: 0000000000000007 RDX: 0000000000000002 RSI: 0000000000000300 RDI: ffff88810b172a00 RBP: ffff88810b172a00 R08: ffff888104273c00 R09: 0000000000100007 R10: 0000000000020000 R11: 0000000000000006 R12: ffff88810b172a00 R13: 0000000000000004 R14: 0000000000000000 R15: ffff888237c31f78 FS: 0000000000000000(0000) GS:ffff888237c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffc63fecac8 CR3: 000000000342e000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __warn+0x88/0x130 ? inet_sock_destruct+0x1c5/0x1e0 ? report_bug+0x18e/0x1a0 ? handle_bug+0x53/0x90 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? inet_sock_destruct+0x1c5/0x1e0 __sk_destruct+0x2a/0x200 rcu_do_batch+0x1aa/0x530 ? rcu_do_batch+0x13b/0x530 rcu_core+0x159/0x2f0 handle_softirqs+0xd3/0x2b0 ? __pfx_smpboot_thread_fn+0x10/0x10 run_ksoftirqd+0x25/0x30 smpboot_thread_fn+0xdd/0x1d0 kthread+0xd3/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 ---[ end trace 0000000000000000 ]--- Its possible that two threads call tcp_v6_do_rcv()/sk_forward_alloc_add() concurrently when sk->sk_state == TCP_LISTEN with sk->sk_lock unlocked, which triggers a data-race around sk->sk_forward_alloc: tcp_v6_rcv tcp_v6_do_rcv skb_clone_and_charge_r sk_rmem_schedule __sk_mem_schedule sk_forward_alloc_add() skb_set_owner_r sk_mem_charge sk_forward_alloc_add() __kfree_skb skb_release_all skb_release_head_state sock_rfree sk_mem_uncharge sk_forward_alloc_add() sk_mem_reclaim // set local var reclaimable __sk_mem_reclaim sk_forward_alloc_add() In this syzkaller testcase, two threads call tcp_v6_do_rcv() with skb->truesize=768, the sk_forward_alloc changes like this: (cpu 1) | (cpu 2) | sk_forward_alloc ... | ... | 0 __sk_mem_schedule() | | +4096 = 4096 | __sk_mem_schedule() | +4096 = 8192 sk_mem_charge() | | -768 = 7424 | sk_mem_charge() | -768 = 6656 ... | ... | sk_mem_uncharge() | | +768 = 7424 reclaimable=7424 | | | sk_mem_uncharge() | +768 = 8192 | reclaimable=8192 | __sk_mem_reclaim() | | -4096 = 4096 | __sk_mem_reclaim() | -8192 = -4096 != 0 The skb_clone_and_charge_r() should not be called in tcp_v6_do_rcv() when sk->sk_state is TCP_LISTEN, it happens later in tcp_v6_syn_recv_sock(). Fix the same issue in dccp_v6_do_rcv(). Suggested-by: Eric Dumazet Reviewed-by: Eric Dumazet Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Signed-off-by: Wang Liang Link: https://patch.msgid.link/20241107023405.889239-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: wukuan --- net/dccp/ipv6.c | 2 +- net/ipv6/tcp_ipv6.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 991ca2dc2c02..aa311ab960b6 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -602,7 +602,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) by tcp. Feel free to propose better solution. --ANK (980728) */ - if (np->rxopt.all) + if (np->rxopt.all && sk->sk_state != DCCP_LISTEN) opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d175e673a243..7187fc0971dc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1483,7 +1483,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) by tcp. Feel free to propose better solution. --ANK (980728) */ - if (np->rxopt.all) + if (np->rxopt.all && sk->sk_state != TCP_LISTEN) opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ @@ -1520,8 +1520,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (nsk != sk) { if (tcp_child_process(sk, nsk, skb)) goto reset; - if (opt_skb) - __kfree_skb(opt_skb); return 0; } } else -- Gitee From 9d6c4a063ec2069ac9831232138c5ff83763996c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 5 Nov 2024 17:52:34 -0800 Subject: [PATCH 017/116] netlink: terminate outstanding dump on socket close mainline inclusion from mainline-v6.12 commit 1904fb9ebf911441f90a68e96b22aa73e4410505 category: bugfix issue: NA CVE: CVE-2024-53140 Signed-off-by: wukuan --------------------------------------- Netlink supports iterative dumping of data. It provides the families the following ops: - start - (optional) kicks off the dumping process - dump - actual dump helper, keeps getting called until it returns 0 - done - (optional) pairs with .start, can be used for cleanup The whole process is asynchronous and the repeated calls to .dump don't actually happen in a tight loop, but rather are triggered in response to recvmsg() on the socket. This gives the user full control over the dump, but also means that the user can close the socket without getting to the end of the dump. To make sure .start is always paired with .done we check if there is an ongoing dump before freeing the socket, and if so call .done. The complication is that sockets can get freed from BH and .done is allowed to sleep. So we use a workqueue to defer the call, when needed. Unfortunately this does not work correctly. What we defer is not the cleanup but rather releasing a reference on the socket. We have no guarantee that we own the last reference, if someone else holds the socket they may release it in BH and we're back to square one. The whole dance, however, appears to be unnecessary. Only the user can interact with dumps, so we can clean up when socket is closed. And close always happens in process context. Some async code may still access the socket after close, queue notification skbs to it etc. but no dumps can start, end or otherwise make progress. Delete the workqueue and flush the dump state directly from the release handler. Note that further cleanup is possible in -next, for instance we now always call .done before releasing the main module reference, so dump doesn't have to take a reference of its own. Reported-by: syzkaller Fixes: ed5d7788a934 ("netlink: Do not schedule work from sk_destruct") Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241106015235.2458807-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: wukuan --- net/netlink/af_netlink.c | 31 ++++++++----------------------- net/netlink/af_netlink.h | 2 -- 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index fd5c7d76b0fc..9c603bdf37ba 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -385,15 +385,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) static void netlink_sock_destruct(struct sock *sk) { - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->cb_running) { - if (nlk->cb.done) - nlk->cb.done(&nlk->cb); - module_put(nlk->cb.module); - kfree_skb(nlk->cb.skb); - } - skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -406,14 +397,6 @@ static void netlink_sock_destruct(struct sock *sk) WARN_ON(nlk_sk(sk)->groups); } -static void netlink_sock_destruct_work(struct work_struct *work) -{ - struct netlink_sock *nlk = container_of(work, struct netlink_sock, - work); - - sk_free(&nlk->sk); -} - /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on * SMP. Look, when several writers sleep and reader wakes them up, all but one * immediately hit write lock and grab all the cpus. Exclusive sleep solves @@ -730,12 +713,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head) if (!refcount_dec_and_test(&sk->sk_refcnt)) return; - if (nlk->cb_running && nlk->cb.done) { - INIT_WORK(&nlk->work, netlink_sock_destruct_work); - schedule_work(&nlk->work); - return; - } - sk_free(sk); } @@ -785,6 +762,14 @@ static int netlink_release(struct socket *sock) NETLINK_URELEASE, &n); } + /* Terminate any outstanding dump */ + if (nlk->cb_running) { + if (nlk->cb.done) + nlk->cb.done(&nlk->cb); + module_put(nlk->cb.module); + kfree_skb(nlk->cb.skb); + } + module_put(nlk->module); if (netlink_is_kernel(sk)) { diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 5f454c8de6a4..fca955684888 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -4,7 +4,6 @@ #include #include -#include #include /* flags */ @@ -46,7 +45,6 @@ struct netlink_sock { struct rhash_head node; struct rcu_head rcu; - struct work_struct work; }; static inline struct netlink_sock *nlk_sk(struct sock *sk) -- Gitee From dacbcd8cf933cd3eff8d2122571db78113ebed54 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Wed, 23 Oct 2024 11:52:13 +0800 Subject: [PATCH 018/116] net: fix crash when config small gso_max_size/gso_ipv4_max_size mainline inclusion from mainline-v6.12-rc6 commit 9ab5cf19fb0e4680f95e506d6c544259bf1111c4 category: bugfix issue: NA CVE: CVE-2024-50258 Signed-off-by: wukuan --------------------------------------- Config a small gso_max_size/gso_ipv4_max_size will lead to an underflow in sk_dst_gso_max_size(), which may trigger a BUG_ON crash, because sk->sk_gso_max_size would be much bigger than device limits. Call Trace: tcp_write_xmit tso_segs = tcp_init_tso_segs(skb, mss_now); tcp_set_skb_tso_segs tcp_skb_pcount_set // skb->len = 524288, mss_now = 8 // u16 tso_segs = 524288/8 = 65535 -> 0 tso_segs = DIV_ROUND_UP(skb->len, mss_now) BUG_ON(!tso_segs) Add check for the minimum value of gso_max_size and gso_ipv4_max_size. Fixes: 46e6b992c250 ("rtnetlink: allow GSO maximums to be set on device creation") Fixes: 9eefedd58ae1 ("net: add gso_ipv4_max_size and gro_ipv4_max_size per device") Signed-off-by: Wang Liang Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241023035213.517386-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: wukuan --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2806b9ed6387..d6dbb042489f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1864,7 +1864,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NUM_TX_QUEUES] = { .type = NLA_U32 }, [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, [IFLA_GSO_MAX_SEGS] = { .type = NLA_U32 }, - [IFLA_GSO_MAX_SIZE] = { .type = NLA_U32 }, + [IFLA_GSO_MAX_SIZE] = NLA_POLICY_MIN(NLA_U32, MAX_TCP_HEADER + 1), [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */ [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, -- Gitee From 690939a558a76fae40ae1866f26b398f5c440559 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:01 +0100 Subject: [PATCH 019/116] Bluetooth: L2CAP: do not leave dangling sk pointer on error in l2cap_sock_create() mainline inclusion from mainline-v6.13-rc1 commit 7c4f78cdb8e7501e9f92d291a7d956591bf73be9 category: bugfix issue: #IBG4O6 CVE: CVE-2024-56605 Signed-off-by: wukuan --------------------------------------- bt_sock_alloc() allocates the sk object and attaches it to the provided sock object. On error l2cap_sock_alloc() frees the sk object, but the dangling pointer is still attached to the sock object, which may create use-after-free in other code. Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-3-ignat@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: wukuan --- net/bluetooth/l2cap_sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 756523e5402a..4493fc68efca 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1875,6 +1875,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, chan = l2cap_chan_create(); if (!chan) { sk_free(sk); + sock->sk = NULL; return NULL; } -- Gitee From fb3fc8985185da48b68acfa4497d6b80123e55c0 Mon Sep 17 00:00:00 2001 From: Zijian Zhang Date: Wed, 6 Nov 2024 22:25:19 +0000 Subject: [PATCH 020/116] bpf, sockmap: Several fixes to bpf_msg_pop_data mainline inclusion from mainline-v6.13-rc1 commit 5d609ba262475db450ba69b8e8a557bd768ac07a category: bugfix issue: NA CVE: CVE-2024-56720 Signed-off-by: wukuan --------------------------------------- Several fixes to bpf_msg_pop_data, 1. In sk_msg_shift_left, we should put_page 2. if (len == 0), return early is better 3. pop the entire sk_msg (last == msg->sg.size) should be supported 4. Fix for the value of variable "a" 5. In sk_msg_shift_left, after shifting, i has already pointed to the next element. Addtional sk_msg_iter_var_next may result in BUG. Fixes: 7246d8ed4dcc ("bpf: helper to pop data from messages") Signed-off-by: Zijian Zhang Reviewed-by: John Fastabend Link: https://lore.kernel.org/r/20241106222520.527076-8-zijianzhang@bytedance.com Signed-off-by: Martin KaFai Lau Signed-off-by: wukuan --- net/core/filter.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index f20e79f2e054..61395535c91c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2890,8 +2890,10 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = { static void sk_msg_shift_left(struct sk_msg *msg, int i) { + struct scatterlist *sge = sk_msg_elem(msg, i); int prev; + put_page(sg_page(sge)); do { prev = i; sk_msg_iter_var_next(i); @@ -2928,6 +2930,9 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, if (unlikely(flags)) return -EINVAL; + if (unlikely(len == 0)) + return 0; + /* First find the starting scatterlist element */ i = msg->sg.start; do { @@ -2940,7 +2945,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, } while (i != msg->sg.end); /* Bounds checks: start and pop must be inside message */ - if (start >= offset + l || last >= msg->sg.size) + if (start >= offset + l || last > msg->sg.size) return -EINVAL; space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); @@ -2969,12 +2974,12 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, */ if (start != offset) { struct scatterlist *nsge, *sge = sk_msg_elem(msg, i); - int a = start; + int a = start - offset; int b = sge->length - pop - a; sk_msg_iter_var_next(i); - if (pop < sge->length - a) { + if (b > 0) { if (space) { sge->length = a; sk_msg_shift_right(msg, i); @@ -2993,7 +2998,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, if (unlikely(!page)) return -ENOMEM; - sge->length = a; orig = sg_page(sge); from = sg_virt(sge); to = page_address(page); @@ -3003,7 +3007,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, put_page(orig); } pop = 0; - } else if (pop >= sge->length - a) { + } else { pop -= (sge->length - a); sge->length = a; } @@ -3037,7 +3041,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, pop -= sge->length; sk_msg_shift_left(msg, i); } - sk_msg_iter_var_next(i); } sk_mem_uncharge(msg->sk, len - pop); -- Gitee From 32cef7331bd03d12376df04acca25f459275c4d2 Mon Sep 17 00:00:00 2001 From: Yongliang Gao Date: Fri, 11 Oct 2024 12:31:53 +0800 Subject: [PATCH 021/116] rtc: check if __rtc_read_time was successful in rtc_timer_do_work() mainline inclusion from mainline-v6.13-rc1 commit e8ba8a2bc4f60a1065f23d6a0e7cbea945a0f40d category: bugfix issue: NA CVE: CVE-2024-56739 Signed-off-by: wukuan --------------------------------------- If the __rtc_read_time call fails,, the struct rtc_time tm; may contain uninitialized data, or an illegal date/time read from the RTC hardware. When calling rtc_tm_to_ktime later, the result may be a very large value (possibly KTIME_MAX). If there are periodic timers in rtc->timerqueue, they will continually expire, may causing kernel softlockup. Fixes: 6610e0893b8b ("RTC: Rework RTC code to use timerqueue for events") Signed-off-by: Yongliang Gao Acked-by: Jingqun Li Link: https://lore.kernel.org/r/20241011043153.3788112-1-leonylgao@gmail.com Signed-off-by: Alexandre Belloni Signed-off-by: wukuan --- drivers/rtc/interface.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 146056858135..930586b1cc57 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -906,13 +906,18 @@ void rtc_timer_do_work(struct work_struct *work) struct timerqueue_node *next; ktime_t now; struct rtc_time tm; + int err; struct rtc_device *rtc = container_of(work, struct rtc_device, irqwork); mutex_lock(&rtc->ops_lock); again: - __rtc_read_time(rtc, &tm); + err = __rtc_read_time(rtc, &tm); + if (err) { + mutex_unlock(&rtc->ops_lock); + return; + } now = rtc_tm_to_ktime(tm); while ((next = timerqueue_getnext(&rtc->timerqueue))) { if (next->expires > now) -- Gitee From c317d571f433db20fc8be0feb896270849460b94 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:05 +0100 Subject: [PATCH 022/116] net: inet: do not leave a dangling sk pointer in inet_create() mainline inclusion from mainline-v6.13-rc1 commit 9365fa510c6f82e3aa550a09d0c5c6b44dbc78ff category: bugfix issue: NA CVE: CVE-2024-56601 Signed-off-by: wukuan --------------------------------------- sock_init_data() attaches the allocated sk object to the provided sock object. If inet_create() fails later, the sk object is freed, but the sock object retains the dangling pointer, which may create use-after-free later. Clear the sk pointer in the sock object on error. Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-7-ignat@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: wukuan --- net/ipv4/af_inet.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fe8ff31545a5..39683614fb0f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -376,32 +376,30 @@ static int inet_create(struct net *net, struct socket *sock, int protocol, inet->inet_sport = htons(inet->inet_num); /* Add to protocol hash chains. */ err = sk->sk_prot->hash(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } -- Gitee From c74fb4bb270a47b8b12dbcb87593510be3205d97 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:06 +0100 Subject: [PATCH 023/116] net: inet6: do not leave a dangling sk pointer in inet6_create() mainline inclusion from mainline-v6.13-rc1 commit 9df99c395d0f55fb444ef39f4d6f194ca437d884 category: bugfix issue: NA CVE: CVE-2024-56600 Signed-off-by: wukuan --------------------------------------- sock_init_data() attaches the allocated sk pointer to the provided sock object. If inet6_create() fails later, the sk object is released, but the sock object retains the dangling sk pointer, which may cause use-after-free later. Clear the sock sk pointer on error. Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-8-ignat@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: wukuan --- net/ipv6/af_inet6.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 0979a4b15f26..a97c2bbbcb83 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -254,31 +254,29 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, */ inet->inet_sport = htons(inet->inet_num); err = sk->sk_prot->hash(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); - if (err) { - sk_common_release(sk); - goto out; - } + if (err) + goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; +out_sk_release: + sk_common_release(sk); + sock->sk = NULL; + goto out; } static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, -- Gitee From 7e92920ec03345e8dc6be191479fb20730039971 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Thu, 28 Nov 2024 09:59:50 +0100 Subject: [PATCH 024/116] net/ipv6: release expired exception dst cached in socket mainline inclusion from mainline-v6.13-rc2 commit 3301ab7d5aeb0fe270f73a3d4810c9d1b6a9f045 category: bugfix issue: NA CVE: CVE-2024-56644 Signed-off-by: wukuan --------------------------------------- Dst objects get leaked in ip6_negative_advice() when this function is executed for an expired IPv6 route located in the exception table. There are several conditions that must be fulfilled for the leak to occur: * an ICMPv6 packet indicating a change of the MTU for the path is received, resulting in an exception dst being created * a TCP connection that uses the exception dst for routing packets must start timing out so that TCP begins retransmissions * after the exception dst expires, the FIB6 garbage collector must not run before TCP executes ip6_negative_advice() for the expired exception dst When TCP executes ip6_negative_advice() for an exception dst that has expired and if no other socket holds a reference to the exception dst, the refcount of the exception dst is 2, which corresponds to the increment made by dst_init() and the increment made by the TCP socket for which the connection is timing out. The refcount made by the socket is never released. The refcount of the dst is decremented in sk_dst_reset() but that decrement is counteracted by a dst_hold() intentionally placed just before the sk_dst_reset() in ip6_negative_advice(). After ip6_negative_advice() has finished, there is no other object tied to the dst. The socket lost its reference stored in sk_dst_cache and the dst is no longer in the exception table. The exception dst becomes a leaked object. As a result of this dst leak, an unbalanced refcount is reported for the loopback device of a net namespace being destroyed under kernels that do not contain e5f80fcf869a ("ipv6: give an IPv6 dev to blackhole_netdev"): unregister_netdevice: waiting for lo to become free. Usage count = 2 Fix the dst leak by removing the dst_hold() in ip6_negative_advice(). The patch that introduced the dst_hold() in ip6_negative_advice() was 92f1655aa2b22 ("net: fix __dst_negative_advice() race"). But 92f1655aa2b22 merely refactored the code with regards to the dst refcount so the issue was present even before 92f1655aa2b22. The bug was introduced in 54c1a859efd9f ("ipv6: Don't drop cache route entry unless timer actually expired.") where the expired cached route is deleted and the sk_dst_cache member of the socket is set to NULL by calling dst_negative_advice() but the refcount belonging to the socket is left unbalanced. The IPv4 version - ipv4_negative_advice() - is not affected by this bug. When the TCP connection times out ipv4_negative_advice() merely resets the sk_dst_cache of the socket while decrementing the refcount of the exception dst. Fixes: 92f1655aa2b22 ("net: fix __dst_negative_advice() race") Fixes: 54c1a859efd9f ("ipv6: Don't drop cache route entry unless timer actually expired.") Link: https://lore.kernel.org/netdev/20241113105611.GA6723@incl/T/#u Signed-off-by: Jiri Wiesner Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241128085950.GA4505@incl Signed-off-by: Jakub Kicinski Signed-off-by: wukuan --- net/ipv6/route.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f41f9232857c..3a0f4e5040e1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2644,10 +2644,10 @@ static void ip6_negative_advice(struct sock *sk, if (rt->rt6i_flags & RTF_CACHE) { rcu_read_lock(); if (rt6_check_expired(rt)) { - /* counteract the dst_release() in sk_dst_reset() */ - dst_hold(dst); + /* rt/dst can not be destroyed yet, + * because of rcu_read_lock() + */ sk_dst_reset(sk); - rt6_remove_exception_rt(rt); } rcu_read_unlock(); -- Gitee From 6161ec52a0c15bc0a71d7bd47d2ba3cae28ccdca Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 10 Oct 2024 19:10:34 +0200 Subject: [PATCH 025/116] PCI: Fix use-after-free of slot->bus on hot remove mainline inclusion from mainline-v6.13-rc1 commit c7acef99642b763ba585f4a43af999fcdbcc3dc4 category: bugfix issue: NA CVE: CVE-2024-53194 Signed-off-by: wukuan --------------------------------------- Dennis reports a boot crash on recent Lenovo laptops with a USB4 dock. Since commit 0fc70886569c ("thunderbolt: Reset USB4 v2 host router") and commit 59a54c5f3dbd ("thunderbolt: Reset topology created by the boot firmware"), USB4 v2 and v1 Host Routers are reset on probe of the thunderbolt driver. The reset clears the Presence Detect State and Data Link Layer Link Active bits at the USB4 Host Router's Root Port and thus causes hot removal of the dock. The crash occurs when pciehp is unbound from one of the dock's Downstream Ports: pciehp creates a pci_slot on bind and destroys it on unbind. The pci_slot contains a pointer to the pci_bus below the Downstream Port, but a reference on that pci_bus is never acquired. The pci_bus is destroyed before the pci_slot, so a use-after-free ensues when pci_slot_release() accesses slot->bus. In principle this should not happen because pci_stop_bus_device() unbinds pciehp (and therefore destroys the pci_slot) before the pci_bus is destroyed by pci_remove_bus_device(). However the stacktrace provided by Dennis shows that pciehp is unbound from pci_remove_bus_device() instead of pci_stop_bus_device(). To understand the significance of this, one needs to know that the PCI core uses a two step process to remove a portion of the hierarchy: It first unbinds all drivers in the sub-hierarchy in pci_stop_bus_device() and then actually removes the devices in pci_remove_bus_device(). There is no precaution to prevent driver binding in-between pci_stop_bus_device() and pci_remove_bus_device(). In Dennis' case, it seems removal of the hierarchy by pciehp races with driver binding by pci_bus_add_devices(). pciehp is bound to the Downstream Port after pci_stop_bus_device() has run, so it is unbound by pci_remove_bus_device() instead of pci_stop_bus_device(). Because the pci_bus has already been destroyed at that point, accesses to it result in a use-after-free. One might conclude that driver binding needs to be prevented after pci_stop_bus_device() has run. However it seems risky that pci_slot points to pci_bus without holding a reference. Solely relying on correct ordering of driver unbind versus pci_bus destruction is certainly not defensive programming. If pci_slot has a need to access data in pci_bus, it ought to acquire a reference. Amend pci_create_slot() accordingly. Dennis reports that the crash is not reproducible with this change. Abridged stacktrace: pcieport 0000:00:07.0: PME: Signaling with IRQ 156 pcieport 0000:00:07.0: pciehp: Slot #12 AttnBtn- PwrCtrl- MRL- AttnInd- PwrInd- HotPlug+ Surprise+ Interlock- NoCompl+ IbPresDis- LLActRep+ pci_bus 0000:20: dev 00, created physical slot 12 pcieport 0000:00:07.0: pciehp: Slot(12): Card not present ... pcieport 0000:21:02.0: pciehp: pcie_disable_notification: SLOTCTRL d8 write cmd 0 Oops: general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6b6b: 0000 [#1] PREEMPT SMP NOPTI CPU: 13 UID: 0 PID: 134 Comm: irq/156-pciehp Not tainted 6.11.0-devel+ #1 RIP: 0010:dev_driver_string+0x12/0x40 pci_destroy_slot pciehp_remove pcie_port_remove_service device_release_driver_internal bus_remove_device device_del device_unregister remove_iter device_for_each_child pcie_portdrv_remove pci_device_remove device_release_driver_internal bus_remove_device device_del pci_remove_bus_device (recursive invocation) pci_remove_bus_device pciehp_unconfigure_device pciehp_disable_slot pciehp_handle_presence_or_link_change pciehp_ist Link: https://lore.kernel.org/r/4bfd4c0e976c1776cd08e76603903b338cf25729.1728579288.git.lukas@wunner.de Reported-by: Dennis Wassenberg Closes: https://lore.kernel.org/r/6de4b45ff2b32dd91a805ec02ec8ec73ef411bf6.camel@secunet.com/ Tested-by: Dennis Wassenberg Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg Cc: stable@vger.kernel.org Signed-off-by: wukuan --- drivers/pci/slot.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index ed2077e7470a..a42e2cf774fd 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -79,6 +79,7 @@ static void pci_slot_release(struct kobject *kobj) up_read(&pci_bus_sem); list_del(&slot->list); + pci_bus_put(slot->bus); kfree(slot); } @@ -260,7 +261,7 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, goto err; } - slot->bus = parent; + slot->bus = pci_bus_get(parent); slot->number = slot_nr; slot->kobj.kset = pci_slots_kset; @@ -268,6 +269,7 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, slot_name = make_slot_name(name); if (!slot_name) { err = -ENOMEM; + pci_bus_put(slot->bus); kfree(slot); goto err; } -- Gitee From 38c2b377df56a4d231222cc2a8a3e1f693c3a459 Mon Sep 17 00:00:00 2001 From: Qi Han Date: Wed, 18 Sep 2024 02:44:00 -0600 Subject: [PATCH 026/116] f2fs: fix f2fs_bug_on when uninstalling filesystem call f2fs_evict_inode. stable inclusion from stable-v5.10.231 commit dff561e4060d28edc9a2960d4a87f3c945a96aa3 category: bugfix issue: #IBGB5Z CVE: CVE-2024-56586 Signed-off-by: zyf1116 --------------------------------------- [ Upstream commit d5c367ef8287fb4d235c46a2f8c8d68715f3a0ca ] creating a large files during checkpoint disable until it runs out of space and then delete it, then remount to enable checkpoint again, and then unmount the filesystem triggers the f2fs_bug_on as below: ------------[ cut here ]------------ kernel BUG at fs/f2fs/inode.c:896! CPU: 2 UID: 0 PID: 1286 Comm: umount Not tainted 6.11.0-rc7-dirty #360 Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:f2fs_evict_inode+0x58c/0x610 Call Trace: __die_body+0x15/0x60 die+0x33/0x50 do_trap+0x10a/0x120 f2fs_evict_inode+0x58c/0x610 do_error_trap+0x60/0x80 f2fs_evict_inode+0x58c/0x610 exc_invalid_op+0x53/0x60 f2fs_evict_inode+0x58c/0x610 asm_exc_invalid_op+0x16/0x20 f2fs_evict_inode+0x58c/0x610 evict+0x101/0x260 dispose_list+0x30/0x50 evict_inodes+0x140/0x190 generic_shutdown_super+0x2f/0x150 kill_block_super+0x11/0x40 kill_f2fs_super+0x7d/0x140 deactivate_locked_super+0x2a/0x70 cleanup_mnt+0xb3/0x140 task_work_run+0x61/0x90 The root cause is: creating large files during disable checkpoint period results in not enough free segments, so when writing back root inode will failed in f2fs_enable_checkpoint. When umount the file system after enabling checkpoint, the root inode is dirty in f2fs_evict_inode function, which triggers BUG_ON. The steps to reproduce are as follows: dd if=/dev/zero of=f2fs.img bs=1M count=55 mount f2fs.img f2fs_dir -o checkpoint=disable:10% dd if=/dev/zero of=big bs=1M count=50 sync rm big mount -o remount,checkpoint=enable f2fs_dir umount f2fs_dir Let's redirty inode when there is not free segments during checkpoint is disable. Signed-off-by: Qi Han Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: zyf1116 --- fs/f2fs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index aeafd8b73cbf..c0aa1aaa4dd8 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -695,8 +695,10 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) !is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; - if (!f2fs_is_checkpoint_ready(sbi)) + if (!f2fs_is_checkpoint_ready(sbi)) { + f2fs_mark_inode_dirty_sync(inode, true); return -ENOSPC; + } /* * We need to balance fs here to prevent from producing dirty node pages -- Gitee From 081a995b14952be440dbdeb70dceb6092a4ea2e0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 Nov 2024 05:40:04 +0100 Subject: [PATCH 027/116] nvme-pci: fix freeing of the HMB descriptor table stable inclusion from stable-v5.10.231 commit 452f9ddd12bebc04cef741e8ba3806bf0e1fd015 category: bugfix issue: NA CVE: CVE-2024-56756 Signed-off-by: zyf1116 --------------------------------------- [ Upstream commit 3c2fb1ca8086eb139b2a551358137525ae8e0d7a ] The HMB descriptor table is sized to the maximum number of descriptors that could be used for a given device, but __nvme_alloc_host_mem could break out of the loop earlier on memory allocation failure and end up using less descriptors than planned for, which leads to an incorrect size passed to dma_free_coherent. In practice this was not showing up because the number of descriptors tends to be low and the dma coherent allocator always allocates and frees at least a page. Fixes: 87ad72a59a38 ("nvme-pci: implement host memory buffer support") Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin Signed-off-by: zyf1116 --- drivers/nvme/host/pci.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0d5bb8f16841..0c8d48009cf8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -147,6 +147,7 @@ struct nvme_dev { /* host memory buffer support: */ u64 host_mem_size; u32 nr_host_mem_descs; + u32 host_mem_descs_size; dma_addr_t host_mem_descs_dma; struct nvme_host_mem_buf_desc *host_mem_descs; void **host_mem_desc_bufs; @@ -1925,10 +1926,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev) kfree(dev->host_mem_desc_bufs); dev->host_mem_desc_bufs = NULL; - dma_free_coherent(dev->dev, - dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), + dma_free_coherent(dev->dev, dev->host_mem_descs_size, dev->host_mem_descs, dev->host_mem_descs_dma); dev->host_mem_descs = NULL; + dev->host_mem_descs_size = 0; dev->nr_host_mem_descs = 0; } @@ -1936,7 +1937,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, u32 chunk_size) { struct nvme_host_mem_buf_desc *descs; - u32 max_entries, len; + u32 max_entries, len, descs_size; dma_addr_t descs_dma; int i = 0; void **bufs; @@ -1949,8 +1950,9 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries) max_entries = dev->ctrl.hmmaxd; - descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs), - &descs_dma, GFP_KERNEL); + descs_size = max_entries * sizeof(*descs); + descs = dma_alloc_coherent(dev->dev, descs_size, &descs_dma, + GFP_KERNEL); if (!descs) goto out; @@ -1979,6 +1981,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, dev->host_mem_size = size; dev->host_mem_descs = descs; dev->host_mem_descs_dma = descs_dma; + dev->host_mem_descs_size = descs_size; dev->host_mem_desc_bufs = bufs; return 0; @@ -1993,8 +1996,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, kfree(bufs); out_free_descs: - dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs, - descs_dma); + dma_free_coherent(dev->dev, descs_size, descs, descs_dma); out: dev->host_mem_descs = NULL; return -ENOMEM; -- Gitee From e9621e629f5889f3417075391ec5e9f4c8e503da Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Sun, 3 Nov 2024 21:35:27 +0530 Subject: [PATCH 028/116] leds: class: Protect brightness_show() with led_cdev->led_access mutex stable inclusion from stable-v5.10.231 commit ddcfc5708da9972ac23a9121b3d819b0a53d6f21 category: bugfix issue: NA CVE: CVE-2024-56587 Signed-off-by: zyf1116 --------------------------------------- [ Upstream commit 4ca7cd938725a4050dcd62ae9472e931d603118d ] There is NULL pointer issue observed if from Process A where hid device being added which results in adding a led_cdev addition and later a another call to access of led_cdev attribute from Process B can result in NULL pointer issue. Use mutex led_cdev->led_access to protect access to led->cdev and its attribute inside brightness_show() and max_brightness_show() and also update the comment for mutex that it should be used to protect the led class device fields. Process A Process B kthread+0x114 worker_thread+0x244 process_scheduled_works+0x248 uhid_device_add_worker+0x24 hid_add_device+0x120 device_add+0x268 bus_probe_device+0x94 device_initial_probe+0x14 __device_attach+0xfc bus_for_each_drv+0x10c __device_attach_driver+0x14c driver_probe_device+0x3c __driver_probe_device+0xa0 really_probe+0x190 hid_device_probe+0x130 ps_probe+0x990 ps_led_register+0x94 devm_led_classdev_register_ext+0x58 led_classdev_register_ext+0x1f8 device_create_with_groups+0x48 device_create_groups_vargs+0xc8 device_add+0x244 kobject_uevent+0x14 kobject_uevent_env[jt]+0x224 mutex_unlock[jt]+0xc4 __mutex_unlock_slowpath+0xd4 wake_up_q+0x70 try_to_wake_up[jt]+0x48c preempt_schedule_common+0x28 __schedule+0x628 __switch_to+0x174 el0t_64_sync+0x1a8/0x1ac el0t_64_sync_handler+0x68/0xbc el0_svc+0x38/0x68 do_el0_svc+0x1c/0x28 el0_svc_common+0x80/0xe0 invoke_syscall+0x58/0x114 __arm64_sys_read+0x1c/0x2c ksys_read+0x78/0xe8 vfs_read+0x1e0/0x2c8 kernfs_fop_read_iter+0x68/0x1b4 seq_read_iter+0x158/0x4ec kernfs_seq_show+0x44/0x54 sysfs_kf_seq_show+0xb4/0x130 dev_attr_show+0x38/0x74 brightness_show+0x20/0x4c dualshock4_led_get_brightness+0xc/0x74 [ 3313.874295][ T4013] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000060 [ 3313.874301][ T4013] Mem abort info: [ 3313.874303][ T4013] ESR = 0x0000000096000006 [ 3313.874305][ T4013] EC = 0x25: DABT (current EL), IL = 32 bits [ 3313.874307][ T4013] SET = 0, FnV = 0 [ 3313.874309][ T4013] EA = 0, S1PTW = 0 [ 3313.874311][ T4013] FSC = 0x06: level 2 translation fault [ 3313.874313][ T4013] Data abort info: [ 3313.874314][ T4013] ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 [ 3313.874316][ T4013] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 3313.874318][ T4013] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 3313.874320][ T4013] user pgtable: 4k pages, 39-bit VAs, pgdp=00000008f2b0a000 .. [ 3313.874332][ T4013] Dumping ftrace buffer: [ 3313.874334][ T4013] (ftrace buffer empty) .. .. [ dd3313.874639][ T4013] CPU: 6 PID: 4013 Comm: InputReader [ 3313.874648][ T4013] pc : dualshock4_led_get_brightness+0xc/0x74 [ 3313.874653][ T4013] lr : led_update_brightness+0x38/0x60 [ 3313.874656][ T4013] sp : ffffffc0b910bbd0 .. .. [ 3313.874685][ T4013] Call trace: [ 3313.874687][ T4013] dualshock4_led_get_brightness+0xc/0x74 [ 3313.874690][ T4013] brightness_show+0x20/0x4c [ 3313.874692][ T4013] dev_attr_show+0x38/0x74 [ 3313.874696][ T4013] sysfs_kf_seq_show+0xb4/0x130 [ 3313.874700][ T4013] kernfs_seq_show+0x44/0x54 [ 3313.874703][ T4013] seq_read_iter+0x158/0x4ec [ 3313.874705][ T4013] kernfs_fop_read_iter+0x68/0x1b4 [ 3313.874708][ T4013] vfs_read+0x1e0/0x2c8 [ 3313.874711][ T4013] ksys_read+0x78/0xe8 [ 3313.874714][ T4013] __arm64_sys_read+0x1c/0x2c [ 3313.874718][ T4013] invoke_syscall+0x58/0x114 [ 3313.874721][ T4013] el0_svc_common+0x80/0xe0 [ 3313.874724][ T4013] do_el0_svc+0x1c/0x28 [ 3313.874727][ T4013] el0_svc+0x38/0x68 [ 3313.874730][ T4013] el0t_64_sync_handler+0x68/0xbc [ 3313.874732][ T4013] el0t_64_sync+0x1a8/0x1ac Signed-off-by: Mukesh Ojha Reviewed-by: Anish Kumar Link: https://lore.kernel.org/r/20241103160527.82487-1-quic_mojha@quicinc.com Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: zyf1116 --- drivers/leds/led-class.c | 14 +++++++++++--- include/linux/leds.h | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index fcb9eee3b609..a34d34e3f335 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -28,11 +28,14 @@ static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct led_classdev *led_cdev = dev_get_drvdata(dev); + unsigned int brightness; - /* no lock needed for this */ + mutex_lock(&led_cdev->led_access); led_update_brightness(led_cdev); + brightness = led_cdev->brightness; + mutex_unlock(&led_cdev->led_access); - return sprintf(buf, "%u\n", led_cdev->brightness); + return sprintf(buf, "%u\n", brightness); } static ssize_t brightness_store(struct device *dev, @@ -69,8 +72,13 @@ static ssize_t max_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct led_classdev *led_cdev = dev_get_drvdata(dev); + unsigned int max_brightness; + + mutex_lock(&led_cdev->led_access); + max_brightness = led_cdev->max_brightness; + mutex_unlock(&led_cdev->led_access); - return sprintf(buf, "%u\n", led_cdev->max_brightness); + return sprintf(buf, "%u\n", max_brightness); } static DEVICE_ATTR_RO(max_brightness); diff --git a/include/linux/leds.h b/include/linux/leds.h index 6a8d6409c993..d8b4a73454e9 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -155,7 +155,7 @@ struct led_classdev { struct kernfs_node *brightness_hw_changed_kn; #endif - /* Ensures consistent access to the LED Flash Class device */ + /* Ensures consistent access to the LED class device */ struct mutex led_access; }; -- Gitee From d4cf8f06d3b3ce7d330677d61eb0e4c2d71f7231 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 14 Nov 2024 01:02:18 +0000 Subject: [PATCH 029/116] usb: dwc3: gadget: Fix looping of queued SG entries stable inclusion from stable-v5.10.231 commit 8ceb21d76426bbe7072cc3e43281e70c0d664cc7 category: bugfix issue: NA CVE: CVE-2024-56698 Signed-off-by: zyf1116 --------------------------------------- commit b7fc65f5141c24785dc8c19249ca4efcf71b3524 upstream. The dwc3_request->num_queued_sgs is decremented on completion. If a partially completed request is handled, then the dwc3_request->num_queued_sgs no longer reflects the total number of num_queued_sgs (it would be cleared). Correctly check the number of request SG entries remained to be prepare and queued. Failure to do this may cause null pointer dereference when accessing non-existent SG entry. Cc: stable@vger.kernel.org Fixes: c96e6725db9d ("usb: dwc3: gadget: Correct the logic for queuing sgs") Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/d07a7c4aa0fcf746cdca0515150dbe5c52000af7.1731545781.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 --- drivers/usb/dwc3/gadget.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 076032128c34..667fa8e8856d 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1185,8 +1185,8 @@ static int dwc3_prepare_trbs_sg(struct dwc3_ep *dep, struct scatterlist *s; int i; unsigned int length = req->request.length; - unsigned int remaining = req->request.num_mapped_sgs - - req->num_queued_sgs; + unsigned int remaining = req->num_pending_sgs; + unsigned int num_queued_sgs = req->request.num_mapped_sgs - remaining; unsigned int num_trbs = req->num_trbs; bool needs_extra_trb = dwc3_needs_extra_trb(dep, req); @@ -1194,7 +1194,7 @@ static int dwc3_prepare_trbs_sg(struct dwc3_ep *dep, * If we resume preparing the request, then get the remaining length of * the request and resume where we left off. */ - for_each_sg(req->request.sg, s, req->num_queued_sgs, i) + for_each_sg(req->request.sg, s, num_queued_sgs, i) length -= sg_dma_len(s); for_each_sg(sg, s, remaining, i) { -- Gitee From 069e20f1f5f63557713e3c032314c5ba3df37934 Mon Sep 17 00:00:00 2001 From: WangYuli Date: Mon, 25 Nov 2024 13:26:16 +0800 Subject: [PATCH 030/116] HID: wacom: fix when get product name maybe null pointer stable inclusion from stable-v5.10.231 commit 5912a921289edb34d40aeab32ea6d52d41e75fed category: bugfix issue: NA CVE: CVE-2024-56629 Signed-off-by: zyf1116 --------------------------------------- commit 59548215b76be98cf3422eea9a67d6ea578aca3d upstream. Due to incorrect dev->product reporting by certain devices, null pointer dereferences occur when dev->product is empty, leading to potential system crashes. This issue was found on EXCELSIOR DL37-D05 device with Loongson-LS3A6000-7A2000-DL37 motherboard. Kernel logs: [ 56.470885] usb 4-3: new full-speed USB device number 4 using ohci-pci [ 56.671638] usb 4-3: string descriptor 0 read error: -22 [ 56.671644] usb 4-3: New USB device found, idVendor=056a, idProduct=0374, bcdDevice= 1.07 [ 56.671647] usb 4-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [ 56.678839] hid-generic 0003:056A:0374.0004: hiddev0,hidraw3: USB HID v1.10 Device [HID 056a:0374] on usb-0000:00:05.0-3/input0 [ 56.697719] CPU 2 Unable to handle kernel paging request at virtual address 0000000000000000, era == 90000000066e35c8, ra == ffff800004f98a80 [ 56.697732] Oops[#1]: [ 56.697734] CPU: 2 PID: 2742 Comm: (udev-worker) Tainted: G OE 6.6.0-loong64-desktop #25.00.2000.015 [ 56.697737] Hardware name: Inspur CE520L2/C09901N000000000, BIOS 2.09.00 10/11/2024 [ 56.697739] pc 90000000066e35c8 ra ffff800004f98a80 tp 9000000125478000 sp 900000012547b8a0 [ 56.697741] a0 0000000000000000 a1 ffff800004818b28 a2 0000000000000000 a3 0000000000000000 [ 56.697743] a4 900000012547b8f0 a5 0000000000000000 a6 0000000000000000 a7 0000000000000000 [ 56.697745] t0 ffff800004818b2d t1 0000000000000000 t2 0000000000000003 t3 0000000000000005 [ 56.697747] t4 0000000000000000 t5 0000000000000000 t6 0000000000000000 t7 0000000000000000 [ 56.697748] t8 0000000000000000 u0 0000000000000000 s9 0000000000000000 s0 900000011aa48028 [ 56.697750] s1 0000000000000000 s2 0000000000000000 s3 ffff800004818e80 s4 ffff800004810000 [ 56.697751] s5 90000001000b98d0 s6 ffff800004811f88 s7 ffff800005470440 s8 0000000000000000 [ 56.697753] ra: ffff800004f98a80 wacom_update_name+0xe0/0x300 [wacom] [ 56.697802] ERA: 90000000066e35c8 strstr+0x28/0x120 [ 56.697806] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 56.697816] PRMD: 0000000c (PPLV0 +PIE +PWE) [ 56.697821] EUEN: 00000000 (-FPE -SXE -ASXE -BTE) [ 56.697827] ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) [ 56.697831] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) [ 56.697835] BADV: 0000000000000000 [ 56.697836] PRID: 0014d000 (Loongson-64bit, Loongson-3A6000) [ 56.697838] Modules linked in: wacom(+) bnep bluetooth rfkill qrtr nls_iso8859_1 nls_cp437 snd_hda_codec_conexant snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_timer snd soundcore input_leds mousedev led_class joydev deepin_netmonitor(OE) fuse nfnetlink dmi_sysfs ip_tables x_tables overlay amdgpu amdxcp drm_exec gpu_sched drm_buddy radeon drm_suballoc_helper i2c_algo_bit drm_ttm_helper r8169 ttm drm_display_helper spi_loongson_pci xhci_pci cec xhci_pci_renesas spi_loongson_core hid_generic realtek gpio_loongson_64bit [ 56.697887] Process (udev-worker) (pid: 2742, threadinfo=00000000aee0d8b4, task=00000000a9eff1f3) [ 56.697890] Stack : 0000000000000000 ffff800004817e00 0000000000000000 0000251c00000000 [ 56.697896] 0000000000000000 00000011fffffffd 0000000000000000 0000000000000000 [ 56.697901] 0000000000000000 1b67a968695184b9 0000000000000000 90000001000b98d0 [ 56.697906] 90000001000bb8d0 900000011aa48028 0000000000000000 ffff800004f9d74c [ 56.697911] 90000001000ba000 ffff800004f9ce58 0000000000000000 ffff800005470440 [ 56.697916] ffff800004811f88 90000001000b98d0 9000000100da2aa8 90000001000bb8d0 [ 56.697921] 0000000000000000 90000001000ba000 900000011aa48028 ffff800004f9d74c [ 56.697926] ffff8000054704e8 90000001000bb8b8 90000001000ba000 0000000000000000 [ 56.697931] 90000001000bb8d0 9000000006307564 9000000005e666e0 90000001752359b8 [ 56.697936] 9000000008cbe400 900000000804d000 9000000005e666e0 0000000000000000 [ 56.697941] ... [ 56.697944] Call Trace: [ 56.697945] [<90000000066e35c8>] strstr+0x28/0x120 [ 56.697950] [] wacom_update_name+0xe0/0x300 [wacom] [ 56.698000] [] wacom_parse_and_register+0x338/0x900 [wacom] [ 56.698050] [] wacom_probe+0x32c/0x420 [wacom] [ 56.698099] [<9000000006307564>] hid_device_probe+0x144/0x260 [ 56.698103] [<9000000005e65d68>] really_probe+0x208/0x540 [ 56.698109] [<9000000005e661dc>] __driver_probe_device+0x13c/0x1e0 [ 56.698112] [<9000000005e66620>] driver_probe_device+0x40/0x100 [ 56.698116] [<9000000005e6680c>] __device_attach_driver+0x12c/0x180 [ 56.698119] [<9000000005e62bc8>] bus_for_each_drv+0x88/0x160 [ 56.698123] [<9000000005e66468>] __device_attach+0x108/0x260 [ 56.698126] [<9000000005e63918>] device_reprobe+0x78/0x100 [ 56.698129] [<9000000005e62a68>] bus_for_each_dev+0x88/0x160 [ 56.698132] [<9000000006304e54>] __hid_bus_driver_added+0x34/0x80 [ 56.698134] [<9000000005e62bc8>] bus_for_each_drv+0x88/0x160 [ 56.698137] [<9000000006304df0>] __hid_register_driver+0x70/0xa0 [ 56.698142] [<9000000004e10fe4>] do_one_initcall+0x104/0x320 [ 56.698146] [<9000000004f38150>] do_init_module+0x90/0x2c0 [ 56.698151] [<9000000004f3a3d8>] init_module_from_file+0xb8/0x120 [ 56.698155] [<9000000004f3a590>] idempotent_init_module+0x150/0x3a0 [ 56.698159] [<9000000004f3a890>] sys_finit_module+0xb0/0x140 [ 56.698163] [<900000000671e4e8>] do_syscall+0x88/0xc0 [ 56.698166] [<9000000004e12404>] handle_syscall+0xc4/0x160 [ 56.698171] Code: 0011958f 00150224 5800cd85 <2a00022c> 00150004 4000c180 0015022c 03400000 03400000 [ 56.698192] ---[ end trace 0000000000000000 ]--- Fixes: 09dc28acaec7 ("HID: wacom: Improve generic name generation") Reported-by: Zhenxing Chen Co-developed-by: Xu Rao Signed-off-by: Xu Rao Signed-off-by: WangYuli Link: https://patch.msgid.link/B31757FE8E1544CF+20241125052616.18261-1-wangyuli@uniontech.com Cc: stable@vger.kernel.org Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 --- drivers/hid/wacom_sys.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 1a7e1d3e7a37..56e9345e9646 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2223,7 +2223,8 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) if (hid_is_usb(wacom->hdev)) { struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); - product_name = dev->product; + if (dev->product != NULL) + product_name = dev->product; } if (wacom->hdev->bus == BUS_I2C) { -- Gitee From 8261df5c111250eed47047397d4789a1348cd4fe Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Fri, 22 Nov 2024 13:10:30 +0100 Subject: [PATCH 031/116] bpf: fix OOB devmap writes when deleting elements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.231 commit 70f3de869865f9c3da0508a5ea29f6f4c1889057 category: bugfix issue: NA CVE: CVE-2024-56615 Signed-off-by: zyf1116 --------------------------------------- commit ab244dd7cf4c291f82faacdc50b45cc0f55b674d upstream. Jordy reported issue against XSKMAP which also applies to DEVMAP - the index used for accessing map entry, due to being a signed integer, causes the OOB writes. Fix is simple as changing the type from int to u32, however, when compared to XSKMAP case, one more thing needs to be addressed. When map is released from system via dev_map_free(), we iterate through all of the entries and an iterator variable is also an int, which implies OOB accesses. Again, change it to be u32. Example splat below: [ 160.724676] BUG: unable to handle page fault for address: ffffc8fc2c001000 [ 160.731662] #PF: supervisor read access in kernel mode [ 160.736876] #PF: error_code(0x0000) - not-present page [ 160.742095] PGD 0 P4D 0 [ 160.744678] Oops: Oops: 0000 [#1] PREEMPT SMP [ 160.749106] CPU: 1 UID: 0 PID: 520 Comm: kworker/u145:12 Not tainted 6.12.0-rc1+ #487 [ 160.757050] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [ 160.767642] Workqueue: events_unbound bpf_map_free_deferred [ 160.773308] RIP: 0010:dev_map_free+0x77/0x170 [ 160.777735] Code: 00 e8 fd 91 ed ff e8 b8 73 ed ff 41 83 7d 18 19 74 6e 41 8b 45 24 49 8b bd f8 00 00 00 31 db 85 c0 74 48 48 63 c3 48 8d 04 c7 <48> 8b 28 48 85 ed 74 30 48 8b 7d 18 48 85 ff 74 05 e8 b3 52 fa ff [ 160.796777] RSP: 0018:ffffc9000ee1fe38 EFLAGS: 00010202 [ 160.802086] RAX: ffffc8fc2c001000 RBX: 0000000080000000 RCX: 0000000000000024 [ 160.809331] RDX: 0000000000000000 RSI: 0000000000000024 RDI: ffffc9002c001000 [ 160.816576] RBP: 0000000000000000 R08: 0000000000000023 R09: 0000000000000001 [ 160.823823] R10: 0000000000000001 R11: 00000000000ee6b2 R12: dead000000000122 [ 160.831066] R13: ffff88810c928e00 R14: ffff8881002df405 R15: 0000000000000000 [ 160.838310] FS: 0000000000000000(0000) GS:ffff8897e0c40000(0000) knlGS:0000000000000000 [ 160.846528] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 160.852357] CR2: ffffc8fc2c001000 CR3: 0000000005c32006 CR4: 00000000007726f0 [ 160.859604] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 160.866847] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 160.874092] PKRU: 55555554 [ 160.876847] Call Trace: [ 160.879338] [ 160.881477] ? __die+0x20/0x60 [ 160.884586] ? page_fault_oops+0x15a/0x450 [ 160.888746] ? search_extable+0x22/0x30 [ 160.892647] ? search_bpf_extables+0x5f/0x80 [ 160.896988] ? exc_page_fault+0xa9/0x140 [ 160.900973] ? asm_exc_page_fault+0x22/0x30 [ 160.905232] ? dev_map_free+0x77/0x170 [ 160.909043] ? dev_map_free+0x58/0x170 [ 160.912857] bpf_map_free_deferred+0x51/0x90 [ 160.917196] process_one_work+0x142/0x370 [ 160.921272] worker_thread+0x29e/0x3b0 [ 160.925082] ? rescuer_thread+0x4b0/0x4b0 [ 160.929157] kthread+0xd4/0x110 [ 160.932355] ? kthread_park+0x80/0x80 [ 160.936079] ret_from_fork+0x2d/0x50 [ 160.943396] ? kthread_park+0x80/0x80 [ 160.950803] ret_from_fork_asm+0x11/0x20 [ 160.958482] Fixes: 546ac1ffb70d ("bpf: add devmap, a map for storing net device references") CC: stable@vger.kernel.org Reported-by: Jordy Zomer Suggested-by: Jordy Zomer Reviewed-by: Toke Høiland-Jørgensen Acked-by: John Fastabend Signed-off-by: Maciej Fijalkowski Link: https://lore.kernel.org/r/20241122121030.716788-3-maciej.fijalkowski@intel.com Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 --- kernel/bpf/devmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 07b5edb2c70f..8a0bb4c1b7a3 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -182,7 +182,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) static void dev_map_free(struct bpf_map *map) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int i; + u32 i; /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, * so the programs (can be more than one that used this map) were @@ -541,7 +541,7 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; - int k = *(u32 *)key; + u32 k = *(u32 *)key; if (k >= map->max_entries) return -EINVAL; @@ -563,7 +563,7 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *old_dev; - int k = *(u32 *)key; + u32 k = *(u32 *)key; unsigned long flags; int ret = -ENOENT; -- Gitee From dfa5245ea9a2d00233a8bb5e942bb45b5e30ed1f Mon Sep 17 00:00:00 2001 From: guoweikang Date: Wed, 20 Nov 2024 13:27:49 +0800 Subject: [PATCH 032/116] ftrace: Fix regression with module command in stack_trace_filter stable inclusion from stable-v5.10.231 commit 5dabb7af57bc72308a6e2e81a5dd756eef283803 category: bugfix issue: NA CVE: CVE-2024-56569 Signed-off-by: zyf1116 --------------------------------------- commit 45af52e7d3b8560f21d139b3759735eead8b1653 upstream. When executing the following command: # echo "write*:mod:ext3" > /sys/kernel/tracing/stack_trace_filter The current mod command causes a null pointer dereference. While commit 0f17976568b3f ("ftrace: Fix regression with module command in stack_trace_filter") has addressed part of the issue, it left a corner case unhandled, which still results in a kernel crash. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20241120052750.275463-1-guoweikang.kernel@gmail.com Fixes: 04ec7bb642b77 ("tracing: Have the trace_array hold the list of registered func probes"); Signed-off-by: guoweikang Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 --- kernel/trace/ftrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8dcac51b492b..37a0e4f4b1cf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4297,6 +4297,9 @@ ftrace_mod_callback(struct trace_array *tr, struct ftrace_hash *hash, char *func; int ret; + if (!tr) + return -ENODEV; + /* match_records() modifies func, and we need the original */ func = kstrdup(func_orig, GFP_KERNEL); if (!func) -- Gitee From 36ab8d6af3a88138c9009422157d920f1b753a88 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 25 Nov 2024 22:53:14 +0200 Subject: [PATCH 033/116] drm/dp_mst: Fix MST sideband message body length check mainline inclusion from mainline-v6.13-rc2 commit bd2fccac61b40eaf08d9546acc9fef958bfe4763 category: bugfix issue: NA CVE: CVE-2024-56616 Signed-off-by: zyf1116 --------------------------------------- Fix the MST sideband message body length check, which must be at least 1 byte accounting for the message body CRC (aka message data CRC) at the end of the message. This fixes a case where an MST branch device returns a header with a correct header CRC (indicating a correctly received body length), with the body length being incorrectly set to 0. This will later lead to a memory corruption in drm_dp_sideband_append_payload() and the following errors in dmesg: UBSAN: array-index-out-of-bounds in drivers/gpu/drm/display/drm_dp_mst_topology.c:786:25 index -1 is out of range for type 'u8 [48]' Call Trace: drm_dp_sideband_append_payload+0x33d/0x350 [drm_display_helper] drm_dp_get_one_sb_msg+0x3ce/0x5f0 [drm_display_helper] drm_dp_mst_hpd_irq_handle_event+0xc8/0x1580 [drm_display_helper] memcpy: detected field-spanning write (size 18446744073709551615) of single field "&msg->msg[msg->curlen]" at drivers/gpu/drm/display/drm_dp_mst_topology.c:791 (size 256) Call Trace: drm_dp_sideband_append_payload+0x324/0x350 [drm_display_helper] drm_dp_get_one_sb_msg+0x3ce/0x5f0 [drm_display_helper] drm_dp_mst_hpd_irq_handle_event+0xc8/0x1580 [drm_display_helper] Cc: Cc: Lyude Paul Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241125205314.1725887-1-imre.deak@intel.com Signed-off-by: zyf1116 --- drivers/gpu/drm/drm_dp_mst_topology.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 27305f339881..0eb2f30c1e3e 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -318,6 +318,9 @@ static bool drm_dp_decode_sideband_msg_hdr(struct drm_dp_sideband_msg_hdr *hdr, hdr->broadcast = (buf[idx] >> 7) & 0x1; hdr->path_msg = (buf[idx] >> 6) & 0x1; hdr->msg_len = buf[idx] & 0x3f; + if (hdr->msg_len < 1) /* min space for body CRC */ + return false; + idx++; hdr->somt = (buf[idx] >> 7) & 0x1; hdr->eomt = (buf[idx] >> 6) & 0x1; -- Gitee From cb52ecd62a8c6f2e7621efada2c09a033a06c061 Mon Sep 17 00:00:00 2001 From: Benoit Sevens Date: Thu, 7 Nov 2024 14:22:02 +0000 Subject: [PATCH 034/116] media: uvcvideo: Skip parsing frames of type UVC_VS_UNDEFINED in uvc_parse_format stable inclusion from stable-v5.10.230 commit faff5bbb2762c44ec7426037b3000e77a11d6773 category: bugfix issue: NA CVE: CVE-2024-53104 Signed-off-by: zyf1116 --------------------------------------- commit ecf2b43018da9579842c774b7f35dbe11b5c38dd upstream. This can lead to out of bounds writes since frames of this type were not taken into account when calculating the size of the frames buffer in uvc_parse_streaming. Fixes: c0efd232929c ("V4L/DVB (8145a): USB Video Class driver") Signed-off-by: Benoit Sevens Cc: stable@vger.kernel.org Acked-by: Greg Kroah-Hartman Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 --- drivers/media/usb/uvc/uvc_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 6334f99f1854..12fa3ff387a8 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -661,7 +661,7 @@ static int uvc_parse_format(struct uvc_device *dev, /* Parse the frame descriptors. Only uncompressed, MJPEG and frame * based formats have frame descriptors. */ - while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && + while (ftype && buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == ftype) { frame = &format->frame[format->nframes]; if (ftype != UVC_VS_FRAME_FRAME_BASED) -- Gitee From 514f576257dcb8acb060b4c16633b37c6aecdcaa Mon Sep 17 00:00:00 2001 From: Lianqin Hu Date: Tue, 3 Dec 2024 12:14:16 +0000 Subject: [PATCH 035/116] usb: gadget: u_serial: Fix the issue that gs_start_io crashed due to accessing null pointer stable inclusion from stable-v5.10.232 commit 28b3c03a6790de1f6f2683919ad657840f0f0f58 category: bugfix issue: NA CVE: CVE-2024-56670 Signed-off-by: zyf1116 --------------------------------------- commit 4cfbca86f6a8b801f3254e0e3c8f2b1d2d64be2b upstream. Considering that in some extreme cases, when u_serial driver is accessed by multiple threads, Thread A is executing the open operation and calling the gs_open, Thread B is executing the disconnect operation and calling the gserial_disconnect function,The port->port_usb pointer will be set to NULL. E.g. Thread A Thread B gs_open() gadget_unbind_driver() gs_start_io() composite_disconnect() gs_start_rx() gserial_disconnect() ... ... spin_unlock(&port->port_lock) status = usb_ep_queue() spin_lock(&port->port_lock) spin_lock(&port->port_lock) port->port_usb = NULL gs_free_requests(port->port_usb->in) spin_unlock(&port->port_lock) Crash This causes thread A to access a null pointer (port->port_usb is null) when calling the gs_free_requests function, causing a crash. If port_usb is NULL, the release request will be skipped as it will be done by gserial_disconnect. So add a null pointer check to gs_start_io before attempting to access the value of the pointer port->port_usb. Call trace: gs_start_io+0x164/0x25c gs_open+0x108/0x13c tty_open+0x314/0x638 chrdev_open+0x1b8/0x258 do_dentry_open+0x2c4/0x700 vfs_open+0x2c/0x3c path_openat+0xa64/0xc60 do_filp_open+0xb8/0x164 do_sys_openat2+0x84/0xf0 __arm64_sys_openat+0x70/0x9c invoke_syscall+0x58/0x114 el0_svc_common+0x80/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x38/0x68 Fixes: c1dca562be8a ("usb gadget: split out serial core") Cc: stable@vger.kernel.org Suggested-by: Prashanth K Signed-off-by: Lianqin Hu Acked-by: Prashanth K Link: https://lore.kernel.org/r/TYUPR06MB62178DC3473F9E1A537DCD02D2362@TYUPR06MB6217.apcprd06.prod.outlook.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 --- drivers/usb/gadget/function/u_serial.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index a717b53847a8..4fd21772a7aa 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -572,9 +572,12 @@ static int gs_start_io(struct gs_port *port) * we didn't in gs_start_tx() */ tty_wakeup(port->port.tty); } else { - gs_free_requests(ep, head, &port->read_allocated); - gs_free_requests(port->port_usb->in, &port->write_pool, - &port->write_allocated); + /* Free reqs only if we are still connected */ + if (port->port_usb) { + gs_free_requests(ep, head, &port->read_allocated); + gs_free_requests(port->port_usb->in, &port->write_pool, + &port->write_allocated); + } status = -EIO; } -- Gitee From d6574d4e86520901b0e40e5b6ca3d6b7d0d044ed Mon Sep 17 00:00:00 2001 From: Li Zetao Date: Thu, 10 Oct 2024 23:41:13 +0800 Subject: [PATCH 036/116] media: ts2020: fix null-ptr-deref in ts2020_probe() stable inclusion from stable-v5.10.231 commit 5a53f97cd5977911850b695add057f9965c1a2d6 category: bugfix issue: NA CVE: CVE-2024-56574 Signed-off-by: zyf1116 --------------------------------------- commit 4a058b34b52ed3feb1f3ff6fd26aefeeeed20cba upstream. KASAN reported a null-ptr-deref issue when executing the following command: # echo ts2020 0x20 > /sys/bus/i2c/devices/i2c-0/new_device KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 53 UID: 0 PID: 970 Comm: systemd-udevd Not tainted 6.12.0-rc2+ #24 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009) RIP: 0010:ts2020_probe+0xad/0xe10 [ts2020] RSP: 0018:ffffc9000abbf598 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffffffffc0714809 RDX: 0000000000000002 RSI: ffff88811550be00 RDI: 0000000000000010 RBP: ffff888109868800 R08: 0000000000000001 R09: fffff52001577eb6 R10: 0000000000000000 R11: ffffc9000abbff50 R12: ffffffffc0714790 R13: 1ffff92001577eb8 R14: ffffffffc07190d0 R15: 0000000000000001 FS: 00007f95f13b98c0(0000) GS:ffff888149280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000555d2634b000 CR3: 0000000152236000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ts2020_probe+0xad/0xe10 [ts2020] i2c_device_probe+0x421/0xb40 really_probe+0x266/0x850 ... The cause of the problem is that when using sysfs to dynamically register an i2c device, there is no platform data, but the probe process of ts2020 needs to use platform data, resulting in a null pointer being accessed. Solve this problem by adding checks to platform data. Fixes: dc245a5f9b51 ("[media] ts2020: implement I2C client bindings") Cc: Signed-off-by: Li Zetao Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 --- drivers/media/dvb-frontends/ts2020.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/ts2020.c b/drivers/media/dvb-frontends/ts2020.c index 1f1004ccce1e..8e5eefe08941 100644 --- a/drivers/media/dvb-frontends/ts2020.c +++ b/drivers/media/dvb-frontends/ts2020.c @@ -554,13 +554,19 @@ static int ts2020_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct ts2020_config *pdata = client->dev.platform_data; - struct dvb_frontend *fe = pdata->fe; + struct dvb_frontend *fe; struct ts2020_priv *dev; int ret; u8 u8tmp; unsigned int utmp; char *chip_str; + if (!pdata) { + dev_err(&client->dev, "platform data is mandatory\n"); + return -EINVAL; + } + + fe = pdata->fe; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { ret = -ENOMEM; -- Gitee From caf1f866137ac23198977fc0c042550afceb13ee Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 12 Sep 2024 12:17:00 -0400 Subject: [PATCH 037/116] Bluetooth: hci_event: Align BR/EDR JUST_WORKS paring with LE mainline inclusion from mainline-v6.12-rc2 commit b25e11f978b63cb7857890edb3a698599cddb10e category: bugfix issue: NA CVE: CVE-2024-53144 Signed-off-by: zyf1116 --------------------------------------- This aligned BR/EDR JUST_WORKS method with LE which since 92516cd97fd4 ("Bluetooth: Always request for user confirmation for Just Works") always request user confirmation with confirm_hint set since the likes of bluetoothd have dedicated policy around JUST_WORKS method (e.g. main.conf:JustWorksRepairing). CVE: CVE-2024-8805 Cc: stable@vger.kernel.org Fixes: ba15a58b179e ("Bluetooth: Fix SSP acceptor just-works confirmation without MITM") Signed-off-by: Luiz Augusto von Dentz Tested-by: Kiran K Signed-off-by: zyf1116 Conflicts: net/bluetooth/hci_event.c --- net/bluetooth/hci_event.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 17000153d15a..2ef4dbecb5a6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4728,19 +4728,16 @@ static void hci_user_confirm_request_evt(struct hci_dev *hdev, goto unlock; } - /* If no side requires MITM protection; auto-accept */ + /* If no side requires MITM protection; use JUST_CFM method */ if ((!loc_mitm || conn->remote_cap == HCI_IO_NO_INPUT_OUTPUT) && (!rem_mitm || conn->io_capability == HCI_IO_NO_INPUT_OUTPUT)) { - /* If we're not the initiators request authorization to - * proceed from user space (mgmt_user_confirm with - * confirm_hint set to 1). The exception is if neither - * side had MITM or if the local IO capability is - * NoInputNoOutput, in which case we do auto-accept + /* If we're not the initiator of request authorization and the + * local IO capability is not NoInputNoOutput, use JUST_WORKS + * method (mgmt_user_confirm with confirm_hint set to 1). */ if (!test_bit(HCI_CONN_AUTH_PEND, &conn->flags) && - conn->io_capability != HCI_IO_NO_INPUT_OUTPUT && - (loc_mitm || rem_mitm)) { + conn->io_capability != HCI_IO_NO_INPUT_OUTPUT) { BT_DBG("Confirming auto-accept as acceptor"); confirm_hint = 1; goto confirm; -- Gitee From 3acf6657692cfa931bf08835132d5448ebfbccc5 Mon Sep 17 00:00:00 2001 From: Long Li Date: Mon, 4 Nov 2024 10:05:42 +0800 Subject: [PATCH 038/116] f2fs: fix race in concurrent f2fs_stop_gc_thread stable inclusion from stable-v6.6.64 commit 794fa8792d4eacac191f1cbcc2e81b7369e4662a category: bugfix issue: NA CVE: CVE-2024-53218 Signed-off-by: zyf1116 --------------------------------------- [ Upstream commit 7b0033dbc48340a1c1c3f12448ba17d6587ca092 ] In my test case, concurrent calls to f2fs shutdown report the following stack trace: Oops: general protection fault, probably for non-canonical address 0xc6cfff63bb5513fc: 0000 [#1] PREEMPT SMP PTI CPU: 0 UID: 0 PID: 678 Comm: f2fs_rep_shutdo Not tainted 6.12.0-rc5-next-20241029-g6fb2fa9805c5-dirty #85 Call Trace: ? show_regs+0x8b/0xa0 ? __die_body+0x26/0xa0 ? die_addr+0x54/0x90 ? exc_general_protection+0x24b/0x5c0 ? asm_exc_general_protection+0x26/0x30 ? kthread_stop+0x46/0x390 f2fs_stop_gc_thread+0x6c/0x110 f2fs_do_shutdown+0x309/0x3a0 f2fs_ioc_shutdown+0x150/0x1c0 __f2fs_ioctl+0xffd/0x2ac0 f2fs_ioctl+0x76/0xe0 vfs_ioctl+0x23/0x60 __x64_sys_ioctl+0xce/0xf0 x64_sys_call+0x2b1b/0x4540 do_syscall_64+0xa7/0x240 entry_SYSCALL_64_after_hwframe+0x76/0x7e The root cause is a race condition in f2fs_stop_gc_thread() called from different f2fs shutdown paths: [CPU0] [CPU1] ---------------------- ----------------------- f2fs_stop_gc_thread f2fs_stop_gc_thread gc_th = sbi->gc_thread gc_th = sbi->gc_thread kfree(gc_th) sbi->gc_thread = NULL < gc_th != NULL > kthread_stop(gc_th->f2fs_gc_task) //UAF The commit c7f114d864ac ("f2fs: fix to avoid use-after-free in f2fs_stop_gc_thread()") attempted to fix this issue by using a read semaphore to prevent races between shutdown and remount threads, but it fails to prevent all race conditions. Fix it by converting to write lock of s_umount in f2fs_do_shutdown(). Fixes: 7950e9ac638e ("f2fs: stop gc/discard thread after fs shutdown") Signed-off-by: Long Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: zyf1116 Conflicts: fs/f2fs/file.c --- fs/f2fs/file.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 80cb10d2c3b0..28d51fbd6fd6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2296,12 +2296,20 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) goto out; } + /* + * grab sb->s_umount to avoid racing w/ remount() and other shutdown + * paths. + */ + down_write(&sbi->sb->s_umount); + f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); f2fs_drop_discard_cmd(sbi); clear_opt(sbi, DISCARD); + up_write(&sbi->sb->s_umount); + f2fs_update_time(sbi, REQ_TIME); out: if (in != F2FS_GOING_DOWN_FULLSYNC) -- Gitee From a4d143261537bdb5243737d08a13894764efdc7f Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 12 Oct 2024 00:44:50 +0800 Subject: [PATCH 039/116] f2fs: fix null-ptr-deref in f2fs_submit_page_bio() mainline inclusion from mainline-v6.13-rc1 commit b7d0a97b28083084ebdd8e5c6bccd12e6ec18faa category: bugfix issue: NA CVE: CVE-2024-53221 Signed-off-by: zyf1116 --------------------------------------- There's issue as follows when concurrently installing the f2fs.ko module and mounting the f2fs file system: KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] RIP: 0010:__bio_alloc+0x2fb/0x6c0 [f2fs] Call Trace: f2fs_submit_page_bio+0x126/0x8b0 [f2fs] __get_meta_page+0x1d4/0x920 [f2fs] get_checkpoint_version.constprop.0+0x2b/0x3c0 [f2fs] validate_checkpoint+0xac/0x290 [f2fs] f2fs_get_valid_checkpoint+0x207/0x950 [f2fs] f2fs_fill_super+0x1007/0x39b0 [f2fs] mount_bdev+0x183/0x250 legacy_get_tree+0xf4/0x1e0 vfs_get_tree+0x88/0x340 do_new_mount+0x283/0x5e0 path_mount+0x2b2/0x15b0 __x64_sys_mount+0x1fe/0x270 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e Above issue happens as the biset of the f2fs file system is not initialized before register "f2fs_fs_type". To address above issue just register "f2fs_fs_type" at the last in init_f2fs_fs(). Ensure that all f2fs file system resources are initialized. Fixes: f543805fcd60 ("f2fs: introduce private bioset") Signed-off-by: Ye Bin Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: zyf1116 Conflicts: fs/f2fs/super.c --- fs/f2fs/super.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 45306a108b0f..e3c73415c182 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4166,9 +4166,6 @@ static int __init init_f2fs_fs(void) err = register_shrinker(&f2fs_shrinker_info); if (err) goto free_sysfs; - err = register_filesystem(&f2fs_fs_type); - if (err) - goto free_shrinker; f2fs_create_root_stats(); err = f2fs_init_post_read_processing(); if (err) @@ -4185,7 +4182,12 @@ static int __init init_f2fs_fs(void) err = f2fs_init_compress_cache(); if (err) goto free_compress_mempool; + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_compress_cache; return 0; +free_compress_cache: + f2fs_destroy_compress_cache(); free_compress_mempool: f2fs_destroy_compress_mempool(); free_bioset: @@ -4196,8 +4198,6 @@ static int __init init_f2fs_fs(void) f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); -free_shrinker: unregister_shrinker(&f2fs_shrinker_info); free_sysfs: f2fs_exit_sysfs(); @@ -4221,13 +4221,13 @@ static int __init init_f2fs_fs(void) static void __exit exit_f2fs_fs(void) { + unregister_filesystem(&f2fs_fs_type); f2fs_destroy_compress_cache(); f2fs_destroy_compress_mempool(); f2fs_destroy_bioset(); f2fs_destroy_bio_entry_cache(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); unregister_shrinker(&f2fs_shrinker_info); f2fs_exit_sysfs(); f2fs_destroy_garbage_collection_cache(); -- Gitee From 3ff5853182ac1606d12bdde928295fed033c7b37 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 24 Oct 2024 09:35:58 +0800 Subject: [PATCH 040/116] bpf: Check validity of link->type in bpf_link_show_fdinfo() mainline inclusion from mainline-v6.12-rc5 commit 8421d4c8762bd022cb491f2f0f7019ef51b4f0a7 category: bugfix issue: NA CVE: CVE-2024-53099 Signed-off-by: zyf1116 --------------------------------------- If a newly-added link type doesn't invoke BPF_LINK_TYPE(), accessing bpf_link_type_strs[link->type] may result in an out-of-bounds access. To spot such missed invocations early in the future, checking the validity of link->type in bpf_link_show_fdinfo() and emitting a warning when such invocations are missed. Signed-off-by: Hou Tao Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241024013558.1135167-3-houtao@huaweicloud.com Signed-off-by: zyf1116 Conflicts: kernel/bpf/syscall.c --- kernel/bpf/syscall.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ec424fba0020..657bca73c1e4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2399,15 +2399,20 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_link *link = filp->private_data; const struct bpf_prog *prog = link->prog; + enum bpf_link_type type = link->type; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); + if (type < ARRAY_SIZE(bpf_link_type_strs) && bpf_link_type_strs[type]) { + seq_printf(m, "link_type:\t%s\n", bpf_link_type_strs[type]); + } else { + WARN_ONCE(1, "missing BPF_LINK_TYPE(...) for link type %u\n", type); + seq_printf(m, "link_type:\t<%u>\n", type); + } seq_printf(m, - "link_type:\t%s\n" "link_id:\t%u\n" "prog_tag:\t%s\n" "prog_id:\t%u\n", - bpf_link_type_strs[link->type], link->id, prog_tag, prog->aux->id); -- Gitee From 5696a7709794474e62c0b0d08e49a51437e812b0 Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Mon, 28 Oct 2024 11:23:36 +0800 Subject: [PATCH 041/116] exfat: fix out-of-bounds access of directory entries mainline inclusion from mainline-v6.10-rc2 commit 184fa506e392eb78364d9283c961217ff2c0617b category: bugfix issue: #IBGB6D CVE: CVE-2024-53147 Signed-off-by: zyf1116 --------------------------------------- In the case of the directory size is greater than or equal to the cluster size, if start_clu becomes an EOF cluster(an invalid cluster) due to file system corruption, then the directory entry where ei->hint_femp.eidx hint is outside the directory, resulting in an out-of-bounds access, which may cause further file system corruption. This commit adds a check for start_clu, if it is an invalid cluster, the file or directory will be treated as empty. Cc: stable@vger.kernel.org Signed-off-by: Yuezhang Mo Co-developed-by: Namjae Jeon Signed-off-by: Namjae Jeon Signed-off-by: zyf1116 Conflicts: fs/exfat/namei.c --- fs/exfat/namei.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index d62d961e278d..1ad11d1aaed2 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -655,14 +655,19 @@ static int exfat_find(struct inode *dir, struct qstr *qname, info->type = exfat_get_entry_type(ep); info->attr = le16_to_cpu(ep->dentry.file.attr); info->size = le64_to_cpu(ep2->dentry.stream.valid_size); + + info->start_clu = le32_to_cpu(ep2->dentry.stream.start_clu); + if (!is_valid_cluster(sbi, info->start_clu) && info->size) { + exfat_warn(sb, "start_clu is invalid cluster(0x%x)", + info->start_clu); + info->size = 0; + } + if (info->size == 0) { info->flags = ALLOC_NO_FAT_CHAIN; info->start_clu = EXFAT_EOF_CLUSTER; - } else { + } else info->flags = ep2->dentry.stream.flags; - info->start_clu = - le32_to_cpu(ep2->dentry.stream.start_clu); - } exfat_get_entry_time(sbi, &info->crtime, ep->dentry.file.create_tz, -- Gitee From 009f65ff9dade5eabce3bf4fa7b3110e36a44a9e Mon Sep 17 00:00:00 2001 From: May27May Date: Fri, 17 Jan 2025 10:04:26 +0800 Subject: [PATCH 042/116] mm/thp: fix compile issue of page defer split. issue: #IBII9G Signed-off-by: May27May --- mm/huge_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 97a42d154111..65f273cbaef7 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2778,7 +2778,7 @@ void __page_unqueue_deferred_split(struct page *page) * deferred_list. If folio is not in deferred_list, it's safe * to check without acquiring the split_queue_lock. */ - if (data_race(!list_empty(page_deferred_list(page))) { + if (data_race(!list_empty(page_deferred_list(page)))) { spin_lock_irqsave(&ds_queue->split_queue_lock, flags); if (!list_empty(page_deferred_list(page))) { ds_queue->split_queue_len--; -- Gitee From 2976b0a31dc06495e4c460b6fec8298530dd83fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thi=C3=A9baud=20Weksteen?= Date: Thu, 5 Dec 2024 12:09:19 +1100 Subject: [PATCH 043/116] selinux: ignore unknown extended permissions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-5.10.233 commit 712137b177b45f255ce5687e679d950fcb218256 category: bugfix issue: #IBKGHQ CVE: CVE-2024-57931 Signed-off-by: zhangshuqi --------------------------------------- commit 900f83cf376bdaf798b6f5dcb2eae0c822e908b6 upstream. When evaluating extended permissions, ignore unknown permissions instead of calling BUG(). This commit ensures that future permissions can be added without interfering with older kernels. Cc: stable@vger.kernel.org Fixes: fa1aa143ac4a ("selinux: extended permissions for ioctls") Signed-off-by: Thiébaud Weksteen Signed-off-by: Paul Moore Acked-by: Paul Moore Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- security/selinux/ss/services.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 3db8bd2158d9..128dfcf10b82 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -970,7 +970,10 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd, xpermd->driver)) return; } else { - BUG(); + pr_warn_once( + "SELinux: unknown extended permission (%u) will be ignored\n", + node->datum.u.xperms->specified); + return; } if (node->key.specified == AVTAB_XPERMS_ALLOWED) { @@ -1007,7 +1010,8 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd, node->datum.u.xperms->perms.p[i]; } } else { - BUG(); + pr_warn_once("SELinux: unknown specified key (%u)\n", + node->key.specified); } } -- Gitee From 3061e67a1470794119474a79311a578e861e6613 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20L=27h=C3=B4pital?= Date: Thu, 7 Mar 2024 12:19:06 +0100 Subject: [PATCH 044/116] net: phy: fix phy_get_internal_delay accessing an empty array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-5.10.214 commit 06dd21045a7e8bc8701b0ebedcd9a30a6325878b category: bugfix issue: #IBKGHQ CVE: CVE-2024-27047 Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 4469c0c5b14a0919f5965c7ceac96b523eb57b79 ] The phy_get_internal_delay function could try to access to an empty array in the case that the driver is calling phy_get_internal_delay without defining delay_values and rx-internal-delay-ps or tx-internal-delay-ps is defined to 0 in the device-tree. This will lead to "unable to handle kernel NULL pointer dereference at virtual address 0". To avoid this kernel oops, the test should be delay >= 0. As there is already delay < 0 test just before, the test could only be size == 0. Fixes: 92252eec913b ("net: phy: Add a helper to return the index for of the internal delay") Co-developed-by: Enguerrand de Ribaucourt Signed-off-by: Enguerrand de Ribaucourt Signed-off-by: Kévin L'hôpital Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- drivers/net/phy/phy_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 095d16ceafcf..8654e05ddc41 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2769,7 +2769,7 @@ s32 phy_get_internal_delay(struct phy_device *phydev, struct device *dev, if (delay < 0) return delay; - if (delay && size == 0) + if (size == 0) return delay; if (delay < delay_values[0] || delay > delay_values[size - 1]) { -- Gitee From eb59e8afa4914f625cbd77b53e96acdfc8bac9ed Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Dec 2024 12:16:52 +0000 Subject: [PATCH 045/116] arm64: ptrace: fix partial SETREGSET for NT_ARM_TAGGED_ADDR_CTRL mainline inclusion from mainline-6.13 commit ca62d90085f4af36de745883faab9f8a7cbb45d3 category: bugfix issue: #IBKGHQ CVE: CVE-2024-57874 Signed-off-by: zhangshuqi --------------------------------------- Currently tagged_addr_ctrl_set() doesn't initialize the temporary 'ctrl' variable, and a SETREGSET call with a length of zero will leave this uninitialized. Consequently tagged_addr_ctrl_set() will consume an arbitrary value, potentially leaking up to 64 bits of memory from the kernel stack. The read is limited to a specific slot on the stack, and the issue does not provide a write mechanism. As set_tagged_addr_ctrl() only accepts values where bits [63:4] zero and rejects other values, a partial SETREGSET attempt will randomly succeed or fail depending on the value of the uninitialized value, and the exposure is significantly limited. Fix this by initializing the temporary value before copying the regset from userspace, as for other regsets (e.g. NT_PRSTATUS, NT_PRFPREG, NT_ARM_SYSTEM_CALL). In the case of a zero-length write, the existing value of the tagged address ctrl will be retained. The NT_ARM_TAGGED_ADDR_CTRL regset is only visible in the user_aarch64_view used by a native AArch64 task to manipulate another native AArch64 task. As get_tagged_addr_ctrl() only returns an error value when called for a compat task, tagged_addr_ctrl_get() and tagged_addr_ctrl_set() should never observe an error value from get_tagged_addr_ctrl(). Add a WARN_ON_ONCE() to both to indicate that such an error would be unexpected, and error handlnig is not missing in either case. Fixes: 2200aa7154cb ("arm64: mte: ptrace: Add NT_ARM_TAGGED_ADDR_CTRL regset") Cc: # 5.10.x Signed-off-by: Mark Rutland Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241205121655.1824269-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas Signed-off-by: zhangshuqi --- arch/arm64/kernel/ptrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 2817e39881fe..6c9e7662c07f 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1040,7 +1040,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target, { long ctrl = get_tagged_addr_ctrl(target); - if (IS_ERR_VALUE(ctrl)) + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) return ctrl; return membuf_write(&to, &ctrl, sizeof(ctrl)); @@ -1054,6 +1054,10 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct int ret; long ctrl; + ctrl = get_tagged_addr_ctrl(target); + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) + return ctrl; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); if (ret) return ret; -- Gitee From dedb96363eb94eb7f5d2c408bf638a2207c84439 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 25 Nov 2024 22:16:12 +0100 Subject: [PATCH 046/116] iio: adc: rockchip_saradc: fix information leak in triggered buffer mainline inclusion from mainline-6.13 commit 38724591364e1e3b278b4053f102b49ea06ee17c category: bugfix issue: #IBKGHQ CVE: CVE-2024-57907 Signed-off-by: zhangshuqi --------------------------------------- The 'data' local struct is used to push data to user space from a triggered buffer, but it does not set values for inactive channels, as it only uses iio_for_each_active_channel() to assign new values. Initialize the struct to zero before using it to avoid pushing uninitialized information to userspace. Cc: stable@vger.kernel.org Fixes: 4e130dc7b413 ("iio: adc: rockchip_saradc: Add support iio buffers") Signed-off-by: Javier Carrasco Link: https://patch.msgid.link/20241125-iio_memset_scan_holes-v1-4-0cb6e98d895c@gmail.com Signed-off-by: Jonathan Cameron Signed-off-by: zhangshuqi --- drivers/iio/adc/rockchip_saradc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c index 12584f1631d8..deb58e232770 100644 --- a/drivers/iio/adc/rockchip_saradc.c +++ b/drivers/iio/adc/rockchip_saradc.c @@ -254,6 +254,8 @@ static irqreturn_t rockchip_saradc_trigger_handler(int irq, void *p) int ret; int i, j = 0; + memset(&data, 0, sizeof(data)); + mutex_lock(&i_dev->mlock); for_each_set_bit(i, i_dev->active_scan_mask, i_dev->masklength) { -- Gitee From 45db15e92ee302720d798a28ad388380f2964afd Mon Sep 17 00:00:00 2001 From: Yuezhang Mo Date: Fri, 13 Dec 2024 13:08:37 +0800 Subject: [PATCH 047/116] exfat: fix the infinite loop in exfat_readdir() mainline inclusion from mainline-6.13 commit fee873761bd978d077d8c55334b4966ac4cb7b59 category: bugfix issue: #IBKGHQ CVE: CVE-2024-57940 Signed-off-by: zhangshuqi --------------------------------------- If the file system is corrupted so that a cluster is linked to itself in the cluster chain, and there is an unused directory entry in the cluster, 'dentry' will not be incremented, causing condition 'dentry < max_dentries' unable to prevent an infinite loop. This infinite loop causes s_lock not to be released, and other tasks will hang, such as exfat_sync_fs(). This commit stops traversing the cluster chain when there is unused directory entry in the cluster to avoid this infinite loop. Reported-by: syzbot+205c2644abdff9d3f9fc@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=205c2644abdff9d3f9fc Tested-by: syzbot+205c2644abdff9d3f9fc@syzkaller.appspotmail.com Fixes: ca06197382bd ("exfat: add directory operations") Signed-off-by: Yuezhang Mo Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon Signed-off-by: zhangshuqi --- fs/exfat/dir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index db735a0d32fc..ce28691fae76 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -125,7 +125,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent type = exfat_get_entry_type(ep); if (type == TYPE_UNUSED) { brelse(bh); - break; + goto out; } if (type != TYPE_FILE && type != TYPE_DIR) { @@ -185,6 +185,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent } } +out: dir_entry->namebuf.lfn[0] = '\0'; *cpos = EXFAT_DEN_TO_B(dentry); return 0; -- Gitee From 16fabe4d4705a5934e6c05e71f3fefae72da5338 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 19 Dec 2024 12:53:01 +0100 Subject: [PATCH 048/116] fs: relax assertions on failure to encode file handles mainline inclusion from mainline-6.13 commit 974e3fe0ac61de85015bbe5a4990cf4127b304b2 category: bugfix issue: #IBKGHQ CVE: CVE-2024-57924 Signed-off-by: zhangshuqi --------------------------------------- Encoding file handles is usually performed by a filesystem >encode_fh() method that may fail for various reasons. The legacy users of exportfs_encode_fh(), namely, nfsd and name_to_handle_at(2) syscall are ready to cope with the possibility of failure to encode a file handle. There are a few other users of exportfs_encode_{fh,fid}() that currently have a WARN_ON() assertion when ->encode_fh() fails. Relax those assertions because they are wrong. The second linked bug report states commit 16aac5ad1fa9 ("ovl: support encoding non-decodable file handles") in v6.6 as the regressing commit, but this is not accurate. The aforementioned commit only increases the chances of the assertion and allows triggering the assertion with the reproducer using overlayfs, inotify and drop_caches. Triggering this assertion was always possible with other filesystems and other reasons of ->encode_fh() failures and more particularly, it was also possible with the exact same reproducer using overlayfs that is mounted with options index=on,nfs_export=on also on kernels < v6.6. Therefore, I am not listing the aforementioned commit as a Fixes commit. Backport hint: this patch will have a trivial conflict applying to v6.6.y, and other trivial conflicts applying to stable kernels < v6.6. Reported-by: syzbot+ec07f6f5ce62b858579f@syzkaller.appspotmail.com Tested-by: syzbot+ec07f6f5ce62b858579f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-unionfs/671fd40c.050a0220.4735a.024f.GAE@google.com/ Reported-by: Dmitry Safonov Closes: https://lore.kernel.org/linux-fsdevel/CAGrbwDTLt6drB9eaUagnQVgdPBmhLfqqxAf3F+Juqy_o6oP8uw@mail.gmail.com/ Cc: stable@vger.kernel.org Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/r/20241219115301.465396-1-amir73il@gmail.com Signed-off-by: Christian Brauner Signed-off-by: zhangshuqi --- fs/notify/fdinfo.c | 4 +--- fs/overlayfs/copy_up.c | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 765b50aeadd2..ba608381486d 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -50,10 +50,8 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode) size = f.handle.handle_bytes >> 2; ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, NULL); - if ((ret == FILEID_INVALID) || (ret < 0)) { - WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); + if ((ret == FILEID_INVALID) || (ret < 0)) return; - } f.handle.handle_type = ret; f.handle.handle_bytes = size * sizeof(u32); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 65ac504595ba..8557180bd7e1 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -302,9 +302,8 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper) buflen = (dwords << 2); err = -EIO; - if (WARN_ON(fh_type < 0) || - WARN_ON(buflen > MAX_HANDLE_SZ) || - WARN_ON(fh_type == FILEID_INVALID)) + if (fh_type < 0 || fh_type == FILEID_INVALID || + WARN_ON(buflen > MAX_HANDLE_SZ)) goto out_err; fh->fb.version = OVL_FH_VERSION; -- Gitee From 268e7d281f6ebd399f5a3848ec7344d4a0f99649 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 26 Feb 2024 20:34:06 +0100 Subject: [PATCH 049/116] wifi: cfg80211: check A-MSDU format more carefully mainline inclusion from mainline-6.9 commit 9ad7974856926129f190ffbe3beea78460b3b7cc category: bugfix issue: #IBKGHQ CVE: CVE-2024-35937 Signed-off-by: zhangshuqi --------------------------------------- If it looks like there's another subframe in the A-MSDU but the header isn't fully there, we can end up reading data out of bounds, only to discard later. Make this a bit more careful and check if the subframe header can even be present. Reported-by: syzbot+d050d437fe47d479d210@syzkaller.appspotmail.com Link: https://msgid.link/20240226203405.a731e2c95e38.I82ce7d8c0cc8970ce29d0a39fdc07f1ffc425be4@changeid Signed-off-by: Johannes Berg Signed-off-by: zhangshuqi --- net/wireless/util.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 4b32e85c2d9a..5177fd63b1b8 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -751,24 +751,27 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, struct sk_buff *frame = NULL; u16 ethertype; u8 *payload; - int offset = 0, remaining; + int offset = 0; struct ethhdr eth; bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb); bool reuse_skb = false; bool last = false; while (!last) { + int remaining = skb->len - offset; unsigned int subframe_len; int len; u8 padding; + if (sizeof(eth) > remaining) + goto purge; + skb_copy_bits(skb, offset, ð, sizeof(eth)); len = ntohs(eth.h_proto); subframe_len = sizeof(struct ethhdr) + len; padding = (4 - subframe_len) & 0x3; /* the last MSDU has no padding */ - remaining = skb->len - offset; if (subframe_len > remaining) goto purge; /* mitigate A-MSDU aggregation injection attacks */ -- Gitee From 0559dfcba49ec4b7a75c519d30ded93fc687fb05 Mon Sep 17 00:00:00 2001 From: Yang Erkun Date: Mon, 21 Oct 2024 22:23:42 +0800 Subject: [PATCH 050/116] SUNRPC: make sure cache entry active before cache_show mainline inclusion from mainline-6.13 commit 2862eee078a4d2d1f584e7f24fa50dddfa5f3471 category: bugfix issue: #IBKGHQ CVE: CVE-2024-53174 Signed-off-by: zhangshuqi --------------------------------------- The function `c_show` was called with protection from RCU. This only ensures that `cp` will not be freed. Therefore, the reference count for `cp` can drop to zero, which will trigger a refcount use-after-free warning when `cache_get` is called. To resolve this issue, use `cache_get_rcu` to ensure that `cp` remains active. ------------[ cut here ]------------ refcount_t: addition on 0; use-after-free. WARNING: CPU: 7 PID: 822 at lib/refcount.c:25 refcount_warn_saturate+0xb1/0x120 CPU: 7 UID: 0 PID: 822 Comm: cat Not tainted 6.12.0-rc3+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.1-2.fc37 04/01/2014 RIP: 0010:refcount_warn_saturate+0xb1/0x120 Call Trace: c_show+0x2fc/0x380 [sunrpc] seq_read_iter+0x589/0x770 seq_read+0x1e5/0x270 proc_reg_read+0xe1/0x140 vfs_read+0x125/0x530 ksys_read+0xc1/0x160 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Yang Erkun Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: zhangshuqi --- net/sunrpc/cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 20c93b68505e..522e43f66ecd 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1434,7 +1434,9 @@ static int c_show(struct seq_file *m, void *p) seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n", convert_to_wallclock(cp->expiry_time), kref_read(&cp->ref), cp->flags); - cache_get(cp); + if (!cache_get_rcu(cp)) + return 0; + if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ seq_puts(m, "# "); -- Gitee From 96d1223b9fefd8bf24bb869baea6e30650bd9302 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 16 Oct 2024 16:13:37 +0800 Subject: [PATCH 051/116] f2fs: fix to do sanity check on node blkaddr in truncate_node() mainline inclusion from mainline-6.13 commit 6babe00ccd34fc65b78ef8b99754e32b4385f23d category: bugfix issue: #IBKGHQ CVE: CVE-2024-56692 Signed-off-by: zhangshuqi --------------------------------------- syzbot reports a f2fs bug as below: ------------[ cut here ]------------ kernel BUG at fs/f2fs/segment.c:2534! RIP: 0010:f2fs_invalidate_blocks+0x35f/0x370 fs/f2fs/segment.c:2534 Call Trace: truncate_node+0x1ae/0x8c0 fs/f2fs/node.c:909 f2fs_remove_inode_page+0x5c2/0x870 fs/f2fs/node.c:1288 f2fs_evict_inode+0x879/0x15c0 fs/f2fs/inode.c:856 evict+0x4e8/0x9b0 fs/inode.c:723 f2fs_handle_failed_inode+0x271/0x2e0 fs/f2fs/inode.c:986 f2fs_create+0x357/0x530 fs/f2fs/namei.c:394 lookup_open fs/namei.c:3595 [inline] open_last_lookups fs/namei.c:3694 [inline] path_openat+0x1c03/0x3590 fs/namei.c:3930 do_filp_open+0x235/0x490 fs/namei.c:3960 do_sys_openat2+0x13e/0x1d0 fs/open.c:1415 do_sys_open fs/open.c:1430 [inline] __do_sys_openat fs/open.c:1446 [inline] __se_sys_openat fs/open.c:1441 [inline] __x64_sys_openat+0x247/0x2a0 fs/open.c:1441 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0010:f2fs_invalidate_blocks+0x35f/0x370 fs/f2fs/segment.c:2534 The root cause is: on a fuzzed image, blkaddr in nat entry may be corrupted, then it will cause system panic when using it in f2fs_invalidate_blocks(), to avoid this, let's add sanity check on nat blkaddr in truncate_node(). Reported-by: syzbot+33379ce4ac76acf7d0c7@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-f2fs-devel/0000000000009a6cd706224ca720@google.com/ Cc: stable@vger.kernel.org Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: zhangshuqi Conflicts: fs/f2fs/node.c --- fs/f2fs/node.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 02cb1c806c3e..c5e1933d3335 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -838,6 +838,15 @@ static int truncate_node(struct dnode_of_data *dn) if (err) return err; + if (ni.blk_addr != NEW_ADDR && + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { + f2fs_err(sbi, + "nat entry is corrupted, run fsck to fix it, ino:%u, " + "nid:%u, blkaddr:%u", ni.ino, ni.nid, ni.blk_addr); + set_sbi_flag(sbi, SBI_NEED_FSCK); + return -EFSCORRUPTED; + } + /* Deallocate node address */ f2fs_invalidate_blocks(sbi, ni.blk_addr); dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); -- Gitee From 481e4d300fa6481112df2090407466581067d97c Mon Sep 17 00:00:00 2001 From: Zijian Zhang Date: Wed, 16 Oct 2024 23:48:38 +0000 Subject: [PATCH 052/116] tcp_bpf: Fix the sk_mem_uncharge logic in tcp_bpf_sendmsg mainline inclusion from mainline-5.10.231 commit dbedc7e142df5ea238a46fdd7462c1c42cd36a10 category: bugfix issue: #IBKGHQ CVE: CVE-2024-56633 Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit ca70b8baf2bd125b2a4d96e76db79375c07d7ff2 ] The current sk memory accounting logic in __SK_REDIRECT is pre-uncharging tosend bytes, which is either msg->sg.size or a smaller value apply_bytes. Potential problems with this strategy are as follows: - If the actual sent bytes are smaller than tosend, we need to charge some bytes back, as in line 487, which is okay but seems not clean. - When tosend is set to apply_bytes, as in line 417, and (ret < 0), we may miss uncharging (msg->sg.size - apply_bytes) bytes. [...] 415 tosend = msg->sg.size; 416 if (psock->apply_bytes && psock->apply_bytes < tosend) 417 tosend = psock->apply_bytes; [...] 443 sk_msg_return(sk, msg, tosend); 444 release_sock(sk); 446 origsize = msg->sg.size; 447 ret = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress, 448 msg, tosend, flags); 449 sent = origsize - msg->sg.size; [...] 454 lock_sock(sk); 455 if (unlikely(ret < 0)) { 456 int free = sk_msg_free_nocharge(sk, msg); 458 if (!cork) 459 *copied -= free; 460 } [...] 487 if (eval == __SK_REDIRECT) 488 sk_mem_charge(sk, tosend - sent); [...] When running the selftest test_txmsg_redir_wait_sndmem with txmsg_apply, the following warning will be reported: ------------[ cut here ]------------ WARNING: CPU: 6 PID: 57 at net/ipv4/af_inet.c:156 inet_sock_destruct+0x190/0x1a0 Modules linked in: CPU: 6 UID: 0 PID: 57 Comm: kworker/6:0 Not tainted 6.12.0-rc1.bm.1-amd64+ #43 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Workqueue: events sk_psock_destroy RIP: 0010:inet_sock_destruct+0x190/0x1a0 RSP: 0018:ffffad0a8021fe08 EFLAGS: 00010206 RAX: 0000000000000011 RBX: ffff9aab4475b900 RCX: ffff9aab481a0800 RDX: 0000000000000303 RSI: 0000000000000011 RDI: ffff9aab4475b900 RBP: ffff9aab4475b990 R08: 0000000000000000 R09: ffff9aab40050ec0 R10: 0000000000000000 R11: ffff9aae6fdb1d01 R12: ffff9aab49c60400 R13: ffff9aab49c60598 R14: ffff9aab49c60598 R15: dead000000000100 FS: 0000000000000000(0000) GS:ffff9aae6fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffec7e47bd8 CR3: 00000001a1a1c004 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? __warn+0x89/0x130 ? inet_sock_destruct+0x190/0x1a0 ? report_bug+0xfc/0x1e0 ? handle_bug+0x5c/0xa0 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? inet_sock_destruct+0x190/0x1a0 __sk_destruct+0x25/0x220 sk_psock_destroy+0x2b2/0x310 process_scheduled_works+0xa3/0x3e0 worker_thread+0x117/0x240 ? __pfx_worker_thread+0x10/0x10 kthread+0xcf/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x40 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 ---[ end trace 0000000000000000 ]--- In __SK_REDIRECT, a more concise way is delaying the uncharging after sent bytes are finalized, and uncharge this value. When (ret < 0), we shall invoke sk_msg_free. Same thing happens in case __SK_DROP, when tosend is set to apply_bytes, we may miss uncharging (msg->sg.size - apply_bytes) bytes. The same warning will be reported in selftest. [...] 468 case __SK_DROP: 469 default: 470 sk_msg_free_partial(sk, msg, tosend); 471 sk_msg_apply_bytes(psock, tosend); 472 *copied -= (tosend + delta); 473 return -EACCES; [...] So instead of sk_msg_free_partial we can do sk_msg_free here. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Fixes: 8ec95b94716a ("bpf, sockmap: Fix the sk->sk_forward_alloc warning of sk_stream_kill_queues") Signed-off-by: Zijian Zhang Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20241016234838.3167769-3-zijianzhang@bytedance.com Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- net/ipv4/tcp_bpf.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index d0ca1fc325cd..05f9b55a7b08 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -375,7 +375,6 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, tosend); release_sock(sk); origsize = msg->sg.size; @@ -386,8 +385,9 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, sock_put(sk_redir); lock_sock(sk); + sk_mem_uncharge(sk, sent); if (unlikely(ret < 0)) { - int free = sk_msg_free_nocharge(sk, msg); + int free = sk_msg_free(sk, msg); if (!cork) *copied -= free; @@ -401,7 +401,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, break; case __SK_DROP: default: - sk_msg_free_partial(sk, msg, tosend); + sk_msg_free(sk, msg); sk_msg_apply_bytes(psock, tosend); *copied -= (tosend + delta); return -EACCES; @@ -417,11 +417,8 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, } if (msg && msg->sg.data[msg->sg.start].page_link && - msg->sg.data[msg->sg.start].length) { - if (eval == __SK_REDIRECT) - sk_mem_charge(sk, tosend - sent); + msg->sg.data[msg->sg.start].length) goto more_data; - } } return ret; } -- Gitee From 5e18132540a3f97d581865012a26d41d33deae54 Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Mon, 9 Dec 2024 11:46:15 +0100 Subject: [PATCH 053/116] power: supply: gpio-charger: Fix set charge current limits mainline inclusion from mainline-5.10.233 commit b29c7783ac1fe36d639c089cf471ac7a46df05f0 category: bugfix issue: #IBKGHQ CVE: CVE-2024-57792 Signed-off-by: zhangshuqi --------------------------------------- commit afc6e39e824ad0e44b2af50a97885caec8d213d1 upstream. Fix set charge current limits for devices which allow to set the lowest charge current limit to be greater zero. If requested charge current limit is below lowest limit, the index equals current_limit_map_size which leads to accessing memory beyond allocated memory. Fixes: be2919d8355e ("power: supply: gpio-charger: add charge-current-limit feature") Cc: stable@vger.kernel.org Signed-off-by: Dimitri Fedrau Link: https://lore.kernel.org/r/20241209-fix-charge-current-limit-v1-1-760d9b8f2af3@liebherr.com Signed-off-by: Sebastian Reichel Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- drivers/power/supply/gpio-charger.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/power/supply/gpio-charger.c b/drivers/power/supply/gpio-charger.c index 68212b39785b..6139f736ecbe 100644 --- a/drivers/power/supply/gpio-charger.c +++ b/drivers/power/supply/gpio-charger.c @@ -67,6 +67,14 @@ static int set_charge_current_limit(struct gpio_charger *gpio_charger, int val) if (gpio_charger->current_limit_map[i].limit_ua <= val) break; } + + /* + * If a valid charge current limit isn't found, default to smallest + * current limitation for safety reasons. + */ + if (i >= gpio_charger->current_limit_map_size) + i = gpio_charger->current_limit_map_size - 1; + mapping = gpio_charger->current_limit_map[i]; for (i = 0; i < ndescs; i++) { -- Gitee From 413244004c445e58481dd6968caa7b0a957a5655 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 25 Oct 2024 12:27:53 +0200 Subject: [PATCH 054/116] s390/cpum_sf: Handle CPU hotplug remove during sampling mainline inclusion from mainline-5.10.231 commit 99192c735ed4bfdff0d215ec85c8a87a677cb898 category: bugfix issue: #IBKGHQ CVE: CVE-2024-57849 Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit a0bd7dacbd51c632b8e2c0500b479af564afadf3 ] CPU hotplug remove handling triggers the following function call sequence: CPUHP_AP_PERF_S390_SF_ONLINE --> s390_pmu_sf_offline_cpu() ... CPUHP_AP_PERF_ONLINE --> perf_event_exit_cpu() The s390 CPUMF sampling CPU hotplug handler invokes: s390_pmu_sf_offline_cpu() +--> cpusf_pmu_setup() +--> setup_pmc_cpu() +--> deallocate_buffers() This function de-allocates all sampling data buffers (SDBs) allocated for that CPU at event initialization. It also clears the PMU_F_RESERVED bit. The CPU is gone and can not be sampled. With the event still being active on the removed CPU, the CPU event hotplug support in kernel performance subsystem triggers the following function calls on the removed CPU: perf_event_exit_cpu() +--> perf_event_exit_cpu_context() +--> __perf_event_exit_context() +--> __perf_remove_from_context() +--> event_sched_out() +--> cpumsf_pmu_del() +--> cpumsf_pmu_stop() +--> hw_perf_event_update() to stop and remove the event. During removal of the event, the sampling device driver tries to read out the remaining samples from the sample data buffers (SDBs). But they have already been freed (and may have been re-assigned). This may lead to a use after free situation in which case the samples are most likely invalid. In the best case the memory has not been reassigned and still contains valid data. Remedy this situation and check if the CPU is still in reserved state (bit PMU_F_RESERVED set). In this case the SDBs have not been released an contain valid data. This is always the case when the event is removed (and no CPU hotplug off occured). If the PMU_F_RESERVED bit is not set, the SDB buffers are gone. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Heiko Carstens Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- arch/s390/kernel/perf_cpum_sf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index bcd31e0b4edb..af41550d34b9 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1900,7 +1900,9 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags) event->hw.state |= PERF_HES_STOPPED; if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { - hw_perf_event_update(event, 1); + /* CPU hotplug off removes SDBs. No samples to extract. */ + if (cpuhw->flags & PMU_F_RESERVED) + hw_perf_event_update(event, 1); event->hw.state |= PERF_HES_UPTODATE; } perf_pmu_enable(event->pmu); -- Gitee From a3905c487f410f126694708a359aa4b64d7d410d Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Mon, 18 Nov 2024 11:09:09 +0800 Subject: [PATCH 055/116] bpf: fix recursive lock when verdict program return SK_PASS mainline inclusion from mainline-6.13 commit 8ca2a1eeadf09862190b2810697702d803ceef2d category: bugfix issue: #IBKGHQ CVE: CVE-2024-56694 Signed-off-by: zhangshuqi --------------------------------------- When the stream_verdict program returns SK_PASS, it places the received skb into its own receive queue, but a recursive lock eventually occurs, leading to an operating system deadlock. This issue has been present since v6.9. ''' sk_psock_strp_data_ready write_lock_bh(&sk->sk_callback_lock) strp_data_ready strp_read_sock read_sock -> tcp_read_sock strp_recv cb.rcv_msg -> sk_psock_strp_read # now stream_verdict return SK_PASS without peer sock assign __SK_PASS = sk_psock_map_verd(SK_PASS, NULL) sk_psock_verdict_apply sk_psock_skb_ingress_self sk_psock_skb_ingress_enqueue sk_psock_data_ready read_lock_bh(&sk->sk_callback_lock) <= dead lock ''' This topic has been discussed before, but it has not been fixed. Previous discussion: https://lore.kernel.org/all/6684a5864ec86_403d20898@john.notmuch Fixes: 6648e613226e ("bpf, skmsg: Fix NULL pointer dereference in sk_psock_skb_ingress_enqueue") Reported-by: Vincent Whitchurch Signed-off-by: Jiayuan Chen Signed-off-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20241118030910.36230-2-mrpre@163.com Signed-off-by: Jakub Kicinski Signed-off-by: zhangshuqi --- net/core/skmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index bb4fbc60b272..fb8d81aa570f 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -939,9 +939,9 @@ static void sk_psock_strp_data_ready(struct sock *sk) if (tls_sw_has_ctx_rx(sk)) { psock->parser.saved_data_ready(sk); } else { - write_lock_bh(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); strp_data_ready(&psock->parser.strp); - write_unlock_bh(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } } rcu_read_unlock(); -- Gitee From 8c63280caeea30cc07a369f7ac24be9e3167285a Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 12 Nov 2024 21:54:34 +0800 Subject: [PATCH 056/116] sunrpc: fix one UAF issue caused by sunrpc kernel tcp socket mainline inclusion from mainline-6.13 commit 3f23f96528e8fcf8619895c4c916c52653892ec1 category: bugfix issue: #IBKGHQ CVE: CVE-2024-53168 Signed-off-by: zhangshuqi --------------------------------------- BUG: KASAN: slab-use-after-free in tcp_write_timer_handler+0x156/0x3e0 Read of size 1 at addr ffff888111f322cd by task swapper/0/0 CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.0-rc4-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 Call Trace: dump_stack_lvl+0x68/0xa0 print_address_description.constprop.0+0x2c/0x3d0 print_report+0xb4/0x270 kasan_report+0xbd/0xf0 tcp_write_timer_handler+0x156/0x3e0 tcp_write_timer+0x66/0x170 call_timer_fn+0xfb/0x1d0 __run_timers+0x3f8/0x480 run_timer_softirq+0x9b/0x100 handle_softirqs+0x153/0x390 __irq_exit_rcu+0x103/0x120 irq_exit_rcu+0xe/0x20 sysvec_apic_timer_interrupt+0x76/0x90 asm_sysvec_apic_timer_interrupt+0x1a/0x20 RIP: 0010:default_idle+0xf/0x20 Code: 4c 01 c7 4c 29 c2 e9 72 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 66 90 0f 00 2d 33 f8 25 00 fb f4 c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 90 90 90 90 90 RSP: 0018:ffffffffa2007e28 EFLAGS: 00000242 RAX: 00000000000f3b31 RBX: 1ffffffff4400fc7 RCX: ffffffffa09c3196 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff9f00590f RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed102360835d R10: ffff88811b041aeb R11: 0000000000000001 R12: 0000000000000000 R13: ffffffffa202d7c0 R14: 0000000000000000 R15: 00000000000147d0 default_idle_call+0x6b/0xa0 cpuidle_idle_call+0x1af/0x1f0 do_idle+0xbc/0x130 cpu_startup_entry+0x33/0x40 rest_init+0x11f/0x210 start_kernel+0x39a/0x420 x86_64_start_reservations+0x18/0x30 x86_64_start_kernel+0x97/0xa0 common_startup_64+0x13e/0x141 Allocated by task 595: kasan_save_stack+0x24/0x50 kasan_save_track+0x14/0x30 __kasan_slab_alloc+0x87/0x90 kmem_cache_alloc_noprof+0x12b/0x3f0 copy_net_ns+0x94/0x380 create_new_namespaces+0x24c/0x500 unshare_nsproxy_namespaces+0x75/0xf0 ksys_unshare+0x24e/0x4f0 __x64_sys_unshare+0x1f/0x30 do_syscall_64+0x70/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e Freed by task 100: kasan_save_stack+0x24/0x50 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x54/0x70 kmem_cache_free+0x156/0x5d0 cleanup_net+0x5d3/0x670 process_one_work+0x776/0xa90 worker_thread+0x2e2/0x560 kthread+0x1a8/0x1f0 ret_from_fork+0x34/0x60 ret_from_fork_asm+0x1a/0x30 Reproduction script: mkdir -p /mnt/nfsshare mkdir -p /mnt/nfs/netns_1 mkfs.ext4 /dev/sdb mount /dev/sdb /mnt/nfsshare systemctl restart nfs-server chmod 777 /mnt/nfsshare exportfs -i -o rw,no_root_squash *:/mnt/nfsshare ip netns add netns_1 ip link add name veth_1_peer type veth peer veth_1 ifconfig veth_1_peer 11.11.0.254 up ip link set veth_1 netns netns_1 ip netns exec netns_1 ifconfig veth_1 11.11.0.1 ip netns exec netns_1 /root/iptables -A OUTPUT -d 11.11.0.254 -p tcp \ --tcp-flags FIN FIN -j DROP (note: In my environment, a DESTROY_CLIENTID operation is always sent immediately, breaking the nfs tcp connection.) ip netns exec netns_1 timeout -s 9 300 mount -t nfs -o proto=tcp,vers=4.1 \ 11.11.0.254:/mnt/nfsshare /mnt/nfs/netns_1 ip netns del netns_1 The reason here is that the tcp socket in netns_1 (nfs side) has been shutdown and closed (done in xs_destroy), but the FIN message (with ack) is discarded, and the nfsd side keeps sending retransmission messages. As a result, when the tcp sock in netns_1 processes the received message, it sends the message (FIN message) in the sending queue, and the tcp timer is re-established. When the network namespace is deleted, the net structure accessed by tcp's timer handler function causes problems. To fix this problem, let's hold netns refcnt for the tcp kernel socket as done in other modules. This is an ugly hack which can easily be backported to earlier kernels. A proper fix which cleans up the interfaces will follow, but may not be so easy to backport. Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.") Signed-off-by: Liu Jian Acked-by: Jeff Layton Reviewed-by: Kuniyuki Iwashima Signed-off-by: Trond Myklebust Signed-off-by: zhangshuqi --- net/sunrpc/svcsock.c | 5 +++++ net/sunrpc/xprtsock.c | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3d5ee042c501..9b9ac36e2438 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1474,6 +1474,11 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, newlen = error; if (protocol == IPPROTO_TCP) { + sock->sk->sk_net_refcnt = 1; + get_net(net); +#ifdef CONFIG_PROC_FS + this_cpu_add(*net->core.sock_inuse, 1); +#endif if ((error = kernel_listen(sock, 64)) < 0) goto bummer; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ae5b5380f0f0..e2ede0163e21 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1834,6 +1834,14 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, goto out; } + if (protocol == IPPROTO_TCP) { + sock->sk->sk_net_refcnt = 1; + get_net(xprt->xprt_net); +#ifdef CONFIG_PROC_FS + this_cpu_add(*xprt->xprt_net->core.sock_inuse, 1); +#endif + } + filp = sock_alloc_file(sock, O_NONBLOCK, NULL); if (IS_ERR(filp)) return ERR_CAST(filp); -- Gitee From 69920412cd39b3b71980c2bcf7bb05e739a10efc Mon Sep 17 00:00:00 2001 From: Omid Ehtemam-Haghighi Date: Tue, 5 Nov 2024 17:02:36 -0800 Subject: [PATCH 057/116] ipv6: Fix soft lockups in fib6_select_path under high next hop churn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-6.13 commit d9ccb18f83ea2bb654289b6ecf014fd267cc988b category: bugfix issue: #IBKGHQ CVE: CVE-2024-56703 Signed-off-by: zhangshuqi --------------------------------------- Soft lockups have been observed on a cluster of Linux-based edge routers located in a highly dynamic environment. Using the `bird` service, these routers continuously update BGP-advertised routes due to frequently changing nexthop destinations, while also managing significant IPv6 traffic. The lockups occur during the traversal of the multipath circular linked-list in the `fib6_select_path` function, particularly while iterating through the siblings in the list. The issue typically arises when the nodes of the linked list are unexpectedly deleted concurrently on a different core—indicated by their 'next' and 'previous' elements pointing back to the node itself and their reference count dropping to zero. This results in an infinite loop, leading to a soft lockup that triggers a system panic via the watchdog timer. Apply RCU primitives in the problematic code sections to resolve the issue. Where necessary, update the references to fib6_siblings to annotate or use the RCU APIs. Include a test script that reproduces the issue. The script periodically updates the routing table while generating a heavy load of outgoing IPv6 traffic through multiple iperf3 clients. It consistently induces infinite soft lockups within a couple of minutes. Kernel log: 0 [ffffbd13003e8d30] machine_kexec at ffffffff8ceaf3eb 1 [ffffbd13003e8d90] __crash_kexec at ffffffff8d0120e3 2 [ffffbd13003e8e58] panic at ffffffff8cef65d4 3 [ffffbd13003e8ed8] watchdog_timer_fn at ffffffff8d05cb03 4 [ffffbd13003e8f08] __hrtimer_run_queues at ffffffff8cfec62f 5 [ffffbd13003e8f70] hrtimer_interrupt at ffffffff8cfed756 6 [ffffbd13003e8fd0] __sysvec_apic_timer_interrupt at ffffffff8cea01af 7 [ffffbd13003e8ff0] sysvec_apic_timer_interrupt at ffffffff8df1b83d -- -- 8 [ffffbd13003d3708] asm_sysvec_apic_timer_interrupt at ffffffff8e000ecb [exception RIP: fib6_select_path+299] RIP: ffffffff8ddafe7b RSP: ffffbd13003d37b8 RFLAGS: 00000287 RAX: ffff975850b43600 RBX: ffff975850b40200 RCX: 0000000000000000 RDX: 000000003fffffff RSI: 0000000051d383e4 RDI: ffff975850b43618 RBP: ffffbd13003d3800 R8: 0000000000000000 R9: ffff975850b40200 R10: 0000000000000000 R11: 0000000000000000 R12: ffffbd13003d3830 R13: ffff975850b436a8 R14: ffff975850b43600 R15: 0000000000000007 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 9 [ffffbd13003d3808] ip6_pol_route at ffffffff8ddb030c 10 [ffffbd13003d3888] ip6_pol_route_input at ffffffff8ddb068c 11 [ffffbd13003d3898] fib6_rule_lookup at ffffffff8ddf02b5 12 [ffffbd13003d3928] ip6_route_input at ffffffff8ddb0f47 13 [ffffbd13003d3a18] ip6_rcv_finish_core.constprop.0 at ffffffff8dd950d0 14 [ffffbd13003d3a30] ip6_list_rcv_finish.constprop.0 at ffffffff8dd96274 15 [ffffbd13003d3a98] ip6_sublist_rcv at ffffffff8dd96474 16 [ffffbd13003d3af8] ipv6_list_rcv at ffffffff8dd96615 17 [ffffbd13003d3b60] __netif_receive_skb_list_core at ffffffff8dc16fec 18 [ffffbd13003d3be0] netif_receive_skb_list_internal at ffffffff8dc176b3 19 [ffffbd13003d3c50] napi_gro_receive at ffffffff8dc565b9 20 [ffffbd13003d3c80] ice_receive_skb at ffffffffc087e4f5 [ice] 21 [ffffbd13003d3c90] ice_clean_rx_irq at ffffffffc0881b80 [ice] 22 [ffffbd13003d3d20] ice_napi_poll at ffffffffc088232f [ice] 23 [ffffbd13003d3d80] __napi_poll at ffffffff8dc18000 24 [ffffbd13003d3db8] net_rx_action at ffffffff8dc18581 25 [ffffbd13003d3e40] __do_softirq at ffffffff8df352e9 26 [ffffbd13003d3eb0] run_ksoftirqd at ffffffff8ceffe47 27 [ffffbd13003d3ec0] smpboot_thread_fn at ffffffff8cf36a30 28 [ffffbd13003d3ee8] kthread at ffffffff8cf2b39f 29 [ffffbd13003d3f28] ret_from_fork at ffffffff8ce5fa64 30 [ffffbd13003d3f50] ret_from_fork_asm at ffffffff8ce03cbb Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table") Reported-by: Adrian Oliver Signed-off-by: Omid Ehtemam-Haghighi Cc: Shuah Khan Cc: Ido Schimmel Cc: Kuniyuki Iwashima Cc: Simon Horman Reviewed-by: David Ahern Link: https://patch.msgid.link/20241106010236.1239299-1-omid.ehtemamhaghighi@menlosecurity.com Signed-off-by: Jakub Kicinski Signed-off-by: zhangshuqi Conflicts: tools/testing/selftests/net/Makefile --- net/ipv6/ip6_fib.c | 8 +- net/ipv6/route.c | 45 ++- tools/testing/selftests/net/Makefile | 1 + .../net/ipv6_route_update_soft_lockup.sh | 262 ++++++++++++++++++ 4 files changed, 297 insertions(+), 19 deletions(-) create mode 100755 tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 23881e5ebb3d..9fbfa9b599cf 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1174,8 +1174,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, while (sibling) { if (sibling->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(sibling)) { - list_add_tail(&rt->fib6_siblings, - &sibling->fib6_siblings); + list_add_tail_rcu(&rt->fib6_siblings, + &sibling->fib6_siblings); break; } sibling = rcu_dereference_protected(sibling->fib6_next, @@ -1236,7 +1236,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, fib6_siblings) sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; - list_del_init(&rt->fib6_siblings); + list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); return err; } @@ -1946,7 +1946,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, &rt->fib6_siblings, fib6_siblings) sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; - list_del_init(&rt->fib6_siblings); + list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3a0f4e5040e1..abd3292d7b60 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -413,8 +413,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { - struct fib6_info *sibling, *next_sibling; struct fib6_info *match = res->f6i; + struct fib6_info *sibling; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; @@ -437,8 +437,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) goto out; - list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, - fib6_siblings) { + list_for_each_entry_rcu(sibling, &match->fib6_siblings, + fib6_siblings) { const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; @@ -5083,14 +5083,18 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, * nexthop. Since sibling routes are always added at the end of * the list, find the first sibling of the last route appended */ + rcu_read_lock(); + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { - rt = list_first_entry(&rt_last->fib6_siblings, - struct fib6_info, - fib6_siblings); + rt = list_first_or_null_rcu(&rt_last->fib6_siblings, + struct fib6_info, + fib6_siblings); } if (rt) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); + + rcu_read_unlock(); } static bool ip6_route_mpath_should_notify(const struct fib6_info *rt) @@ -5435,17 +5439,21 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i) nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, &nexthop_len); } else { - struct fib6_info *sibling, *next_sibling; struct fib6_nh *nh = f6i->fib6_nh; + struct fib6_info *sibling; nexthop_len = 0; if (f6i->fib6_nsiblings) { rt6_nh_nlmsg_size(nh, &nexthop_len); - list_for_each_entry_safe(sibling, next_sibling, - &f6i->fib6_siblings, fib6_siblings) { + rcu_read_lock(); + + list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, + fib6_siblings) { rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); } + + rcu_read_unlock(); } nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); } @@ -5605,7 +5613,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex)) goto nla_put_failure; } else if (rt->fib6_nsiblings) { - struct fib6_info *sibling, *next_sibling; + struct fib6_info *sibling; struct nlattr *mp; mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); @@ -5617,14 +5625,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, 0) < 0) goto nla_put_failure; - list_for_each_entry_safe(sibling, next_sibling, - &rt->fib6_siblings, fib6_siblings) { + rcu_read_lock(); + + list_for_each_entry_rcu(sibling, &rt->fib6_siblings, + fib6_siblings) { if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, sibling->fib6_nh->fib_nh_weight, - AF_INET6, 0) < 0) + AF_INET6, 0) < 0) { + rcu_read_unlock(); + goto nla_put_failure; + } } + rcu_read_unlock(); + nla_nest_end(skb, mp); } else if (rt->nh) { if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id)) @@ -6059,7 +6074,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, err = -ENOBUFS; seq = info->nlh ? info->nlh->nlmsg_seq : 0; - skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); + skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC); if (!skb) goto errout; @@ -6072,7 +6087,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, goto errout; } rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, - info->nlh, gfp_any()); + info->nlh, GFP_ATOMIC); return; errout: if (err < 0) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ef352477cac6..bbd147404351 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -21,6 +21,7 @@ TEST_PROGS += rxtimestamp.sh TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh +TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh new file mode 100755 index 000000000000..a6b2b1f9c641 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh @@ -0,0 +1,262 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing for potential kernel soft lockup during IPv6 routing table +# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup +# occurs, a kernel panic will be triggered to prevent associated issues. +# +# +# Test Environment Layout +# +# ┌----------------┐ ┌----------------┐ +# | SOURCE_NS | | SINK_NS | +# | NAMESPACE | | NAMESPACE | +# |(iperf3 clients)| |(iperf3 servers)| +# | | | | +# | | | | +# | ┌-----------| nexthops |---------┐ | +# | |veth_source|<--------------------------------------->|veth_sink|<┐ | +# | └-----------|2001:0DB8:1::0:1/96 2001:0DB8:1::1:1/96 |---------┘ | | +# | | ^ 2001:0DB8:1::1:2/96 | | | +# | | . . | fwd | | +# | ┌---------┐ | . . | | | +# | | IPv6 | | . . | V | +# | | routing | | . 2001:0DB8:1::1:80/96| ┌-----┐ | +# | | table | | . | | lo | | +# | | nexthop | | . └--------┴-----┴-┘ +# | | update | | ............................> 2001:0DB8:2::1:1/128 +# | └-------- ┘ | +# └----------------┘ +# +# The test script sets up two network namespaces, source_ns and sink_ns, +# connected via a veth link. Within source_ns, it continuously updates the +# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop +# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a +# rate of 1/ROUTING_TABLE_REFRESH_PERIOD per second for TEST_DURATION seconds. +# +# Simultaneously, multiple iperf3 clients within source_ns generate heavy +# outgoing IPv6 traffic. Each client is assigned a unique port number starting +# at 5000 and incrementing sequentially. Each client targets a unique iperf3 +# server running in sink_ns, connected to the SINK_LOOPBACK_IFACE interface +# using the same port number. +# +# The number of iperf3 servers and clients is set to half of the total +# available cores on each machine. +# +# NOTE: We have tested this script on machines with various CPU specifications, +# ranging from lower to higher performance as listed below. The test script +# effectively triggered a kernel soft lockup on machines running an unpatched +# kernel in under a minute: +# +# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz +# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz +# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz +# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz +# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz +# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz +# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz +# +# On less performant machines, you may need to increase the TEST_DURATION +# parameter to enhance the likelihood of encountering a race condition leading +# to a kernel soft lockup and avoid a false negative result. +# +# NOTE: The test may not produce the expected result in virtualized +# environments (e.g., qemu) due to differences in timing and CPU handling, +# which can affect the conditions needed to trigger a soft lockup. + +source lib.sh +source net_helper.sh + +TEST_DURATION=300 +ROUTING_TABLE_REFRESH_PERIOD=0.01 + +IPERF3_BITRATE="300m" + + +IPV6_NEXTHOP_ADDR_COUNT="128" +IPV6_NEXTHOP_ADDR_MASK="96" +IPV6_NEXTHOP_PREFIX="2001:0DB8:1" + + +SOURCE_TEST_IFACE="veth_source" +SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96" + +SINK_TEST_IFACE="veth_sink" +# ${SINK_TEST_IFACE} is populated with the following range of IPv6 addresses: +# 2001:0DB8:1::1:1 to 2001:0DB8:1::1:${IPV6_NEXTHOP_ADDR_COUNT} +SINK_LOOPBACK_IFACE="lo" +SINK_LOOPBACK_IP_MASK="128" +SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1" + +nexthop_ip_list="" +termination_signal="" +kernel_softlokup_panic_prev_val="" + +terminate_ns_processes_by_pattern() { + local ns=$1 + local pattern=$2 + + for pid in $(ip netns pids ${ns}); do + [ -e /proc/$pid/cmdline ] && grep -qe "${pattern}" /proc/$pid/cmdline && kill -9 $pid + done +} + +cleanup() { + echo "info: cleaning up namespaces and terminating all processes within them..." + + + # Terminate iperf3 instances running in the source_ns. To avoid race + # conditions, first iterate over the PIDs and terminate those + # associated with the bash shells running the + # `while true; do iperf3 -c ...; done` loops. In a second iteration, + # terminate the individual `iperf3 -c ...` instances. + terminate_ns_processes_by_pattern ${source_ns} while + terminate_ns_processes_by_pattern ${source_ns} iperf3 + + # Repeat the same process for sink_ns + terminate_ns_processes_by_pattern ${sink_ns} while + terminate_ns_processes_by_pattern ${sink_ns} iperf3 + + # Check if any iperf3 instances are still running. This could happen + # if a core has entered an infinite loop and the timeout for detecting + # the soft lockup has not expired, but either the test interval has + # already elapsed or the test was terminated manually (e.g., with ^C) + for pid in $(ip netns pids ${source_ns}); do + if [ -e /proc/$pid/cmdline ] && grep -qe 'iperf3' /proc/$pid/cmdline; then + echo "FAIL: unable to terminate some iperf3 instances. Soft lockup is underway. A kernel panic is on the way!" + exit ${ksft_fail} + fi + done + + if [ "$termination_signal" == "SIGINT" ]; then + echo "SKIP: Termination due to ^C (SIGINT)" + elif [ "$termination_signal" == "SIGALRM" ]; then + echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION} second test" + fi + + cleanup_ns ${source_ns} ${sink_ns} + + sysctl -qw kernel.softlockup_panic=${kernel_softlokup_panic_prev_val} +} + +setup_prepare() { + setup_ns source_ns sink_ns + + ip -n ${source_ns} link add name ${SOURCE_TEST_IFACE} type veth peer name ${SINK_TEST_IFACE} netns ${sink_ns} + + # Setting up the Source namespace + ip -n ${source_ns} addr add ${SOURCE_TEST_IP_ADDR} dev ${SOURCE_TEST_IFACE} + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} qlen 10000 + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} up + ip netns exec ${source_ns} sysctl -qw net.ipv6.fib_multipath_hash_policy=1 + + # Setting up the Sink namespace + ip -n ${sink_ns} addr add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} dev ${SINK_LOOPBACK_IFACE} + ip -n ${sink_ns} link set dev ${SINK_LOOPBACK_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1 + + ip -n ${sink_ns} link set ${SINK_TEST_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1 + + + # Populate nexthop IPv6 addresses on the test interface in the sink_ns + echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..." + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + ip -n ${sink_ns} addr add ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" "${IP}")/${IPV6_NEXTHOP_ADDR_MASK} dev ${SINK_TEST_IFACE}; + done + + # Preparing list of nexthops + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + nexthop_ip_list=$nexthop_ip_list" nexthop via ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" $IP) dev ${SOURCE_TEST_IFACE} weight 1" + done +} + + +test_soft_lockup_during_routing_table_refresh() { + # Start num_of_iperf_servers iperf3 servers in the sink_ns namespace, + # each listening on ports starting at 5001 and incrementing + # sequentially. Since iperf3 instances may terminate unexpectedly, a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p $(printf '5%03d' ${i}) --rcv-timeout 200 &>/dev/null" + ip netns exec ${sink_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + # Wait for the iperf3 servers to be ready + for i in $(seq ${num_of_iperf_servers}); do + port=$(printf '5%03d' ${i}); + wait_local_port_listen ${sink_ns} ${port} tcp + done + + # Continuously refresh the routing table in the background within + # the source_ns namespace + ip netns exec ${source_ns} bash -c " + while \$(ip netns list | grep -q ${source_ns}); do + ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list}; + sleep ${ROUTING_TABLE_REFRESH_PERIOD}; + ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK}; + done &" + + # Start num_of_iperf_servers iperf3 clients in the source_ns namespace, + # each sending TCP traffic on sequential ports starting at 5001. + # Since iperf3 instances may terminate unexpectedly (e.g., if the route + # to the server is deleted in the background during a route refresh), a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p $(printf '5%03d' ${i}) --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null" + ip netns exec ${source_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..." + echo "info: A kernel soft lockup, if detected, results in a kernel panic!" + + wait +} + +# Make sure 'iperf3' is installed, skip the test otherwise +if [ ! -x "$(command -v "iperf3")" ]; then + echo "SKIP: 'iperf3' is not installed. Skipping the test." + exit ${ksft_skip} +fi + +# Determine the number of cores on the machine +num_of_iperf_servers=$(( $(nproc)/2 )) + +# Check if we are running on a multi-core machine, skip the test otherwise +if [ "${num_of_iperf_servers}" -eq 0 ]; then + echo "SKIP: This test is not valid on a single core machine!" + exit ${ksft_skip} +fi + +# Since the kernel soft lockup we're testing causes at least one core to enter +# an infinite loop, destabilizing the host and likely affecting subsequent +# tests, we trigger a kernel panic instead of reporting a failure and +# continuing +kernel_softlokup_panic_prev_val=$(sysctl -n kernel.softlockup_panic) +sysctl -qw kernel.softlockup_panic=1 + +handle_sigint() { + termination_signal="SIGINT" + cleanup + exit ${ksft_skip} +} + +handle_sigalrm() { + termination_signal="SIGALRM" + cleanup + exit ${ksft_pass} +} + +trap handle_sigint SIGINT +trap handle_sigalrm SIGALRM + +(sleep ${TEST_DURATION} && kill -s SIGALRM $$)& + +setup_prepare +test_soft_lockup_during_routing_table_refresh -- Gitee From 96aaec2609d55a987244142fcb1a5ea9732bfd06 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Fri, 15 Nov 2024 17:38:04 +0800 Subject: [PATCH 058/116] sunrpc: clear XPRT_SOCK_UPD_TIMEOUT when reset transport mainline inclusion from mainline-6.13 commit 4db9ad82a6c823094da27de4825af693a3475d51 category: bugfix issue: #IBKGHQ CVE: CVE-2024-56688 Signed-off-by: zhangshuqi --------------------------------------- Since transport->sock has been set to NULL during reset transport, XPRT_SOCK_UPD_TIMEOUT also needs to be cleared. Otherwise, the xs_tcp_set_socket_timeouts() may be triggered in xs_tcp_send_request() to dereference the transport->sock that has been set to NULL. Fixes: 7196dbb02ea0 ("SUNRPC: Allow changing of the TCP timeout parameters on the fly") Signed-off-by: Li Lingfeng Signed-off-by: Liu Jian Signed-off-by: Trond Myklebust Signed-off-by: zhangshuqi Conflicts: net/sunrpc/xprtsock.c --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e2ede0163e21..855d4a09581a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1134,6 +1134,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state); + clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); } static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) -- Gitee From 82045ea96779875f6dcc104928eff134a24d6232 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 25 Jun 2024 20:16:39 -0400 Subject: [PATCH 059/116] mm: prevent derefencing NULL ptr in pfn_section_valid() mainline inclusion from mainline-5.10.222 commit 0100aeb8a12d51950418e685f879cc80cb8e5982 category: bugfix issue: #IBKGHQ CVE: CVE-2024-41055 Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 82f0b6f041fad768c28b4ad05a683065412c226e ] Commit 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage") changed pfn_section_valid() to add a READ_ONCE() call around "ms->usage" to fix a race with section_deactivate() where ms->usage can be cleared. The READ_ONCE() call, by itself, is not enough to prevent NULL pointer dereference. We need to check its value before dereferencing it. Link: https://lkml.kernel.org/r/20240626001639.1350646-1-longman@redhat.com Fixes: 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage") Signed-off-by: Waiman Long Cc: Charan Teja Kalla Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi Conflicts: include/linux/mmzone.h --- include/linux/mmzone.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5807bc877b78..2d8d3a616092 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1401,8 +1401,9 @@ static inline int subsection_map_index(unsigned long pfn) static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); + struct mem_section_usage *usage = READ_ONCE(ms->usage); - return test_bit(idx, ms->usage->subsection_map); + return usage ? test_bit(idx, usage->subsection_map) : 0; } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) -- Gitee From 45f3656661cf7a24ac474de638fa0f7ebc4175c8 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 13 Sep 2024 15:06:01 -0300 Subject: [PATCH 060/116] media: uvcvideo: Require entities to have a non-zero unique ID mainline inclusion from mainline-5.10.231 commit bde4e7c1527151b596089b3f984818ab537eeb7f category: bugfix issue: #IBKGHQ CVE: CVE-2024-56571 Signed-off-by: zhangshuqi --------------------------------------- commit 3dd075fe8ebbc6fcbf998f81a75b8c4b159a6195 upstream. Per UVC 1.1+ specification 3.7.2, units and terminals must have a non-zero unique ID. ``` Each Unit and Terminal within the video function is assigned a unique identification number, the Unit ID (UID) or Terminal ID (TID), contained in the bUnitID or bTerminalID field of the descriptor. The value 0x00 is reserved for undefined ID, ``` So, deny allocating an entity with ID 0 or an ID that belongs to a unit that is already added to the list of entities. This also prevents some syzkaller reproducers from triggering warnings due to a chain of entities referring to themselves. In one particular case, an Output Unit is connected to an Input Unit, both with the same ID of 1. But when looking up for the source ID of the Output Unit, that same entity is found instead of the input entity, which leads to such warnings. In another case, a backward chain was considered finished as the source ID was 0. Later on, that entity was found, but its pads were not valid. Here is a sample stack trace for one of those cases. [ 20.650953] usb 1-1: new high-speed USB device number 2 using dummy_hcd [ 20.830206] usb 1-1: Using ep0 maxpacket: 8 [ 20.833501] usb 1-1: config 0 descriptor?? [ 21.038518] usb 1-1: string descriptor 0 read error: -71 [ 21.038893] usb 1-1: Found UVC 0.00 device (2833:0201) [ 21.039299] uvcvideo 1-1:0.0: Entity type for entity Output 1 was not initialized! [ 21.041583] uvcvideo 1-1:0.0: Entity type for entity Input 1 was not initialized! [ 21.042218] ------------[ cut here ]------------ [ 21.042536] WARNING: CPU: 0 PID: 9 at drivers/media/mc/mc-entity.c:1147 media_create_pad_link+0x2c4/0x2e0 [ 21.043195] Modules linked in: [ 21.043535] CPU: 0 UID: 0 PID: 9 Comm: kworker/0:1 Not tainted 6.11.0-rc7-00030-g3480e43aeccf #444 [ 21.044101] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 [ 21.044639] Workqueue: usb_hub_wq hub_event [ 21.045100] RIP: 0010:media_create_pad_link+0x2c4/0x2e0 [ 21.045508] Code: fe e8 20 01 00 00 b8 f4 ff ff ff 48 83 c4 30 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 0f 0b eb e9 0f 0b eb 0a 0f 0b eb 06 <0f> 0b eb 02 0f 0b b8 ea ff ff ff eb d4 66 2e 0f 1f 84 00 00 00 00 [ 21.046801] RSP: 0018:ffffc9000004b318 EFLAGS: 00010246 [ 21.047227] RAX: ffff888004e5d458 RBX: 0000000000000000 RCX: ffffffff818fccf1 [ 21.047719] RDX: 000000000000007b RSI: 0000000000000000 RDI: ffff888004313290 [ 21.048241] RBP: ffff888004313290 R08: 0001ffffffffffff R09: 0000000000000000 [ 21.048701] R10: 0000000000000013 R11: 0001888004313290 R12: 0000000000000003 [ 21.049138] R13: ffff888004313080 R14: ffff888004313080 R15: 0000000000000000 [ 21.049648] FS: 0000000000000000(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000 [ 21.050271] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 21.050688] CR2: 0000592cc27635b0 CR3: 000000000431c000 CR4: 0000000000750ef0 [ 21.051136] PKRU: 55555554 [ 21.051331] Call Trace: [ 21.051480] [ 21.051611] ? __warn+0xc4/0x210 [ 21.051861] ? media_create_pad_link+0x2c4/0x2e0 [ 21.052252] ? report_bug+0x11b/0x1a0 [ 21.052540] ? trace_hardirqs_on+0x31/0x40 [ 21.052901] ? handle_bug+0x3d/0x70 [ 21.053197] ? exc_invalid_op+0x1a/0x50 [ 21.053511] ? asm_exc_invalid_op+0x1a/0x20 [ 21.053924] ? media_create_pad_link+0x91/0x2e0 [ 21.054364] ? media_create_pad_link+0x2c4/0x2e0 [ 21.054834] ? media_create_pad_link+0x91/0x2e0 [ 21.055131] ? _raw_spin_unlock+0x1e/0x40 [ 21.055441] ? __v4l2_device_register_subdev+0x202/0x210 [ 21.055837] uvc_mc_register_entities+0x358/0x400 [ 21.056144] uvc_register_chains+0x1fd/0x290 [ 21.056413] uvc_probe+0x380e/0x3dc0 [ 21.056676] ? __lock_acquire+0x5aa/0x26e0 [ 21.056946] ? find_held_lock+0x33/0xa0 [ 21.057196] ? kernfs_activate+0x70/0x80 [ 21.057533] ? usb_match_dynamic_id+0x1b/0x70 [ 21.057811] ? find_held_lock+0x33/0xa0 [ 21.058047] ? usb_match_dynamic_id+0x55/0x70 [ 21.058330] ? lock_release+0x124/0x260 [ 21.058657] ? usb_match_one_id_intf+0xa2/0x100 [ 21.058997] usb_probe_interface+0x1ba/0x330 [ 21.059399] really_probe+0x1ba/0x4c0 [ 21.059662] __driver_probe_device+0xb2/0x180 [ 21.059944] driver_probe_device+0x5a/0x100 [ 21.060170] __device_attach_driver+0xe9/0x160 [ 21.060427] ? __pfx___device_attach_driver+0x10/0x10 [ 21.060872] bus_for_each_drv+0xa9/0x100 [ 21.061312] __device_attach+0xed/0x190 [ 21.061812] device_initial_probe+0xe/0x20 [ 21.062229] bus_probe_device+0x4d/0xd0 [ 21.062590] device_add+0x308/0x590 [ 21.062912] usb_set_configuration+0x7b6/0xaf0 [ 21.063403] usb_generic_driver_probe+0x36/0x80 [ 21.063714] usb_probe_device+0x7b/0x130 [ 21.063936] really_probe+0x1ba/0x4c0 [ 21.064111] __driver_probe_device+0xb2/0x180 [ 21.064577] driver_probe_device+0x5a/0x100 [ 21.065019] __device_attach_driver+0xe9/0x160 [ 21.065403] ? __pfx___device_attach_driver+0x10/0x10 [ 21.065820] bus_for_each_drv+0xa9/0x100 [ 21.066094] __device_attach+0xed/0x190 [ 21.066535] device_initial_probe+0xe/0x20 [ 21.066992] bus_probe_device+0x4d/0xd0 [ 21.067250] device_add+0x308/0x590 [ 21.067501] usb_new_device+0x347/0x610 [ 21.067817] hub_event+0x156b/0x1e30 [ 21.068060] ? process_scheduled_works+0x48b/0xaf0 [ 21.068337] process_scheduled_works+0x5a3/0xaf0 [ 21.068668] worker_thread+0x3cf/0x560 [ 21.068932] ? kthread+0x109/0x1b0 [ 21.069133] kthread+0x197/0x1b0 [ 21.069343] ? __pfx_worker_thread+0x10/0x10 [ 21.069598] ? __pfx_kthread+0x10/0x10 [ 21.069908] ret_from_fork+0x32/0x40 [ 21.070169] ? __pfx_kthread+0x10/0x10 [ 21.070424] ret_from_fork_asm+0x1a/0x30 [ 21.070737] Cc: stable@vger.kernel.org Reported-by: syzbot+0584f746fde3d52b4675@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0584f746fde3d52b4675 Reported-by: syzbot+dd320d114deb3f5bb79b@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=dd320d114deb3f5bb79b Fixes: a3fbc2e6bb05 ("media: mc-entity.c: use WARN_ON, validate link pads") Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Ricardo Ribalda Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20240913180601.1400596-2-cascardo@igalia.com Signed-off-by: Laurent Pinchart Signed-off-by: Hans Verkuil [ ribalda: The context around the changes differs from master. This version is also missing the gpio unit, so that part is gone from the patch. ] Signed-off-by: Ricardo Ribalda Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- drivers/media/usb/uvc/uvc_driver.c | 63 ++++++++++++++++++------------ 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 12fa3ff387a8..c03e7f3e7b5c 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1019,14 +1019,27 @@ static int uvc_parse_streaming(struct uvc_device *dev, return ret; } -static struct uvc_entity *uvc_alloc_entity(u16 type, u8 id, - unsigned int num_pads, unsigned int extra_size) +static struct uvc_entity *uvc_alloc_new_entity(struct uvc_device *dev, u16 type, + u16 id, unsigned int num_pads, + unsigned int extra_size) { struct uvc_entity *entity; unsigned int num_inputs; unsigned int size; unsigned int i; + /* Per UVC 1.1+ spec 3.7.2, the ID should be non-zero. */ + if (id == 0) { + dev_err(&dev->udev->dev, "Found Unit with invalid ID 0.\n"); + return ERR_PTR(-EINVAL); + } + + /* Per UVC 1.1+ spec 3.7.2, the ID is unique. */ + if (uvc_entity_by_id(dev, id)) { + dev_err(&dev->udev->dev, "Found multiple Units with ID %u\n", id); + return ERR_PTR(-EINVAL); + } + extra_size = roundup(extra_size, sizeof(*entity->pads)); if (num_pads) num_inputs = type & UVC_TERM_OUTPUT ? num_pads : num_pads - 1; @@ -1036,7 +1049,7 @@ static struct uvc_entity *uvc_alloc_entity(u16 type, u8 id, + num_inputs; entity = kzalloc(size, GFP_KERNEL); if (entity == NULL) - return NULL; + return ERR_PTR(-ENOMEM); entity->id = id; entity->type = type; @@ -1107,10 +1120,10 @@ static int uvc_parse_vendor_control(struct uvc_device *dev, break; } - unit = uvc_alloc_entity(UVC_VC_EXTENSION_UNIT, buffer[3], - p + 1, 2*n); - if (unit == NULL) - return -ENOMEM; + unit = uvc_alloc_new_entity(dev, UVC_VC_EXTENSION_UNIT, + buffer[3], p + 1, 2 * n); + if (IS_ERR(unit)) + return PTR_ERR(unit); memcpy(unit->extension.guidExtensionCode, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; @@ -1221,10 +1234,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - term = uvc_alloc_entity(type | UVC_TERM_INPUT, buffer[3], - 1, n + p); - if (term == NULL) - return -ENOMEM; + term = uvc_alloc_new_entity(dev, type | UVC_TERM_INPUT, + buffer[3], 1, n + p); + if (IS_ERR(term)) + return PTR_ERR(term); if (UVC_ENTITY_TYPE(term) == UVC_ITT_CAMERA) { term->camera.bControlSize = n; @@ -1280,10 +1293,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return 0; } - term = uvc_alloc_entity(type | UVC_TERM_OUTPUT, buffer[3], - 1, 0); - if (term == NULL) - return -ENOMEM; + term = uvc_alloc_new_entity(dev, type | UVC_TERM_OUTPUT, + buffer[3], 1, 0); + if (IS_ERR(term)) + return PTR_ERR(term); memcpy(term->baSourceID, &buffer[7], 1); @@ -1304,9 +1317,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, 0); - if (unit == NULL) - return -ENOMEM; + unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], + p + 1, 0); + if (IS_ERR(unit)) + return PTR_ERR(unit); memcpy(unit->baSourceID, &buffer[5], p); @@ -1328,9 +1342,9 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_entity(buffer[2], buffer[3], 2, n); - if (unit == NULL) - return -ENOMEM; + unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], 2, n); + if (IS_ERR(unit)) + return PTR_ERR(unit); memcpy(unit->baSourceID, &buffer[4], 1); unit->processing.wMaxMultiplier = @@ -1359,9 +1373,10 @@ static int uvc_parse_standard_control(struct uvc_device *dev, return -EINVAL; } - unit = uvc_alloc_entity(buffer[2], buffer[3], p + 1, n); - if (unit == NULL) - return -ENOMEM; + unit = uvc_alloc_new_entity(dev, buffer[2], buffer[3], + p + 1, n); + if (IS_ERR(unit)) + return PTR_ERR(unit); memcpy(unit->extension.guidExtensionCode, &buffer[4], 16); unit->extension.bNumControls = buffer[20]; -- Gitee From 0a9f4c26522452a79ba95e33e001fce359b114b3 Mon Sep 17 00:00:00 2001 From: Dong Chenchen Date: Wed, 27 Nov 2024 12:08:50 +0800 Subject: [PATCH 061/116] net: Fix icmp host relookup triggering ip_rt_bug mainline inclusion from mainline-6.13 commit c44daa7e3c73229f7ac74985acb8c7fb909c4e0a category: bugfix issue: #IBKGHQ CVE: CVE-2024-56647 Signed-off-by: zhangshuqi --------------------------------------- arp link failure may trigger ip_rt_bug while xfrm enabled, call trace is: WARNING: CPU: 0 PID: 0 at net/ipv4/route.c:1241 ip_rt_bug+0x14/0x20 Modules linked in: CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.0-rc6-00077-g2e1b3cc9d7f7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:ip_rt_bug+0x14/0x20 Call Trace: ip_send_skb+0x14/0x40 __icmp_send+0x42d/0x6a0 ipv4_link_failure+0xe2/0x1d0 arp_error_report+0x3c/0x50 neigh_invalidate+0x8d/0x100 neigh_timer_handler+0x2e1/0x330 call_timer_fn+0x21/0x120 __run_timer_base.part.0+0x1c9/0x270 run_timer_softirq+0x4c/0x80 handle_softirqs+0xac/0x280 irq_exit_rcu+0x62/0x80 sysvec_apic_timer_interrupt+0x77/0x90 The script below reproduces this scenario: ip xfrm policy add src 0.0.0.0/0 dst 0.0.0.0/0 \ dir out priority 0 ptype main flag localok icmp ip l a veth1 type veth ip a a 192.168.141.111/24 dev veth0 ip l s veth0 up ping 192.168.141.155 -c 1 icmp_route_lookup() create input routes for locally generated packets while xfrm relookup ICMP traffic.Then it will set input route (dst->out = ip_rt_bug) to skb for DESTUNREACH. For ICMP err triggered by locally generated packets, dst->dev of output route is loopback. Generally, xfrm relookup verification is not required on loopback interfaces (net.ipv4.conf.lo.disable_xfrm = 1). Skip icmp relookup for locally generated packets to fix it. Fixes: 8b7817f3a959 ("[IPSEC]: Add ICMP host relookup support") Signed-off-by: Dong Chenchen Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241127040850.1513135-1-dongchenchen2@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: zhangshuqi Conflicts: net/ipv4/icmp.c --- net/ipv4/icmp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 0bbc047e8f6e..155ed02c8b6b 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -518,6 +518,9 @@ static struct rtable *icmp_route_lookup(struct net *net, if (!IS_ERR(rt)) { if (rt != rt2) return rt; + if (inet_addr_type_dev_table(net, route_lookup_dev, + fl4->daddr) == RTN_LOCAL) + return rt; } else if (PTR_ERR(rt) == -EPERM) { rt = NULL; } else -- Gitee From c623f69c5b8c23bab9e913d5cf831b2f112ae15c Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Mon, 14 Oct 2024 16:38:00 +0100 Subject: [PATCH 062/116] af_packet: avoid erroring out after sock_init_data() in packet_create() mainline inclusion from mainline-5.10.231 commit 1dc1e1db927056cb323296e2294a855cd003dfe7 category: bugfix issue: #IBKGHQ CVE: CVE-2024-56606 Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit 46f2a11cb82b657fd15bab1c47821b635e03838b ] After sock_init_data() the allocated sk object is attached to the provided sock object. On error, packet_create() frees the sk object leaving the dangling pointer in the sock object on return. Some other code may try to use this pointer and cause use-after-free. Suggested-by: Eric Dumazet Signed-off-by: Ignat Korchagin Reviewed-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241014153808.51894-2-ignat@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi --- net/packet/af_packet.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e31c2b8e0e58..57e99d98bdce 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3321,18 +3321,18 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, if (sock->type == SOCK_PACKET) sock->ops = &packet_ops_spkt; + po = pkt_sk(sk); + err = packet_alloc_pending(po); + if (err) + goto out_sk_free; + sock_init_data(sock, sk); - po = pkt_sk(sk); init_completion(&po->skb_completion); sk->sk_family = PF_PACKET; po->num = proto; po->xmit = dev_queue_xmit; - err = packet_alloc_pending(po); - if (err) - goto out2; - packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; @@ -3367,7 +3367,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, preempt_enable(); return 0; -out2: +out_sk_free: sk_free(sk); out: return err; -- Gitee From a3ef97a898fdd03c7899f07c20c437ed3a3d857e Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 11 Oct 2024 12:50:02 +0800 Subject: [PATCH 063/116] ubi: fastmap: Fix duplicate slab cache names while attaching mainline inclusion from mainline-6.13 commit bcddf52b7a17adcebc768d26f4e27cf79adb424c category: bugfix issue: #IBKGHQ CVE: CVE-2024-53172 Signed-off-by: zhangshuqi --------------------------------------- Since commit 4c39529663b9 ("slab: Warn on duplicate cache names when DEBUG_VM=y"), the duplicate slab cache names can be detected and a kernel WARNING is thrown out. In UBI fast attaching process, alloc_ai() could be invoked twice with the same slab cache name 'ubi_aeb_slab_cache', which will trigger following warning messages: kmem_cache of name 'ubi_aeb_slab_cache' already exists WARNING: CPU: 0 PID: 7519 at mm/slab_common.c:107 __kmem_cache_create_args+0x100/0x5f0 Modules linked in: ubi(+) nandsim [last unloaded: nandsim] CPU: 0 UID: 0 PID: 7519 Comm: modprobe Tainted: G 6.12.0-rc2 RIP: 0010:__kmem_cache_create_args+0x100/0x5f0 Call Trace: __kmem_cache_create_args+0x100/0x5f0 alloc_ai+0x295/0x3f0 [ubi] ubi_attach+0x3c3/0xcc0 [ubi] ubi_attach_mtd_dev+0x17cf/0x3fa0 [ubi] ubi_init+0x3fb/0x800 [ubi] do_init_module+0x265/0x7d0 __x64_sys_finit_module+0x7a/0xc0 The problem could be easily reproduced by loading UBI device by fastmap with CONFIG_DEBUG_VM=y. Fix it by using different slab names for alloc_ai() callers. Fixes: d2158f69a7d4 ("UBI: Remove alloc_ai() slab name from parameter list") Fixes: fdf10ed710c0 ("ubi: Rework Fastmap attach base code") Signed-off-by: Zhihao Cheng Signed-off-by: Richard Weinberger Signed-off-by: zhangshuqi --- drivers/mtd/ubi/attach.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c index ae5abe492b52..adc47b87b38a 100644 --- a/drivers/mtd/ubi/attach.c +++ b/drivers/mtd/ubi/attach.c @@ -1447,7 +1447,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai, return err; } -static struct ubi_attach_info *alloc_ai(void) +static struct ubi_attach_info *alloc_ai(const char *slab_name) { struct ubi_attach_info *ai; @@ -1461,7 +1461,7 @@ static struct ubi_attach_info *alloc_ai(void) INIT_LIST_HEAD(&ai->alien); INIT_LIST_HEAD(&ai->fastmap); ai->volumes = RB_ROOT; - ai->aeb_slab_cache = kmem_cache_create("ubi_aeb_slab_cache", + ai->aeb_slab_cache = kmem_cache_create(slab_name, sizeof(struct ubi_ainf_peb), 0, 0, NULL); if (!ai->aeb_slab_cache) { @@ -1491,7 +1491,7 @@ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai) err = -ENOMEM; - scan_ai = alloc_ai(); + scan_ai = alloc_ai("ubi_aeb_slab_cache_fastmap"); if (!scan_ai) goto out; @@ -1557,7 +1557,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) int err; struct ubi_attach_info *ai; - ai = alloc_ai(); + ai = alloc_ai("ubi_aeb_slab_cache"); if (!ai) return -ENOMEM; @@ -1575,7 +1575,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) if (err > 0 || mtd_is_eccerr(err)) { if (err != UBI_NO_FASTMAP) { destroy_ai(ai); - ai = alloc_ai(); + ai = alloc_ai("ubi_aeb_slab_cache"); if (!ai) return -ENOMEM; @@ -1614,7 +1614,7 @@ int ubi_attach(struct ubi_device *ubi, int force_scan) if (ubi->fm && ubi_dbg_chk_fastmap(ubi)) { struct ubi_attach_info *scan_ai; - scan_ai = alloc_ai(); + scan_ai = alloc_ai("ubi_aeb_slab_cache_dbg_chk_fastmap"); if (!scan_ai) { err = -ENOMEM; goto out_wl; -- Gitee From 90a895f2e90d1345adde07c08b38b0d85a499e29 Mon Sep 17 00:00:00 2001 From: Waqar Hameed Date: Wed, 9 Oct 2024 16:46:59 +0200 Subject: [PATCH 064/116] ubifs: authentication: Fix use-after-free in ubifs_tnc_end_commit mainline inclusion from mainline-6.13 commit 4617fb8fc15effe8eda4dd898d4e33eb537a7140 category: bugfix issue: #IBKGHQ CVE: CVE-2024-53171 Signed-off-by: zhangshuqi --------------------------------------- After an insertion in TNC, the tree might split and cause a node to change its `znode->parent`. A further deletion of other nodes in the tree (which also could free the nodes), the aforementioned node's `znode->cparent` could still point to a freed node. This `znode->cparent` may not be updated when getting nodes to commit in `ubifs_tnc_start_commit()`. This could then trigger a use-after-free when accessing the `znode->cparent` in `write_index()` in `ubifs_tnc_end_commit()`. This can be triggered by running rm -f /etc/test-file.bin dd if=/dev/urandom of=/etc/test-file.bin bs=1M count=60 conv=fsync in a loop, and with `CONFIG_UBIFS_FS_AUTHENTICATION`. KASAN then reports: BUG: KASAN: use-after-free in ubifs_tnc_end_commit+0xa5c/0x1950 Write of size 32 at addr ffffff800a3af86c by task ubifs_bgt0_20/153 Call trace: dump_backtrace+0x0/0x340 show_stack+0x18/0x24 dump_stack_lvl+0x9c/0xbc print_address_description.constprop.0+0x74/0x2b0 kasan_report+0x1d8/0x1f0 kasan_check_range+0xf8/0x1a0 memcpy+0x84/0xf4 ubifs_tnc_end_commit+0xa5c/0x1950 do_commit+0x4e0/0x1340 ubifs_bg_thread+0x234/0x2e0 kthread+0x36c/0x410 ret_from_fork+0x10/0x20 Allocated by task 401: kasan_save_stack+0x38/0x70 __kasan_kmalloc+0x8c/0xd0 __kmalloc+0x34c/0x5bc tnc_insert+0x140/0x16a4 ubifs_tnc_add+0x370/0x52c ubifs_jnl_write_data+0x5d8/0x870 do_writepage+0x36c/0x510 ubifs_writepage+0x190/0x4dc __writepage+0x58/0x154 write_cache_pages+0x394/0x830 do_writepages+0x1f0/0x5b0 filemap_fdatawrite_wbc+0x170/0x25c file_write_and_wait_range+0x140/0x190 ubifs_fsync+0xe8/0x290 vfs_fsync_range+0xc0/0x1e4 do_fsync+0x40/0x90 __arm64_sys_fsync+0x34/0x50 invoke_syscall.constprop.0+0xa8/0x260 do_el0_svc+0xc8/0x1f0 el0_svc+0x34/0x70 el0t_64_sync_handler+0x108/0x114 el0t_64_sync+0x1a4/0x1a8 Freed by task 403: kasan_save_stack+0x38/0x70 kasan_set_track+0x28/0x40 kasan_set_free_info+0x28/0x4c __kasan_slab_free+0xd4/0x13c kfree+0xc4/0x3a0 tnc_delete+0x3f4/0xe40 ubifs_tnc_remove_range+0x368/0x73c ubifs_tnc_remove_ino+0x29c/0x2e0 ubifs_jnl_delete_inode+0x150/0x260 ubifs_evict_inode+0x1d4/0x2e4 evict+0x1c8/0x450 iput+0x2a0/0x3c4 do_unlinkat+0x2cc/0x490 __arm64_sys_unlinkat+0x90/0x100 invoke_syscall.constprop.0+0xa8/0x260 do_el0_svc+0xc8/0x1f0 el0_svc+0x34/0x70 el0t_64_sync_handler+0x108/0x114 el0t_64_sync+0x1a4/0x1a8 The offending `memcpy()` in `ubifs_copy_hash()` has a use-after-free when a node becomes root in TNC but still has a `cparent` to an already freed node. More specifically, consider the following TNC: zroot / / zp1 / / zn Inserting a new node `zn_new` with a key smaller then `zn` will trigger a split in `tnc_insert()` if `zp1` is full: zroot / \ / \ zp1 zp2 / \ / \ zn_new zn `zn->parent` has now been moved to `zp2`, *but* `zn->cparent` still points to `zp1`. Now, consider a removal of all the nodes _except_ `zn`. Just when `tnc_delete()` is about to delete `zroot` and `zp2`: zroot \ \ zp2 \ \ zn `zroot` and `zp2` get freed and the tree collapses: zn `zn` now becomes the new `zroot`. `get_znodes_to_commit()` will now only find `zn`, the new `zroot`, and `write_index()` will check its `znode->cparent` that wrongly points to the already freed `zp1`. `ubifs_copy_hash()` thus gets wrongly called with `znode->cparent->zbranch[znode->iip].hash` that triggers the use-after-free! Fix this by explicitly setting `znode->cparent` to `NULL` in `get_znodes_to_commit()` for the root node. The search for the dirty nodes is bottom-up in the tree. Thus, when `find_next_dirty(znode)` returns NULL, the current `znode` _is_ the root node. Add an assert for this. Fixes: 16a26b20d2af ("ubifs: authentication: Add hashes to index nodes") Tested-by: Waqar Hameed Co-developed-by: Zhihao Cheng Signed-off-by: Zhihao Cheng Signed-off-by: Waqar Hameed Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger Signed-off-by: zhangshuqi --- fs/ubifs/tnc_commit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 234be1c4dc87..dc4f794fd5b7 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -657,6 +657,8 @@ static int get_znodes_to_commit(struct ubifs_info *c) znode->alt = 0; cnext = find_next_dirty(znode); if (!cnext) { + ubifs_assert(c, !znode->parent); + znode->cparent = NULL; znode->cnext = c->cnext; break; } -- Gitee From cd2015a3a1ee9d5dd74f46d2dbf6c67b8ab6e9e8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Dec 2024 12:54:55 +0000 Subject: [PATCH 065/116] net: defer final 'struct net' free in netns dismantle mainline inclusion from mainline-6.13 commit 0f6ede9fbc747e2553612271bce108f7517e7a45 category: bugfix issue: #IBKGHQ CVE: CVE-2024-56658 Signed-off-by: zhangshuqi --------------------------------------- Ilya reported a slab-use-after-free in dst_destroy [1] Issue is in xfrm6_net_init() and xfrm4_net_init() : They copy xfrm[46]_dst_ops_template into net->xfrm.xfrm[46]_dst_ops. But net structure might be freed before all the dst callbacks are called. So when dst_destroy() calls later : if (dst->ops->destroy) dst->ops->destroy(dst); dst->ops points to the old net->xfrm.xfrm[46]_dst_ops, which has been freed. See a relevant issue fixed in : ac888d58869b ("net: do not delay dst_entries_add() in dst_release()") A fix is to queue the 'struct net' to be freed after one another cleanup_net() round (and existing rcu_barrier()) [1] BUG: KASAN: slab-use-after-free in dst_destroy (net/core/dst.c:112) Read of size 8 at addr ffff8882137ccab0 by task swapper/37/0 Dec 03 05:46:18 kernel: CPU: 37 UID: 0 PID: 0 Comm: swapper/37 Kdump: loaded Not tainted 6.12.0 #67 Hardware name: Red Hat KVM/RHEL, BIOS 1.16.1-1.el9 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:124) print_address_description.constprop.0 (mm/kasan/report.c:378) ? dst_destroy (net/core/dst.c:112) print_report (mm/kasan/report.c:489) ? dst_destroy (net/core/dst.c:112) ? kasan_addr_to_slab (mm/kasan/common.c:37) kasan_report (mm/kasan/report.c:603) ? dst_destroy (net/core/dst.c:112) ? rcu_do_batch (kernel/rcu/tree.c:2567) dst_destroy (net/core/dst.c:112) rcu_do_batch (kernel/rcu/tree.c:2567) ? __pfx_rcu_do_batch (kernel/rcu/tree.c:2491) ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4339 kernel/locking/lockdep.c:4406) rcu_core (kernel/rcu/tree.c:2825) handle_softirqs (kernel/softirq.c:554) __irq_exit_rcu (kernel/softirq.c:589 kernel/softirq.c:428 kernel/softirq.c:637) irq_exit_rcu (kernel/softirq.c:651) sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1049 arch/x86/kernel/apic/apic.c:1049) asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:702) RIP: 0010:default_idle (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:92 arch/x86/kernel/process.c:743) Code: 00 4d 29 c8 4c 01 c7 4c 29 c2 e9 6e ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 66 90 0f 00 2d c7 c9 27 00 fb f4 c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 90 RSP: 0018:ffff888100d2fe00 EFLAGS: 00000246 RAX: 00000000001870ed RBX: 1ffff110201a5fc2 RCX: ffffffffb61a3e46 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffffb3d4d123 RBP: 0000000000000000 R08: 0000000000000001 R09: ffffed11c7e1835d R10: ffff888e3f0c1aeb R11: 0000000000000000 R12: 0000000000000000 R13: ffff888100d20000 R14: dffffc0000000000 R15: 0000000000000000 ? ct_kernel_exit.constprop.0 (kernel/context_tracking.c:148) ? cpuidle_idle_call (kernel/sched/idle.c:186) default_idle_call (./include/linux/cpuidle.h:143 kernel/sched/idle.c:118) cpuidle_idle_call (kernel/sched/idle.c:186) ? __pfx_cpuidle_idle_call (kernel/sched/idle.c:168) ? lock_release (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5848) ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4347 kernel/locking/lockdep.c:4406) ? tsc_verify_tsc_adjust (arch/x86/kernel/tsc_sync.c:59) do_idle (kernel/sched/idle.c:326) cpu_startup_entry (kernel/sched/idle.c:423 (discriminator 1)) start_secondary (arch/x86/kernel/smpboot.c:202 arch/x86/kernel/smpboot.c:282) ? __pfx_start_secondary (arch/x86/kernel/smpboot.c:232) ? soft_restart_cpu (arch/x86/kernel/head_64.S:452) common_startup_64 (arch/x86/kernel/head_64.S:414) Dec 03 05:46:18 kernel: Allocated by task 12184: kasan_save_stack (mm/kasan/common.c:48) kasan_save_track (./arch/x86/include/asm/current.h:49 mm/kasan/common.c:60 mm/kasan/common.c:69) __kasan_slab_alloc (mm/kasan/common.c:319 mm/kasan/common.c:345) kmem_cache_alloc_noprof (mm/slub.c:4085 mm/slub.c:4134 mm/slub.c:4141) copy_net_ns (net/core/net_namespace.c:421 net/core/net_namespace.c:480) create_new_namespaces (kernel/nsproxy.c:110) unshare_nsproxy_namespaces (kernel/nsproxy.c:228 (discriminator 4)) ksys_unshare (kernel/fork.c:3313) __x64_sys_unshare (kernel/fork.c:3382) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Dec 03 05:46:18 kernel: Freed by task 11: kasan_save_stack (mm/kasan/common.c:48) kasan_save_track (./arch/x86/include/asm/current.h:49 mm/kasan/common.c:60 mm/kasan/common.c:69) kasan_save_free_info (mm/kasan/generic.c:582) __kasan_slab_free (mm/kasan/common.c:271) kmem_cache_free (mm/slub.c:4579 mm/slub.c:4681) cleanup_net (net/core/net_namespace.c:456 net/core/net_namespace.c:446 net/core/net_namespace.c:647) process_one_work (kernel/workqueue.c:3229) worker_thread (kernel/workqueue.c:3304 kernel/workqueue.c:3391) kthread (kernel/kthread.c:389) ret_from_fork (arch/x86/kernel/process.c:147) ret_from_fork_asm (arch/x86/entry/entry_64.S:257) Dec 03 05:46:18 kernel: Last potentially related work creation: kasan_save_stack (mm/kasan/common.c:48) __kasan_record_aux_stack (mm/kasan/generic.c:541) insert_work (./include/linux/instrumented.h:68 ./include/asm-generic/bitops/instrumented-non-atomic.h:141 kernel/workqueue.c:788 kernel/workqueue.c:795 kernel/workqueue.c:2186) __queue_work (kernel/workqueue.c:2340) queue_work_on (kernel/workqueue.c:2391) xfrm_policy_insert (net/xfrm/xfrm_policy.c:1610) xfrm_add_policy (net/xfrm/xfrm_user.c:2116) xfrm_user_rcv_msg (net/xfrm/xfrm_user.c:3321) netlink_rcv_skb (net/netlink/af_netlink.c:2536) xfrm_netlink_rcv (net/xfrm/xfrm_user.c:3344) netlink_unicast (net/netlink/af_netlink.c:1316 net/netlink/af_netlink.c:1342) netlink_sendmsg (net/netlink/af_netlink.c:1886) sock_write_iter (net/socket.c:729 net/socket.c:744 net/socket.c:1165) vfs_write (fs/read_write.c:590 fs/read_write.c:683) ksys_write (fs/read_write.c:736) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Dec 03 05:46:18 kernel: Second to last potentially related work creation: kasan_save_stack (mm/kasan/common.c:48) __kasan_record_aux_stack (mm/kasan/generic.c:541) insert_work (./include/linux/instrumented.h:68 ./include/asm-generic/bitops/instrumented-non-atomic.h:141 kernel/workqueue.c:788 kernel/workqueue.c:795 kernel/workqueue.c:2186) __queue_work (kernel/workqueue.c:2340) queue_work_on (kernel/workqueue.c:2391) __xfrm_state_insert (./include/linux/workqueue.h:723 net/xfrm/xfrm_state.c:1150 net/xfrm/xfrm_state.c:1145 net/xfrm/xfrm_state.c:1513) xfrm_state_update (./include/linux/spinlock.h:396 net/xfrm/xfrm_state.c:1940) xfrm_add_sa (net/xfrm/xfrm_user.c:912) xfrm_user_rcv_msg (net/xfrm/xfrm_user.c:3321) netlink_rcv_skb (net/netlink/af_netlink.c:2536) xfrm_netlink_rcv (net/xfrm/xfrm_user.c:3344) netlink_unicast (net/netlink/af_netlink.c:1316 net/netlink/af_netlink.c:1342) netlink_sendmsg (net/netlink/af_netlink.c:1886) sock_write_iter (net/socket.c:729 net/socket.c:744 net/socket.c:1165) vfs_write (fs/read_write.c:590 fs/read_write.c:683) ksys_write (fs/read_write.c:736) do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) Fixes: a8a572a6b5f2 ("xfrm: dst_entries_init() per-net dst_ops") Reported-by: Ilya Maximets Closes: https://lore.kernel.org/netdev/CANn89iKKYDVpB=MtmfH7nyv2p=rJWSLedO5k7wSZgtY_tO8WQg@mail.gmail.com/T/#m02c98c3009fe66382b73cfb4db9cf1df6fab3fbf Signed-off-by: Eric Dumazet Acked-by: Paolo Abeni Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241204125455.3871859-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: zhangshuqi Conflicts: net/core/net_namespace.c --- include/net/net_namespace.h | 1 + net/core/net_namespace.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ac0f068f46fc..3c0ba2a91d79 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -83,6 +83,7 @@ struct net { * or to unregister pernet ops * (pernet_ops_rwsem write locked). */ + struct llist_node defer_free_list; struct llist_node cleanup_list; /* namespaces on death row */ #ifdef CONFIG_KEYS diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 72cfe5248b76..a57cbffc42da 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -452,10 +452,26 @@ static struct net *net_alloc(void) goto out; } +static LLIST_HEAD(defer_free_list); + +static void net_complete_free(void) +{ + struct llist_node *kill_list; + struct net *net, *next; + + /* Get the list of namespaces to free from last round. */ + kill_list = llist_del_all(&defer_free_list); + + llist_for_each_entry_safe(net, next, kill_list, defer_free_list) + kmem_cache_free(net_cachep, net); + +} + static void net_free(struct net *net) { kfree(rcu_access_pointer(net->gen)); - kmem_cache_free(net_cachep, net); + /* Wait for an extra rcu_barrier() before final free. */ + llist_add(&net->defer_free_list, &defer_free_list); } void net_drop_ns(void *p) @@ -628,6 +644,8 @@ static void cleanup_net(struct work_struct *work) */ rcu_barrier(); + net_complete_free(); + /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); -- Gitee From 43a9c589d09a0d0037d601be852775797463a938 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Nov 2024 12:13:31 -0500 Subject: [PATCH 066/116] NFSv4.0: Fix a use-after-free problem in the asynchronous open() mainline inclusion from mainline-6.13 commit 2fdb05dc0931250574f0cb0ebeb5ed8e20f4a889 category: bugfix issue: #IBKGHQ CVE: CVE-2024-53173 Signed-off-by: zhangshuqi --------------------------------------- Yang Erkun reports that when two threads are opening files at the same time, and are forced to abort before a reply is seen, then the call to nfs_release_seqid() in nfs4_opendata_free() can result in a use-after-free of the pointer to the defunct rpc task of the other thread. The fix is to ensure that if the RPC call is aborted before the call to nfs_wait_on_sequence() is complete, then we must call nfs_release_seqid() in nfs4_open_release() before the rpc_task is freed. Reported-by: Yang Erkun Fixes: 24ac23ab88df ("NFSv4: Convert open() into an asynchronous RPC call") Reviewed-by: Yang Erkun Signed-off-by: Trond Myklebust Signed-off-by: zhangshuqi --- fs/nfs/nfs4proc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7c3c96ed6085..9ed5fcd2dd14 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2518,12 +2518,14 @@ static void nfs4_open_release(void *calldata) struct nfs4_opendata *data = calldata; struct nfs4_state *state = NULL; + /* In case of error, no cleanup! */ + if (data->rpc_status != 0 || !data->rpc_done) { + nfs_release_seqid(data->o_arg.seqid); + goto out_free; + } /* If this request hasn't been cancelled, do nothing */ if (!data->cancelled) goto out_free; - /* In case of error, no cleanup! */ - if (data->rpc_status != 0 || !data->rpc_done) - goto out_free; /* In case we need an open_confirm, no cleanup! */ if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) goto out_free; -- Gitee From 8af9a7893b8a52a26ca8e2488f3af11f5de461cd Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Mon, 13 May 2024 09:43:45 +0800 Subject: [PATCH 067/116] net: stmmac: move the EST lock to struct stmmac_priv mainline inclusion from mainline-6.10 commit 36ac9e7f2e5786bd37c5cd91132e1f39c29b8197 category: bugfix issue: #IBKGHQ CVE: CVE-2024-38594 Signed-off-by: zhangshuqi --------------------------------------- Reinitialize the whole EST structure would also reset the mutex lock which is embedded in the EST structure, and then trigger the following warning. To address this, move the lock to struct stmmac_priv. We also need to reacquire the mutex lock when doing this initialization. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 3 PID: 505 at kernel/locking/mutex.c:587 __mutex_lock+0xd84/0x1068 Modules linked in: CPU: 3 PID: 505 Comm: tc Not tainted 6.9.0-rc6-00053-g0106679839f7-dirty #29 Hardware name: NXP i.MX8MPlus EVK board (DT) pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __mutex_lock+0xd84/0x1068 lr : __mutex_lock+0xd84/0x1068 sp : ffffffc0864e3570 x29: ffffffc0864e3570 x28: ffffffc0817bdc78 x27: 0000000000000003 x26: ffffff80c54f1808 x25: ffffff80c9164080 x24: ffffffc080d723ac x23: 0000000000000000 x22: 0000000000000002 x21: 0000000000000000 x20: 0000000000000000 x19: ffffffc083bc3000 x18: ffffffffffffffff x17: ffffffc08117b080 x16: 0000000000000002 x15: ffffff80d2d40000 x14: 00000000000002da x13: ffffff80d2d404b8 x12: ffffffc082b5a5c8 x11: ffffffc082bca680 x10: ffffffc082bb2640 x9 : ffffffc082bb2698 x8 : 0000000000017fe8 x7 : c0000000ffffefff x6 : 0000000000000001 x5 : ffffff8178fe0d48 x4 : 0000000000000000 x3 : 0000000000000027 x2 : ffffff8178fe0d50 x1 : 0000000000000000 x0 : 0000000000000000 Call trace: __mutex_lock+0xd84/0x1068 mutex_lock_nested+0x28/0x34 tc_setup_taprio+0x118/0x68c stmmac_setup_tc+0x50/0xf0 taprio_change+0x868/0xc9c Fixes: b2aae654a479 ("net: stmmac: add mutex lock to protect est parameters") Signed-off-by: Xiaolei Wang Reviewed-by: Simon Horman Reviewed-by: Serge Semin Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/20240513014346.1718740-2-xiaolei.wang@windriver.com Signed-off-by: Jakub Kicinski Signed-off-by: zhangshuqi Conflicts: drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 ++ .../net/ethernet/stmicro/stmmac/stmmac_tc.c | 20 ++++++++++--------- include/linux/stmmac.h | 1 - 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 617c960cfb5a..b98e84a0839a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -192,6 +192,8 @@ struct stmmac_priv { struct stmmac_extra_stats xstats ____cacheline_aligned_in_smp; struct stmmac_safety_stats sstats; struct plat_stmmacenet_data *plat; + /* Protect est parameters */ + struct mutex est_lock; struct dma_features dma_cap; struct stmmac_counters mmc; int hw_cap_support; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 43165c662740..2429c57fa452 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -688,17 +688,19 @@ static int tc_setup_taprio(struct stmmac_priv *priv, if (!plat->est) return -ENOMEM; - mutex_init(&priv->plat->est->lock); + mutex_init(&priv->est_lock); } else { + mutex_lock(&priv->est_lock); memset(plat->est, 0, sizeof(*plat->est)); + mutex_unlock(&priv->est_lock); } size = qopt->num_entries; - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); priv->plat->est->gcl_size = size; priv->plat->est->enable = qopt->enable; - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); for (i = 0; i < size; i++) { s64 delta_ns = qopt->entries[i].interval; @@ -729,7 +731,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->gcl[i] = delta_ns | (gates << wid); } - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); /* Adjust for real system time */ priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); current_time_ns = timespec64_to_ktime(current_time); @@ -755,7 +757,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->ctr[1] = (u32)ctr; if (fpe && !priv->dma_cap.fpesel) { - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); return -EOPNOTSUPP; } @@ -763,14 +765,14 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, fpe); if (ret && fpe) { - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); netdev_err(priv->dev, "failed to enable Frame Preemption\n"); return ret; } ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); if (ret) { netdev_err(priv->dev, "failed to configure EST\n"); goto disable; @@ -781,11 +783,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv, disable: if (priv->plat->est) { - mutex_lock(&priv->plat->est->lock); + mutex_lock(&priv->est_lock); priv->plat->est->enable = false; stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); - mutex_unlock(&priv->plat->est->lock); + mutex_unlock(&priv->est_lock); } return ret; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index abf7b8ec1fb6..184d7a814dc9 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -112,7 +112,6 @@ struct stmmac_axi { #define EST_GCL 1024 struct stmmac_est { - struct mutex lock; int enable; u32 btr_offset[2]; u32 btr[2]; -- Gitee From 1a230cbb1cc75c9c00d0bfdab3a217ff67d4cc4a Mon Sep 17 00:00:00 2001 From: Wander Lairson Costa Date: Wed, 24 Jul 2024 11:22:47 -0300 Subject: [PATCH 068/116] sched/deadline: Fix warning in migrate_enable for boosted tasks mainline inclusion from mainline-6.13 commit 0664e2c311b9fa43b33e3e81429cd0c2d7f9c638 category: bugfix issue: #IBKGHQ CVE: CVE-2024-56583 Signed-off-by: zhangshuqi --------------------------------------- When running the following command: while true; do stress-ng --cyclic 30 --timeout 30s --minimize --quiet done a warning is eventually triggered: WARNING: CPU: 43 PID: 2848 at kernel/sched/deadline.c:794 setup_new_dl_entity+0x13e/0x180 ... Call Trace: ? show_trace_log_lvl+0x1c4/0x2df ? enqueue_dl_entity+0x631/0x6e0 ? setup_new_dl_entity+0x13e/0x180 ? __warn+0x7e/0xd0 ? report_bug+0x11a/0x1a0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 enqueue_dl_entity+0x631/0x6e0 enqueue_task_dl+0x7d/0x120 __do_set_cpus_allowed+0xe3/0x280 __set_cpus_allowed_ptr_locked+0x140/0x1d0 __set_cpus_allowed_ptr+0x54/0xa0 migrate_enable+0x7e/0x150 rt_spin_unlock+0x1c/0x90 group_send_sig_info+0xf7/0x1a0 ? kill_pid_info+0x1f/0x1d0 kill_pid_info+0x78/0x1d0 kill_proc_info+0x5b/0x110 __x64_sys_kill+0x93/0xc0 do_syscall_64+0x5c/0xf0 entry_SYSCALL_64_after_hwframe+0x6e/0x76 RIP: 0033:0x7f0dab31f92b This warning occurs because set_cpus_allowed dequeues and enqueues tasks with the ENQUEUE_RESTORE flag set. If the task is boosted, the warning is triggered. A boosted task already had its parameters set by rt_mutex_setprio, and a new call to setup_new_dl_entity is unnecessary, hence the WARN_ON call. Check if we are requeueing a boosted task and avoid calling setup_new_dl_entity if that's the case. Fixes: 295d6d5e3736 ("sched/deadline: Fix switching to -deadline") Signed-off-by: Wander Lairson Costa Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juri Lelli Link: https://lore.kernel.org/r/20240724142253.27145-2-wander@redhat.com Signed-off-by: zhangshuqi Conflicts: kernel/sched/deadline.c --- kernel/sched/deadline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e811fdbefdd3..f8c5803cf142 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1519,6 +1519,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags) } else if (flags & ENQUEUE_REPLENISH) { replenish_dl_entity(dl_se); } else if ((flags & ENQUEUE_RESTORE) && + !is_dl_boosted(dl_se) && dl_time_before(dl_se->deadline, rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) { setup_new_dl_entity(dl_se); -- Gitee From 39e84daad04a8490008cfb2ef837430ba242dc58 Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Mon, 16 Dec 2024 15:32:38 +0800 Subject: [PATCH 069/116] tracing: Prevent bad count for tracing_cpumask_write mainline inclusion from mainline-6.13 commit 98feccbf32cfdde8c722bc4587aaa60ee5ac33f0 category: bugfix issue: #IBKGHQ CVE: CVE-2024-56763 Signed-off-by: zhangshuqi --------------------------------------- If a large count is provided, it will trigger a warning in bitmap_parse_user. Also check zero for it. Cc: stable@vger.kernel.org Fixes: 9e01c1b74c953 ("cpumask: convert kernel trace functions") Link: https://lore.kernel.org/20241216073238.2573704-1-lizhi.xu@windriver.com Reported-by: syzbot+0aecfd34fb878546f3fd@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0aecfd34fb878546f3fd Tested-by: syzbot+0aecfd34fb878546f3fd@syzkaller.appspotmail.com Signed-off-by: Lizhi Xu Signed-off-by: Steven Rostedt (Google) Signed-off-by: zhangshuqi Conflicts: kernel/trace/trace.c --- kernel/trace/trace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 548f694fc857..3ff88eb2850d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4896,6 +4896,9 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf, cpumask_var_t tracing_cpumask_new; int err; + if (count == 0 || count > KMALLOC_MAX_SIZE) + return -EINVAL; + if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL)) return -ENOMEM; -- Gitee From b14bdc7cf749745b4b756145ff4d9fdd4808c8db Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Fri, 17 May 2024 08:58:00 -0700 Subject: [PATCH 070/116] media: dvb-frontends: dib3000mb: fix uninit-value in dib3000_write_reg mainline inclusion from mainline-6.13 commit 2dd59fe0e19e1ab955259978082b62e5751924c7 category: bugfix issue: #IBKGHQ CVE: CVE-2024-56769 Signed-off-by: zhangshuqi --------------------------------------- Syzbot reports [1] an uninitialized value issue found by KMSAN in dib3000_read_reg(). Local u8 rb[2] is used in i2c_transfer() as a read buffer; in case that call fails, the buffer may end up with some undefined values. Since no elaborate error handling is expected in dib3000_write_reg(), simply zero out rb buffer to mitigate the problem. [1] Syzkaller report dvb-usb: bulk message failed: -22 (6/0) ===================================================== BUG: KMSAN: uninit-value in dib3000mb_attach+0x2d8/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758 dib3000mb_attach+0x2d8/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758 dibusb_dib3000mb_frontend_attach+0x155/0x2f0 drivers/media/usb/dvb-usb/dibusb-mb.c:31 dvb_usb_adapter_frontend_init+0xed/0x9a0 drivers/media/usb/dvb-usb/dvb-usb-dvb.c:290 dvb_usb_adapter_init drivers/media/usb/dvb-usb/dvb-usb-init.c:90 [inline] dvb_usb_init drivers/media/usb/dvb-usb/dvb-usb-init.c:186 [inline] dvb_usb_device_init+0x25a8/0x3760 drivers/media/usb/dvb-usb/dvb-usb-init.c:310 dibusb_probe+0x46/0x250 drivers/media/usb/dvb-usb/dibusb-mb.c:110 ... Local variable rb created at: dib3000_read_reg+0x86/0x4e0 drivers/media/dvb-frontends/dib3000mb.c:54 dib3000mb_attach+0x123/0x3c0 drivers/media/dvb-frontends/dib3000mb.c:758 ... Fixes: 74340b0a8bc6 ("V4L/DVB (4457): Remove dib3000-common-module") Reported-by: syzbot+c88fc0ebe0d5935c70da@syzkaller.appspotmail.com Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20240517155800.9881-1-n.zhandarovich@fintech.ru Signed-off-by: Mauro Carvalho Chehab Signed-off-by: zhangshuqi --- drivers/media/dvb-frontends/dib3000mb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/dvb-frontends/dib3000mb.c b/drivers/media/dvb-frontends/dib3000mb.c index c598b2a63325..7c452ddd9e40 100644 --- a/drivers/media/dvb-frontends/dib3000mb.c +++ b/drivers/media/dvb-frontends/dib3000mb.c @@ -51,7 +51,7 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,2=xfer,4=setfe,8=getfe (|-a static int dib3000_read_reg(struct dib3000_state *state, u16 reg) { u8 wb[] = { ((reg >> 8) | 0x80) & 0xff, reg & 0xff }; - u8 rb[2]; + u8 rb[2] = {}; struct i2c_msg msg[] = { { .addr = state->config.demod_address, .flags = 0, .buf = wb, .len = 2 }, { .addr = state->config.demod_address, .flags = I2C_M_RD, .buf = rb, .len = 2 }, -- Gitee From fd7662dd8318a2954d4718a9c66d324cb13daf53 Mon Sep 17 00:00:00 2001 From: Martin Ottens Date: Tue, 10 Dec 2024 14:14:11 +0100 Subject: [PATCH 071/116] net/sched: netem: account for backlog updates from child qdisc mainline inclusion from mainline-5.10.232 commit 216509dda290f6db92c816dd54b83c1df9da9e76 category: bugfix issue: #IBKGHQ CVE: CVE-2024-56770 Signed-off-by: zhangshuqi --------------------------------------- [ Upstream commit f8d4bc455047cf3903cd6f85f49978987dbb3027 ] In general, 'qlen' of any classful qdisc should keep track of the number of packets that the qdisc itself and all of its children holds. In case of netem, 'qlen' only accounts for the packets in its internal tfifo. When netem is used with a child qdisc, the child qdisc can use 'qdisc_tree_reduce_backlog' to inform its parent, netem, about created or dropped SKBs. This function updates 'qlen' and the backlog statistics of netem, but netem does not account for changes made by a child qdisc. 'qlen' then indicates the wrong number of packets in the tfifo. If a child qdisc creates new SKBs during enqueue and informs its parent about this, netem's 'qlen' value is increased. When netem dequeues the newly created SKBs from the child, the 'qlen' in netem is not updated. If 'qlen' reaches the configured sch->limit, the enqueue function stops working, even though the tfifo is not full. Reproduce the bug: Ensure that the sender machine has GSO enabled. Configure netem as root qdisc and tbf as its child on the outgoing interface of the machine as follows: $ tc qdisc add dev root handle 1: netem delay 100ms limit 100 $ tc qdisc add dev parent 1:0 tbf rate 50Mbit burst 1542 latency 50ms Send bulk TCP traffic out via this interface, e.g., by running an iPerf3 client on the machine. Check the qdisc statistics: $ tc -s qdisc show dev Statistics after 10s of iPerf3 TCP test before the fix (note that netem's backlog > limit, netem stopped accepting packets): qdisc netem 1: root refcnt 2 limit 1000 delay 100ms Sent 2767766 bytes 1848 pkt (dropped 652, overlimits 0 requeues 0) backlog 4294528236b 1155p requeues 0 qdisc tbf 10: parent 1:1 rate 50Mbit burst 1537b lat 50ms Sent 2767766 bytes 1848 pkt (dropped 327, overlimits 7601 requeues 0) backlog 0b 0p requeues 0 Statistics after the fix: qdisc netem 1: root refcnt 2 limit 1000 delay 100ms Sent 37766372 bytes 24974 pkt (dropped 9, overlimits 0 requeues 0) backlog 0b 0p requeues 0 qdisc tbf 10: parent 1:1 rate 50Mbit burst 1537b lat 50ms Sent 37766372 bytes 24974 pkt (dropped 327, overlimits 96017 requeues 0) backlog 0b 0p requeues 0 tbf segments the GSO SKBs (tbf_segment) and updates the netem's 'qlen'. The interface fully stops transferring packets and "locks". In this case, the child qdisc and tfifo are empty, but 'qlen' indicates the tfifo is at its limit and no more packets are accepted. This patch adds a counter for the entries in the tfifo. Netem's 'qlen' is only decreased when a packet is returned by its dequeue function, and not during enqueuing into the child qdisc. External updates to 'qlen' are thus accounted for and only the behavior of the backlog statistics changes. As in other qdiscs, 'qlen' then keeps track of how many packets are held in netem and all of its children. As before, sch->limit remains as the maximum number of packets in the tfifo. The same applies to netem's backlog statistics. Fixes: 50612537e9ab ("netem: fix classful handling") Signed-off-by: Martin Ottens Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20241210131412.1837202-1-martin.ottens@fau.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: zhangshuqi Conflicts: net/sched/sch_netem.c --- net/sched/sch_netem.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 08aaa6efc62c..c6416cf0fcb0 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -77,6 +77,8 @@ struct netem_sched_data { struct sk_buff *t_head; struct sk_buff *t_tail; + u32 t_len; + /* optional qdisc for classful handling (NULL at netem init) */ struct Qdisc *qdisc; @@ -373,6 +375,7 @@ static void tfifo_reset(struct Qdisc *sch) rtnl_kfree_skbs(q->t_head, q->t_tail); q->t_head = NULL; q->t_tail = NULL; + q->t_len = 0; } static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) @@ -402,6 +405,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) rb_link_node(&nskb->rbnode, parent, p); rb_insert_color(&nskb->rbnode, &q->t_root); } + q->t_len++; sch->q.qlen++; } @@ -517,7 +521,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<(prandom_u32() % 8); } - if (unlikely(sch->q.qlen >= sch->limit)) { + if (unlikely(q->t_len >= sch->limit)) { /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); @@ -681,8 +685,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) tfifo_dequeue: skb = __qdisc_dequeue_head(&sch->q); if (skb) { - qdisc_qstats_backlog_dec(sch, skb); deliver: + qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); return skb; } @@ -698,8 +702,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (time_to_send <= now && q->slot.slot_next <= now) { netem_erase_head(q, skb); - sch->q.qlen--; - qdisc_qstats_backlog_dec(sch, skb); + q->t_len--; skb->next = NULL; skb->prev = NULL; /* skb->dev shares skb->rbnode area, @@ -722,21 +725,25 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) err = qdisc_enqueue(skb, q->qdisc, &to_free); kfree_skb_list(to_free); - if (err != NET_XMIT_SUCCESS && - net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, - pkt_len); + if (err != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(err)) + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, pkt_len); + sch->qstats.backlog -= pkt_len; + sch->q.qlen--; } goto tfifo_dequeue; } + sch->q.qlen--; goto deliver; } if (q->qdisc) { skb = q->qdisc->ops->dequeue(q->qdisc); - if (skb) + if (skb) { + sch->q.qlen--; goto deliver; + } } qdisc_watchdog_schedule_ns(&q->watchdog, @@ -746,8 +753,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (q->qdisc) { skb = q->qdisc->ops->dequeue(q->qdisc); - if (skb) + if (skb) { + sch->q.qlen--; goto deliver; + } } return NULL; } -- Gitee From 4411eecd2b86c9fcd02d942f1919e6cf2e8636b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 29 Nov 2024 06:26:28 +0200 Subject: [PATCH 072/116] drm/modes: Avoid divide by zero harder in drm_mode_vrefresh() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-6.13 commit 9398332f23fab10c5ec57c168b44e72997d6318e category: bugfix issue: #IBKGHQ CVE: CVE-2024-56369 Signed-off-by: zhangshuqi --------------------------------------- drm_mode_vrefresh() is trying to avoid divide by zero by checking whether htotal or vtotal are zero. But we may still end up with a div-by-zero of vtotal*htotal*... Cc: stable@vger.kernel.org Reported-by: syzbot+622bba18029bcde672e1@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=622bba18029bcde672e1 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20241129042629.18280-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula Signed-off-by: zhangshuqi --- drivers/gpu/drm/drm_modes.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 0f99e5453f15..8bebc29a129d 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -757,14 +757,11 @@ EXPORT_SYMBOL(drm_mode_set_name); */ int drm_mode_vrefresh(const struct drm_display_mode *mode) { - unsigned int num, den; + unsigned int num = 1, den = 1; if (mode->htotal == 0 || mode->vtotal == 0) return 0; - num = mode->clock; - den = mode->htotal * mode->vtotal; - if (mode->flags & DRM_MODE_FLAG_INTERLACE) num *= 2; if (mode->flags & DRM_MODE_FLAG_DBLSCAN) @@ -772,6 +769,12 @@ int drm_mode_vrefresh(const struct drm_display_mode *mode) if (mode->vscan > 1) den *= mode->vscan; + if (check_mul_overflow(mode->clock, num, &num)) + return 0; + + if (check_mul_overflow(mode->htotal * mode->vtotal, den, &den)) + return 0; + return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(num, 1000), den); } EXPORT_SYMBOL(drm_mode_vrefresh); -- Gitee From e534081c268d9af8f2074cc6c20f76f982b5dbad Mon Sep 17 00:00:00 2001 From: liuzerun Date: Mon, 24 Feb 2025 14:31:05 +0800 Subject: [PATCH 073/116] hmdfs remount Signed-off-by: liuzerun --- fs/hmdfs/hmdfs.h | 2 +- fs/hmdfs/main.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/hmdfs/hmdfs.h b/fs/hmdfs/hmdfs.h index c9940693f3e7..22d64c162b10 100644 --- a/fs/hmdfs/hmdfs.h +++ b/fs/hmdfs/hmdfs.h @@ -43,7 +43,7 @@ // 20 digits +'\0', Converted from a u64 integer #define HMDFS_ACCOUNT_HASH_MAX_LEN 21 -#define CTRL_PATH_MAX_LEN 21 +#define CTRL_PATH_MAX_LEN 11 #define HMDFS_SUPER_MAGIC 0x20200302 diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index 10d37cbac2ef..db4f58d45391 100644 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -864,7 +864,6 @@ static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) struct super_block *lower_sb; struct dentry *root_dentry; char ctrl_path[CTRL_PATH_MAX_LEN]; - uint64_t ctrl_hash; if (!raw_data) return -EINVAL; @@ -901,9 +900,8 @@ static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) goto out_freesbi; // add ctrl sysfs node - ctrl_hash = path_hash(sbi->local_dst, strlen(sbi->local_dst), true); - scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%llu", ctrl_hash); - hmdfs_debug("hash %llu", ctrl_hash); + scnprintf(ctrl_path, CTRL_PATH_MAX_LEN, "%u", sb->s_dev); + hmdfs_debug("s_dev %u", sb->s_dev); err = hmdfs_register_sysfs(ctrl_path, sbi); if (err) goto out_freesbi; -- Gitee From 6f5ea34e198191022d4726d40a22eb5daf3a7d2e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 5 Apr 2024 15:50:47 -0400 Subject: [PATCH 074/116] Bluetooth: L2CAP: Fix not validating setsockopt user input stable inclusion from stable-v6.8.7 commit 8ee0c132a61df9723813c40e742dc5321824daa9 category: bugfix issue: #IBRFDQ CVE: CVE-2024-35965 Signed-off-by: zyf1116 --------------------------------------- [ Upstream commit 4f3951242ace5efc7131932e2e01e6ac6baed846 ] Check user input length before copying data. Fixes: 33575df7be67 ("Bluetooth: move l2cap_sock_setsockopt() to l2cap_sock.c") Fixes: 3ee7b7cd8390 ("Bluetooth: Add BT_MODE socket option") Signed-off-by: Eric Dumazet Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin Signed-off-by: zyf1116 --- net/bluetooth/l2cap_sock.c | 52 +++++++++++++++----------------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 4493fc68efca..d878f422fc02 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -744,7 +744,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, struct sock *sk = sock->sk; struct l2cap_chan *chan = l2cap_pi(sk)->chan; struct l2cap_options opts; - int len, err = 0; + int err = 0; u32 opt; BT_DBG("sk %p", sk); @@ -771,11 +771,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, opts.max_tx = chan->max_tx; opts.txwin_size = chan->tx_win; - len = min_t(unsigned int, sizeof(opts), optlen); - if (copy_from_sockptr(&opts, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen); + if (err) break; - } if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) { err = -EINVAL; @@ -818,10 +816,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, break; case L2CAP_LM: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt & L2CAP_LM_FIPS) { err = -EINVAL; @@ -902,7 +899,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, struct bt_security sec; struct bt_power pwr; struct l2cap_conn *conn; - int len, err = 0; + int err = 0; u32 opt; u16 mtu; u8 mode; @@ -928,11 +925,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, sec.level = BT_SECURITY_LOW; - len = min_t(unsigned int, sizeof(sec), optlen); - if (copy_from_sockptr(&sec, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen); + if (err) break; - } if (sec.level < BT_SECURITY_LOW || sec.level > BT_SECURITY_FIPS) { @@ -977,10 +972,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt) { set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); @@ -992,10 +986,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_FLUSHABLE: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt > BT_FLUSHABLE_ON) { err = -EINVAL; @@ -1027,11 +1020,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, pwr.force_active = BT_POWER_FORCE_ACTIVE_ON; - len = min_t(unsigned int, sizeof(pwr), optlen); - if (copy_from_sockptr(&pwr, optval, len)) { - err = -EFAULT; + err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen); + if (err) break; - } if (pwr.force_active) set_bit(FLAG_FORCE_ACTIVE, &chan->flags); @@ -1040,10 +1031,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; case BT_CHANNEL_POLICY: - if (copy_from_sockptr(&opt, optval, sizeof(u32))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); + if (err) break; - } if (opt > BT_CHANNEL_POLICY_AMP_PREFERRED) { err = -EINVAL; @@ -1088,10 +1078,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&mtu, optval, sizeof(u16))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen); + if (err) break; - } if (chan->mode == L2CAP_MODE_EXT_FLOWCTL && sk->sk_state == BT_CONNECTED) @@ -1119,10 +1108,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&mode, optval, sizeof(u8))) { - err = -EFAULT; + err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen); + if (err) break; - } BT_DBG("mode %u", mode); -- Gitee From d0d25fce731280b65c4ce89121fade82fa4c450a Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Thu, 21 Nov 2024 18:08:54 +0530 Subject: [PATCH 075/116] quota: flush quota_release_work upon quota writeback stable inclusion from stable-v5.10.231 commit 6f3821acd7c3143145999248087de5fb4b48cf26 category: bugfix issue: NA CVE: CVE-2024-56780 Signed-off-by: zyf1116 --------------------------------------- [ Upstream commit ac6f420291b3fee1113f21d612fa88b628afab5b ] One of the paths quota writeback is called from is: freeze_super() sync_filesystem() ext4_sync_fs() dquot_writeback_dquots() Since we currently don't always flush the quota_release_work queue in this path, we can end up with the following race: 1. dquot are added to releasing_dquots list during regular operations. 2. FS Freeze starts, however, this does not flush the quota_release_work queue. 3. Freeze completes. 4. Kernel eventually tries to flush the workqueue while FS is frozen which hits a WARN_ON since transaction gets started during frozen state: ext4_journal_check_start+0x28/0x110 [ext4] (unreliable) __ext4_journal_start_sb+0x64/0x1c0 [ext4] ext4_release_dquot+0x90/0x1d0 [ext4] quota_release_workfn+0x43c/0x4d0 Which is the following line: WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); Which ultimately results in generic/390 failing due to dmesg noise. This was detected on powerpc machine 15 cores. To avoid this, make sure to flush the workqueue during dquot_writeback_dquots() so we dont have any pending workitems after freeze. Reported-by: Disha Goel CC: stable@vger.kernel.org Fixes: dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide") Reviewed-by: Baokun Li Signed-off-by: Ojaswin Mujoo Signed-off-by: Jan Kara Link: https://patch.msgid.link/20241121123855.645335-2-ojaswin@linux.ibm.com Signed-off-by: Sasha Levin Signed-off-by: zyf1116 --- fs/quota/dquot.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index cb5c1feaba98..7ab4b1e53d56 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -692,6 +692,8 @@ int dquot_writeback_dquots(struct super_block *sb, int type) WARN_ON_ONCE(!rwsem_is_locked(&sb->s_umount)); + flush_delayed_work("a_release_work); + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; -- Gitee From 15303b3c3b305e4c8c3ca4fa42ec8144902f2cc2 Mon Sep 17 00:00:00 2001 From: Seiji Nishikawa Date: Sun, 1 Dec 2024 01:12:34 +0900 Subject: [PATCH 076/116] mm: vmscan: account for free pages to prevent infinite Loop in throttle_direct_reclaim() stable inclusion from stable-v5.10.233 commit d675fefbaec3815b3ae0af1bebd97f27df3a05c8 category: bugfix issue: NA CVE: CVE-2024-57884 Signed-off-by: zyf1116 --------------------------------------- commit 6aaced5abd32e2a57cd94fd64f824514d0361da8 upstream. The task sometimes continues looping in throttle_direct_reclaim() because allow_direct_reclaim(pgdat) keeps returning false. #0 [ffff80002cb6f8d0] __switch_to at ffff8000080095ac #1 [ffff80002cb6f900] __schedule at ffff800008abbd1c #2 [ffff80002cb6f990] schedule at ffff800008abc50c #3 [ffff80002cb6f9b0] throttle_direct_reclaim at ffff800008273550 #4 [ffff80002cb6fa20] try_to_free_pages at ffff800008277b68 #5 [ffff80002cb6fae0] __alloc_pages_nodemask at ffff8000082c4660 #6 [ffff80002cb6fc50] alloc_pages_vma at ffff8000082e4a98 #7 [ffff80002cb6fca0] do_anonymous_page at ffff80000829f5a8 #8 [ffff80002cb6fce0] __handle_mm_fault at ffff8000082a5974 #9 [ffff80002cb6fd90] handle_mm_fault at ffff8000082a5bd4 At this point, the pgdat contains the following two zones: NODE: 4 ZONE: 0 ADDR: ffff00817fffe540 NAME: "DMA32" SIZE: 20480 MIN/LOW/HIGH: 11/28/45 VM_STAT: NR_FREE_PAGES: 359 NR_ZONE_INACTIVE_ANON: 18813 NR_ZONE_ACTIVE_ANON: 0 NR_ZONE_INACTIVE_FILE: 50 NR_ZONE_ACTIVE_FILE: 0 NR_ZONE_UNEVICTABLE: 0 NR_ZONE_WRITE_PENDING: 0 NR_MLOCK: 0 NR_BOUNCE: 0 NR_ZSPAGES: 0 NR_FREE_CMA_PAGES: 0 NODE: 4 ZONE: 1 ADDR: ffff00817fffec00 NAME: "Normal" SIZE: 8454144 PRESENT: 98304 MIN/LOW/HIGH: 68/166/264 VM_STAT: NR_FREE_PAGES: 146 NR_ZONE_INACTIVE_ANON: 94668 NR_ZONE_ACTIVE_ANON: 3 NR_ZONE_INACTIVE_FILE: 735 NR_ZONE_ACTIVE_FILE: 78 NR_ZONE_UNEVICTABLE: 0 NR_ZONE_WRITE_PENDING: 0 NR_MLOCK: 0 NR_BOUNCE: 0 NR_ZSPAGES: 0 NR_FREE_CMA_PAGES: 0 In allow_direct_reclaim(), while processing ZONE_DMA32, the sum of inactive/active file-backed pages calculated in zone_reclaimable_pages() based on the result of zone_page_state_snapshot() is zero. Additionally, since this system lacks swap, the calculation of inactive/ active anonymous pages is skipped. crash> p nr_swap_pages nr_swap_pages = $1937 = { counter = 0 } As a result, ZONE_DMA32 is deemed unreclaimable and skipped, moving on to the processing of the next zone, ZONE_NORMAL, despite ZONE_DMA32 having free pages significantly exceeding the high watermark. The problem is that the pgdat->kswapd_failures hasn't been incremented. crash> px ((struct pglist_data *) 0xffff00817fffe540)->kswapd_failures $1935 = 0x0 This is because the node deemed balanced. The node balancing logic in balance_pgdat() evaluates all zones collectively. If one or more zones (e.g., ZONE_DMA32) have enough free pages to meet their watermarks, the entire node is deemed balanced. This causes balance_pgdat() to exit early before incrementing the kswapd_failures, as it considers the overall memory state acceptable, even though some zones (like ZONE_NORMAL) remain under significant pressure. The patch ensures that zone_reclaimable_pages() includes free pages (NR_FREE_PAGES) in its calculation when no other reclaimable pages are available (e.g., file-backed or anonymous pages). This change prevents zones like ZONE_DMA32, which have sufficient free pages, from being mistakenly deemed unreclaimable. By doing so, the patch ensures proper node balancing, avoids masking pressure on other zones like ZONE_NORMAL, and prevents infinite loops in throttle_direct_reclaim() caused by allow_direct_reclaim(pgdat) repeatedly returning false. The kernel hangs due to a task stuck in throttle_direct_reclaim(), caused by a node being incorrectly deemed balanced despite pressure in certain zones, such as ZONE_NORMAL. This issue arises from zone_reclaimable_pages() returning 0 for zones without reclaimable file- backed or anonymous pages, causing zones like ZONE_DMA32 with sufficient free pages to be skipped. The lack of swap or reclaimable pages results in ZONE_DMA32 being ignored during reclaim, masking pressure in other zones. Consequently, pgdat->kswapd_failures remains 0 in balance_pgdat(), preventing fallback mechanisms in allow_direct_reclaim() from being triggered, leading to an infinite loop in throttle_direct_reclaim(). This patch modifies zone_reclaimable_pages() to account for free pages (NR_FREE_PAGES) when no other reclaimable pages exist. This ensures zones with sufficient free pages are not skipped, enabling proper balancing and reclaim behavior. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20241130164346.436469-1-snishika@redhat.com Link: https://lkml.kernel.org/r/20241130161236.433747-2-snishika@redhat.com Fixes: 5a1c84b404a7 ("mm: remove reclaim and compaction retry approximations") Signed-off-by: Seiji Nishikawa Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 --- mm/vmscan.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index e319f2d460f3..9a3129b27296 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -213,7 +213,14 @@ unsigned long zone_reclaimable_pages(struct zone *zone) if (get_nr_swap_pages() > 0) nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) + zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON); - + /* + * If there are no reclaimable file-backed or anonymous pages, + * ensure zones with sufficient free pages are not skipped. + * This prevents zones like DMA32 from being ignored in reclaim + * scenarios where they can still help alleviate memory pressure. + */ + if (nr == 0) + nr = zone_page_state_snapshot(zone, NR_FREE_PAGES); return nr; } -- Gitee From 7468456d1655501683c62b585951836de5bfe0d5 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Fri, 10 Jan 2025 14:21:55 +0100 Subject: [PATCH 077/116] bpf: Fix bpf_sk_select_reuseport() memory leak mainline inclusion from mainline-v6.13 commit b3af60928ab9129befa65e6df0310d27300942bf category: bugfix issue: NA CVE: CVE-2025-21683 Signed-off-by: zyf1116 --------------------------------------- As pointed out in the original comment, lookup in sockmap can return a TCP ESTABLISHED socket. Such TCP socket may have had SO_ATTACH_REUSEPORT_EBPF set before it was ESTABLISHED. In other words, a non-NULL sk_reuseport_cb does not imply a non-refcounted socket. Drop sk's reference in both error paths. unreferenced object 0xffff888101911800 (size 2048): comm "test_progs", pid 44109, jiffies 4297131437 hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 80 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 9336483b): __kmalloc_noprof+0x3bf/0x560 __reuseport_alloc+0x1d/0x40 reuseport_alloc+0xca/0x150 reuseport_attach_prog+0x87/0x140 sk_reuseport_attach_bpf+0xc8/0x100 sk_setsockopt+0x1181/0x1990 do_sock_setsockopt+0x12b/0x160 __sys_setsockopt+0x7b/0xc0 __x64_sys_setsockopt+0x1b/0x30 do_syscall_64+0x93/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: 64d85290d79c ("bpf: Allow bpf_map_lookup_elem for SOCKMAP and SOCKHASH") Signed-off-by: Michal Luczaj Reviewed-by: Martin KaFai Lau Link: https://patch.msgid.link/20250110-reuseport-memleak-v1-1-fa1ddab0adfe@rbox.co Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 --- net/core/filter.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 61395535c91c..9d645e85d04f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -10048,6 +10048,7 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY; struct sock_reuseport *reuse; struct sock *selected_sk; + int err; selected_sk = map->ops->map_lookup_elem(map, key); if (!selected_sk) @@ -10055,10 +10056,6 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, reuse = rcu_dereference(selected_sk->sk_reuseport_cb); if (!reuse) { - /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ - if (sk_is_refcounted(selected_sk)) - sock_put(selected_sk); - /* reuseport_array has only sk with non NULL sk_reuseport_cb. * The only (!reuse) case here is - the sk has already been * unhashed (e.g. by close()), so treat it as -ENOENT. @@ -10066,24 +10063,33 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, * Other maps (e.g. sock_map) do not provide this guarantee and * the sk may never be in the reuseport group to begin with. */ - return is_sockarray ? -ENOENT : -EINVAL; + err = is_sockarray ? -ENOENT : -EINVAL; + goto error; } if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) { struct sock *sk = reuse_kern->sk; - if (sk->sk_protocol != selected_sk->sk_protocol) - return -EPROTOTYPE; - else if (sk->sk_family != selected_sk->sk_family) - return -EAFNOSUPPORT; - - /* Catch all. Likely bound to a different sockaddr. */ - return -EBADFD; + if (sk->sk_protocol != selected_sk->sk_protocol) { + err = -EPROTOTYPE; + } else if (sk->sk_family != selected_sk->sk_family) { + err = -EAFNOSUPPORT; + } else { + /* Catch all. Likely bound to a different sockaddr. */ + err = -EBADFD; + } + goto error; } reuse_kern->selected_sk = selected_sk; return 0; +error: + /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ + if (sk_is_refcounted(selected_sk)) + sock_put(selected_sk); + + return err; } static const struct bpf_func_proto sk_select_reuseport_proto = { -- Gitee From c6ec9d1ddb2c6d024233561c4f47d708a7b2565f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 8 Jan 2025 22:56:33 +0100 Subject: [PATCH 078/116] netfilter: conntrack: clamp maximum hashtable size to INT_MAX mainline inclusion from mainline-v6.13-rc7 commit b541ba7d1f5a5b7b3e2e22dc9e40e18a7d6dbc13 category: bugfix issue: NA CVE: CVE-2025-21648 Signed-off-by: zyf1116 --------------------------------------- Use INT_MAX as maximum size for the conntrack hashtable. Otherwise, it is possible to hit WARN_ON_ONCE in __kvmalloc_node_noprof() when resizing hashtable because __GFP_NOWARN is unset. See: 0708a0afe291 ("mm: Consider __GFP_NOWARN flag for oversized kvmalloc() calls") Note: hashtable resize is only possible from init_netns. Fixes: 9cc1c73ad666 ("netfilter: conntrack: avoid integer overflow when resizing") Signed-off-by: Pablo Neira Ayuso Signed-off-by: zyf1116 --- net/netfilter/nf_conntrack_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f82a234ac53a..99d5d8cd3895 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2435,12 +2435,15 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) struct hlist_nulls_head *hash; unsigned int nr_slots, i; - if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head))) + if (*sizep > (INT_MAX / sizeof(struct hlist_nulls_head))) return NULL; BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); + if (nr_slots > (INT_MAX / sizeof(struct hlist_nulls_head))) + return NULL; + hash = kvcalloc(nr_slots, sizeof(struct hlist_nulls_head), GFP_KERNEL); if (hash && nulls) -- Gitee From e65a8c64bcb2e261ef58d2216760fc7aa42cc75d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:26 -0700 Subject: [PATCH 079/116] icmp: Fix a data-race around sysctl_icmp_errors_use_inbound_ifaddr. mainline inclusion from mainline-v5.19-rc7 commit d2efabce81db7eed1c98fa1a3f203f0edd738ac3 category: bugfix issue: NA CVE: CVE-2022-49632 Signed-off-by: zyf1116 --------------------------------------- While reading sysctl_icmp_errors_use_inbound_ifaddr, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 1c2fb7f93cb2 ("[IPV4]: Sysctl configurable icmp error source address.") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: zyf1116 --- net/ipv4/icmp.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 155ed02c8b6b..2d78bef2f0e6 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -706,7 +706,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, rcu_read_lock(); if (rt_is_input_route(rt) && - net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr) + READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)) dev = dev_get_by_index_rcu(net, inet_iif(skb_in)); if (dev) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 59ba518a85b9..c6726ace37af 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -564,6 +564,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "icmp_ratelimit", -- Gitee From 84a98ecf9bbd79eb07394188f52dcf54f8b8c3fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Jan 2025 18:12:41 +0000 Subject: [PATCH 080/116] ipmr: do not call mr_mfc_uses_dev() for unres entries mainline inclusion from mainline-v6.14-rc1 commit 15a901361ec3fb1c393f91880e1cbf24ec0a88bd category: bugfix issue: NA CVE: CVE-2025-21719 Signed-off-by: zyf1116 --------------------------------------- syzbot found that calling mr_mfc_uses_dev() for unres entries would crash [1], because c->mfc_un.res.minvif / c->mfc_un.res.maxvif alias to "struct sk_buff_head unresolved", which contain two pointers. This code never worked, lets remove it. [1] Unable to handle kernel paging request at virtual address ffff5fff2d536613 KASAN: maybe wild-memory-access in range [0xfffefff96a9b3098-0xfffefff96a9b309f] Modules linked in: CPU: 1 UID: 0 PID: 7321 Comm: syz.0.16 Not tainted 6.13.0-rc7-syzkaller-g1950a0af2d55 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : mr_mfc_uses_dev net/ipv4/ipmr_base.c:290 [inline] pc : mr_table_dump+0x5a4/0x8b0 net/ipv4/ipmr_base.c:334 lr : mr_mfc_uses_dev net/ipv4/ipmr_base.c:289 [inline] lr : mr_table_dump+0x694/0x8b0 net/ipv4/ipmr_base.c:334 Call trace: mr_mfc_uses_dev net/ipv4/ipmr_base.c:290 [inline] (P) mr_table_dump+0x5a4/0x8b0 net/ipv4/ipmr_base.c:334 (P) mr_rtm_dumproute+0x254/0x454 net/ipv4/ipmr_base.c:382 ipmr_rtm_dumproute+0x248/0x4b4 net/ipv4/ipmr.c:2648 rtnl_dump_all+0x2e4/0x4e8 net/core/rtnetlink.c:4327 rtnl_dumpit+0x98/0x1d0 net/core/rtnetlink.c:6791 netlink_dump+0x4f0/0xbc0 net/netlink/af_netlink.c:2317 netlink_recvmsg+0x56c/0xe64 net/netlink/af_netlink.c:1973 sock_recvmsg_nosec net/socket.c:1033 [inline] sock_recvmsg net/socket.c:1055 [inline] sock_read_iter+0x2d8/0x40c net/socket.c:1125 new_sync_read fs/read_write.c:484 [inline] vfs_read+0x740/0x970 fs/read_write.c:565 ksys_read+0x15c/0x26c fs/read_write.c:708 Fixes: cb167893f41e ("net: Plumb support for filtering ipv4 and ipv6 multicast route dumps") Reported-by: syzbot+5cfae50c0e5f2c500013@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/678fe2d1.050a0220.15cac.00b3.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20250121181241.841212-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 --- net/ipv4/ipmr_base.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index aa8738a91210..0c4bb906af21 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -320,9 +320,6 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb, list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { if (e < s_e) goto next_entry2; - if (filter->dev && - !mr_mfc_uses_dev(mrt, mfc, filter->dev)) - goto next_entry2; err = fill(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags); -- Gitee From fc3e0b789bc1b415d26aec7ead1adb70bfffa601 Mon Sep 17 00:00:00 2001 From: Akash M Date: Thu, 19 Dec 2024 18:22:19 +0530 Subject: [PATCH 081/116] usb: gadget: f_fs: Remove WARN_ON in functionfs_bind mainline inclusion from mainline-v6.13-rc7 commit dfc51e48bca475bbee984e90f33fdc537ce09699 category: bugfix issue: #IBY0MI CVE: CVE-2024-57913 Signed-off-by: zyf1116 --------------------------------------- This commit addresses an issue related to below kernel panic where panic_on_warn is enabled. It is caused by the unnecessary use of WARN_ON in functionsfs_bind, which easily leads to the following scenarios. 1.adb_write in adbd 2. UDC write via configfs ================= ===================== ->usb_ffs_open_thread() ->UDC write ->open_functionfs() ->configfs_write_iter() ->adb_open() ->gadget_dev_desc_UDC_store() ->adb_write() ->usb_gadget_register_driver_owner ->driver_register() ->StartMonitor() ->bus_add_driver() ->adb_read() ->gadget_bind_driver() ->configfs_composite_bind() ->usb_add_function() ->open_functionfs() ->ffs_func_bind() ->adb_open() ->functionfs_bind() state !=FFS_ACTIVE> The adb_open, adb_read, and adb_write operations are invoked from the daemon, but trying to bind the function is a process that is invoked by UDC write through configfs, which opens up the possibility of a race condition between the two paths. In this race scenario, the kernel panic occurs due to the WARN_ON from functionfs_bind when panic_on_warn is enabled. This commit fixes the kernel panic by removing the unnecessary WARN_ON. Kernel panic - not syncing: kernel: panic_on_warn set ... [ 14.542395] Call trace: [ 14.542464] ffs_func_bind+0x1c8/0x14a8 [ 14.542468] usb_add_function+0xcc/0x1f0 [ 14.542473] configfs_composite_bind+0x468/0x588 [ 14.542478] gadget_bind_driver+0x108/0x27c [ 14.542483] really_probe+0x190/0x374 [ 14.542488] __driver_probe_device+0xa0/0x12c [ 14.542492] driver_probe_device+0x3c/0x220 [ 14.542498] __driver_attach+0x11c/0x1fc [ 14.542502] bus_for_each_dev+0x104/0x160 [ 14.542506] driver_attach+0x24/0x34 [ 14.542510] bus_add_driver+0x154/0x270 [ 14.542514] driver_register+0x68/0x104 [ 14.542518] usb_gadget_register_driver_owner+0x48/0xf4 [ 14.542523] gadget_dev_desc_UDC_store+0xf8/0x144 [ 14.542526] configfs_write_iter+0xf0/0x138 Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Cc: stable Signed-off-by: Akash M Link: https://lore.kernel.org/r/20241219125221.1679-1-akash.m5@samsung.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 Conflicts: drivers/usb/gadget/function/f_fs.c --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index d80df9286f4b..eb9d01d4245a 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1856,7 +1856,7 @@ static int functionfs_bind(struct ffs_data *ffs, struct usb_composite_dev *cdev) ENTER(); - if (WARN_ON(ffs->state != FFS_ACTIVE + if ((ffs->state != FFS_ACTIVE || test_and_set_bit(FFS_FL_BOUND, &ffs->flags))) return -EBADFD; -- Gitee From 3aff3ebbc38bd881ac60b08178a3ac68bdf8ea01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 23 Jan 2025 08:22:40 +0100 Subject: [PATCH 082/116] ptp: Ensure info->enable callback is always set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.14-rc1 commit fd53aa40e65f518453115b6f56183b0c201db26b category: bugfix issue: NA CVE: CVE-2025-21814 Signed-off-by: zyf1116 --------------------------------------- The ioctl and sysfs handlers unconditionally call the ->enable callback. Not all drivers implement that callback, leading to NULL dereferences. Example of affected drivers: ptp_s390.c, ptp_vclock.c and ptp_mock.c. Instead use a dummy callback if no better was specified by the driver. Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Acked-by: Richard Cochran Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250123-ptp-enable-v1-1-b015834d3a47@weissschuh.net Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 Conflicts: drivers/ptp/ptp_clock.c --- drivers/ptp/ptp_clock.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index ed766943a356..e408dae648a6 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -187,6 +187,11 @@ static void ptp_clock_release(struct device *dev) kfree(ptp); } +static int ptp_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *request, int on) +{ + return -EOPNOTSUPP; +} + static void ptp_aux_kworker(struct kthread_work *work) { struct ptp_clock *ptp = container_of(work, struct ptp_clock, @@ -232,6 +237,9 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, mutex_init(&ptp->pincfg_mux); init_waitqueue_head(&ptp->tsev_wq); + if (!ptp->info->enable) + ptp->info->enable = ptp_enable; + if (ptp->info->do_aux_work) { kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker); ptp->kworker = kthread_create_worker(0, "ptp%d", ptp->index); -- Gitee From 47d7b0af814ec99ed2e337f42192028b053cb92b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 19 Dec 2024 09:30:30 +0000 Subject: [PATCH 083/116] workqueue: Do not warn when cancelling WQ_MEM_RECLAIM work from !WQ_MEM_RECLAIM worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.13-rc6 commit de35994ecd2dd6148ab5a6c5050a1670a04dec77 category: bugfix issue: NA CVE: CVE-2024-57888 Signed-off-by: zyf1116 --------------------------------------- After commit 746ae46c1113 ("drm/sched: Mark scheduler work queues with WQ_MEM_RECLAIM") amdgpu started seeing the following warning: [ ] workqueue: WQ_MEM_RECLAIM sdma0:drm_sched_run_job_work [gpu_sched] is flushing !WQ_MEM_RECLAIM events:amdgpu_device_delay_enable_gfx_off [amdgpu] ... [ ] Workqueue: sdma0 drm_sched_run_job_work [gpu_sched] ... [ ] Call Trace: [ ] ... [ ] ? check_flush_dependency+0xf5/0x110 ... [ ] cancel_delayed_work_sync+0x6e/0x80 [ ] amdgpu_gfx_off_ctrl+0xab/0x140 [amdgpu] [ ] amdgpu_ring_alloc+0x40/0x50 [amdgpu] [ ] amdgpu_ib_schedule+0xf4/0x810 [amdgpu] [ ] ? drm_sched_run_job_work+0x22c/0x430 [gpu_sched] [ ] amdgpu_job_run+0xaa/0x1f0 [amdgpu] [ ] drm_sched_run_job_work+0x257/0x430 [gpu_sched] [ ] process_one_work+0x217/0x720 ... [ ] The intent of the verifcation done in check_flush_depedency is to ensure forward progress during memory reclaim, by flagging cases when either a memory reclaim process, or a memory reclaim work item is flushed from a context not marked as memory reclaim safe. This is correct when flushing, but when called from the cancel(_delayed)_work_sync() paths it is a false positive because work is either already running, or will not be running at all. Therefore cancelling it is safe and we can relax the warning criteria by letting the helper know of the calling context. Signed-off-by: Tvrtko Ursulin Fixes: fca839c00a12 ("workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue") References: 746ae46c1113 ("drm/sched: Mark scheduler work queues with WQ_MEM_RECLAIM") Cc: Tejun Heo Cc: Peter Zijlstra Cc: Lai Jiangshan Cc: Alex Deucher Cc: Christian König Cc: # v4.5+ Signed-off-by: Tejun Heo Signed-off-by: zyf1116 Changes to be committed: modified: kernel/workqueue.c --- kernel/workqueue.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 27f2bdc42aac..95111ee91fdb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2596,23 +2596,27 @@ static int rescuer_thread(void *__rescuer) * check_flush_dependency - check for flush dependency sanity * @target_wq: workqueue being flushed * @target_work: work item being flushed (NULL for workqueue flushes) + * @from_cancel: are we called from the work cancel path * * %current is trying to flush the whole @target_wq or @target_work on it. - * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not - * reclaiming memory or running on a workqueue which doesn't have - * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to - * a deadlock. + * If this is not the cancel path (which implies work being flushed is either + * already running, or will not be at all), check if @target_wq doesn't have + * %WQ_MEM_RECLAIM and verify that %current is not reclaiming memory or running + * on a workqueue which doesn't have %WQ_MEM_RECLAIM as that can break forward- + * progress guarantee leading to a deadlock. */ static void check_flush_dependency(struct workqueue_struct *target_wq, - struct work_struct *target_work) + struct work_struct *target_work, + bool from_cancel) { - work_func_t target_func = target_work ? target_work->func : NULL; + work_func_t target_func; struct worker *worker; - if (target_wq->flags & WQ_MEM_RECLAIM) + if (from_cancel || target_wq->flags & WQ_MEM_RECLAIM) return; worker = current_wq_worker(); + target_func = target_work ? target_work->func : NULL; WARN_ONCE(current->flags & PF_MEMALLOC, "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps", @@ -2838,7 +2842,7 @@ void flush_workqueue(struct workqueue_struct *wq) list_add_tail(&this_flusher.list, &wq->flusher_overflow); } - check_flush_dependency(wq, NULL); + check_flush_dependency(wq, NULL, false); mutex_unlock(&wq->mutex); @@ -3013,7 +3017,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, pwq = worker->current_pwq; } - check_flush_dependency(pwq->wq, work); + check_flush_dependency(pwq->wq, work, from_cancel); insert_wq_barrier(pwq, barr, work, worker); raw_spin_unlock_irq(&pool->lock); -- Gitee From 4b44013641fd7048fa332a5b13a047999655b826 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 4 Dec 2024 15:20:07 +0200 Subject: [PATCH 084/116] drm/dp_mst: Ensure mst_primary pointer is valid in drm_dp_mst_handle_up_req() mainline inclusion from mainline-v6.13-rc2 commit e54b00086f7473dbda1a7d6fc47720ced157c6a8 category: bugfix issue: NA CVE: CVE-2024-57798 Signed-off-by: zyf1116 --------------------------------------- While receiving an MST up request message from one thread in drm_dp_mst_handle_up_req(), the MST topology could be removed from another thread via drm_dp_mst_topology_mgr_set_mst(false), freeing mst_primary and setting drm_dp_mst_topology_mgr::mst_primary to NULL. This could lead to a NULL deref/use-after-free of mst_primary in drm_dp_mst_handle_up_req(). Avoid the above by holding a reference for mst_primary in drm_dp_mst_handle_up_req() while it's used. v2: Fix kfreeing the request if getting an mst_primary reference fails. Cc: Lyude Paul Reviewed-by: Lyude Paul (v1) Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20241204132007.3132494-1-imre.deak@intel.com Signed-off-by: zyf1116 Conflicts: drivers/gpu/drm/drm_dp_mst_topology.c --- drivers/gpu/drm/drm_dp_mst_topology.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 0eb2f30c1e3e..95fdcac480ca 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -4118,9 +4118,10 @@ static void drm_dp_mst_up_req_work(struct work_struct *work) static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) { struct drm_dp_pending_up_req *up_req; + struct drm_dp_mst_branch *mst_primary; if (!drm_dp_get_one_sb_msg(mgr, true, NULL)) - goto out; + goto out_clear_reply; if (!mgr->up_req_recv.have_eomt) return 0; @@ -4139,10 +4140,19 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) DRM_DEBUG_KMS("Received unknown up req type, ignoring: %x\n", up_req->msg.req_type); kfree(up_req); - goto out; + goto out_clear_reply; } - drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, up_req->msg.req_type, + mutex_lock(&mgr->lock); + mst_primary = mgr->mst_primary; + if (!mst_primary || !drm_dp_mst_topology_try_get_mstb(mst_primary)) { + mutex_unlock(&mgr->lock); + kfree(up_req); + goto out_clear_reply; + } + mutex_unlock(&mgr->lock); + + drm_dp_send_up_ack_reply(mgr, mst_primary, up_req->msg.req_type, false); if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { @@ -4171,7 +4181,8 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) mutex_unlock(&mgr->up_req_lock); queue_work(system_long_wq, &mgr->up_req_work); -out: + drm_dp_mst_topology_put_mstb(mst_primary); +out_clear_reply: memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); return 0; } -- Gitee From 3c36d73063453b30f5b700ebcdcaecef1df2fe28 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 16 Jan 2025 10:21:57 +0100 Subject: [PATCH 085/116] net: avoid race between device unregistration and ethnl ops mainline inclusion from mainline-v6.14 commit 12e070eb6964b341b41677fd260af5a305316a1f category: bugfix issue: NA CVE: CVE-2025-21701 Signed-off-by: zyf1116 --------------------------------------- The following trace can be seen if a device is being unregistered while its number of channels are being modified. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 3 PID: 3754 at kernel/locking/mutex.c:564 __mutex_lock+0xc8a/0x1120 CPU: 3 UID: 0 PID: 3754 Comm: ethtool Not tainted 6.13.0-rc6+ #771 RIP: 0010:__mutex_lock+0xc8a/0x1120 Call Trace: ethtool_check_max_channel+0x1ea/0x880 ethnl_set_channels+0x3c3/0xb10 ethnl_default_set_doit+0x306/0x650 genl_family_rcv_msg_doit+0x1e3/0x2c0 genl_rcv_msg+0x432/0x6f0 netlink_rcv_skb+0x13d/0x3b0 genl_rcv+0x28/0x40 netlink_unicast+0x42e/0x720 netlink_sendmsg+0x765/0xc20 __sys_sendto+0x3ac/0x420 __x64_sys_sendto+0xe0/0x1c0 do_syscall_64+0x95/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e This is because unregister_netdevice_many_notify might run before the rtnl lock section of ethnl operations, eg. set_channels in the above example. In this example the rss lock would be destroyed by the device unregistration path before being used again, but in general running ethnl operations while dismantle has started is not a good idea. Fix this by denying any operation on devices being unregistered. A check was already there in ethnl_ops_begin, but not wide enough. Note that the same issue cannot be seen on the ioctl version (__dev_ethtool) because the device reference is retrieved from within the rtnl lock section there. Once dismantle started, the net device is unlisted and no reference will be found. Fixes: dde91ccfa25f ("ethtool: do not perform operations on net devices being unregistered") Signed-off-by: Antoine Tenart Reviewed-by: Przemek Kitszel Reviewed-by: Edward Cree Link: https://patch.msgid.link/20250116092159.50890-1-atenart@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 Conflicts: net/ethtool/netlink.c net/ethtool/netlink.h --- net/ethtool/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 979dee6bb88c..2c19f5236cb8 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -249,7 +249,7 @@ struct ethnl_reply_data { static inline int ethnl_ops_begin(struct net_device *dev) { - if (dev && dev->reg_state == NETREG_UNREGISTERING) + if (dev && dev->reg_state >= NETREG_UNREGISTERING) return -ENODEV; if (dev && dev->ethtool_ops->begin) -- Gitee From cb49b3433ef78e8200fe08f3cd0b2c05639071c4 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Thu, 16 Jan 2025 22:30:53 +0800 Subject: [PATCH 086/116] net: let net.core.dev_weight always be non-zero mainline inclusion from mainline-6.14 commit d1f9f79fa2af8e3b45cffdeef66e05833480148a category: bugfix issue: NA CVE: CVE-2025-21806 Signed-off-by: zyf1116 --------------------------------------- The following problem was encountered during stability test: (NULL net_device): NAPI poll function process_backlog+0x0/0x530 \ returned 1, exceeding its budget of 0. ------------[ cut here ]------------ list_add double add: new=ffff88905f746f48, prev=ffff88905f746f48, \ next=ffff88905f746e40. WARNING: CPU: 18 PID: 5462 at lib/list_debug.c:35 \ __list_add_valid_or_report+0xf3/0x130 CPU: 18 UID: 0 PID: 5462 Comm: ping Kdump: loaded Not tainted 6.13.0-rc7+ RIP: 0010:__list_add_valid_or_report+0xf3/0x130 Call Trace: ? __warn+0xcd/0x250 ? __list_add_valid_or_report+0xf3/0x130 enqueue_to_backlog+0x923/0x1070 netif_rx_internal+0x92/0x2b0 __netif_rx+0x15/0x170 loopback_xmit+0x2ef/0x450 dev_hard_start_xmit+0x103/0x490 __dev_queue_xmit+0xeac/0x1950 ip_finish_output2+0x6cc/0x1620 ip_output+0x161/0x270 ip_push_pending_frames+0x155/0x1a0 raw_sendmsg+0xe13/0x1550 __sys_sendto+0x3bf/0x4e0 __x64_sys_sendto+0xdc/0x1b0 do_syscall_64+0x5b/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e The reproduction command is as follows: sysctl -w net.core.dev_weight=0 ping 127.0.0.1 This is because when the napi's weight is set to 0, process_backlog() may return 0 and clear the NAPI_STATE_SCHED bit of napi->state, causing this napi to be re-polled in net_rx_action() until __do_softirq() times out. Since the NAPI_STATE_SCHED bit has been cleared, napi_schedule_rps() can be retriggered in enqueue_to_backlog(), causing this issue. Making the napi's weight always non-zero solves this problem. Triggering this issue requires system-wide admin (setting is not namespaced). Fixes: e38766054509 ("[NET]: Fix sysctl net.core.dev_weight") Fixes: 3d48b53fb2ae ("net: dev_weight: TX/RX orthogonality") Signed-off-by: Liu Jian Link: https://patch.msgid.link/20250116143053.4146855-1-liujian56@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 --- net/core/sysctl_net_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0dfe9f255ab3..3640be19a795 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -239,7 +239,7 @@ static int proc_do_dev_weight(struct ctl_table *table, int write, int ret, weight; mutex_lock(&dev_weight_mutex); - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!ret && write) { weight = READ_ONCE(weight_p); WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias); @@ -351,6 +351,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_dev_weight, + .extra1 = SYSCTL_ONE, }, { .procname = "dev_weight_rx_bias", @@ -358,6 +359,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_dev_weight, + .extra1 = SYSCTL_ONE, }, { .procname = "dev_weight_tx_bias", @@ -365,6 +367,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_dev_weight, + .extra1 = SYSCTL_ONE, }, { .procname = "netdev_max_backlog", -- Gitee From bea5fccd17ebd8aaafa0d09f27f41f4032df6b15 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Jan 2022 09:52:44 -0800 Subject: [PATCH 087/116] srcu: Tighten cleanup_srcu_struct() GP checks mainline inclusion from mainline-5.19 commit 8ed00760203d8018bee042fbfe8e076579be2c2b category: bugfix issue: NA CVE: CVE-2022-49651 Signed-off-by: zyf1116 --------------------------------------- Currently, cleanup_srcu_struct() checks for a grace period in progress, but it does not check for a grace period that has not yet started but which might start at any time. Such a situation could result in a use-after-free bug, so this commit adds a check for a grace period that is needed but not yet started to cleanup_srcu_struct(). Signed-off-by: Paul E. McKenney Signed-off-by: zyf1116 --- kernel/rcu/srcutree.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 1ca734767b9d..375a1ea05c22 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -390,9 +390,11 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) return; /* Forgot srcu_barrier(), so just leak it! */ } if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || + WARN_ON(rcu_seq_current(&ssp->srcu_gp_seq) != ssp->srcu_gp_seq_needed) || WARN_ON(srcu_readers_active(ssp))) { - pr_info("%s: Active srcu_struct %p state: %d\n", - __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))); + pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n", + __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)), + rcu_seq_current(&ssp->srcu_gp_seq), ssp->srcu_gp_seq_needed); return; /* Caller forgot to stop doing call_srcu()? */ } free_percpu(ssp->sda); -- Gitee From e72504a3304cba968cfab858798119df1d44a3a7 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Wed, 15 Jan 2025 10:36:47 +0000 Subject: [PATCH 088/116] bpf: Send signals asynchronously if !preemptible mainline inclusion from mainline-6.14 commit 87c544108b612512b254c8f79aa5c0a8546e2cc4 category: bugfix issue: NA CVE: CVE-2025-21728 Signed-off-by: zyf1116 --------------------------------------- BPF programs can execute in all kinds of contexts and when a program running in a non-preemptible context uses the bpf_send_signal() kfunc, it will cause issues because this kfunc can sleep. Change `irqs_disabled()` to `!preemptible()`. Reported-by: syzbot+97da3d7e0112d59971de@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/67486b09.050a0220.253251.0084.GAE@google.com/ Fixes: 1bc7896e9ef4 ("bpf: Fix deadlock with rq_lock in bpf_send_signal()") Signed-off-by: Puranjay Mohan Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20250115103647.38487-1-puranjay@kernel.org Signed-off-by: Alexei Starovoitov Signed-off-by: zyf1116 --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 5b9b7fdd7b9c..c62beae65e06 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1081,7 +1081,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) if (unlikely(is_global_init(current))) return -EPERM; - if (irqs_disabled()) { + if (!preemptible()) { /* Do an early check on signal validity. Otherwise, * the error is lost in deferred irq_work. */ -- Gitee From f9f4e9f7cd89d9481c7e17ee8bdba83e0193bc5a Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 8 Nov 2024 01:51:30 +0200 Subject: [PATCH 089/116] media: uvcvideo: Fix double free in error path stable inclusion from stable-6.6.76 commit 6c36dcd662ec5276782838660f8533a7cb26be49 category: bugfix issue: NA CVE: CVE-2024-57980 Signed-off-by: zyf1116 --------------------------------------- commit c6ef3a7fa97ec823a1e1af9085cf13db9f7b3bac upstream. If the uvc_status_init() function fails to allocate the int_urb, it will free the dev->status pointer but doesn't reset the pointer to NULL. This results in the kfree() call in uvc_status_cleanup() trying to double-free the memory. Fix it by resetting the dev->status pointer to NULL after freeing it. Fixes: a31a4055473b ("V4L/DVB:usbvideo:don't use part of buffer for USB transfer #4") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20241107235130.31372-1-laurent.pinchart@ideasonboard.com Signed-off-by: Laurent Pinchart Reviewed by: Ricardo Ribalda Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 --- drivers/media/usb/uvc/uvc_status.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/usb/uvc/uvc_status.c b/drivers/media/usb/uvc/uvc_status.c index 73725051cc16..6edf491dd873 100644 --- a/drivers/media/usb/uvc/uvc_status.c +++ b/drivers/media/usb/uvc/uvc_status.c @@ -269,6 +269,7 @@ int uvc_status_init(struct uvc_device *dev) dev->int_urb = usb_alloc_urb(0, GFP_KERNEL); if (dev->int_urb == NULL) { kfree(dev->status); + dev->status = NULL; return -ENOMEM; } -- Gitee From 7b552ff481682db817ff72cb371fecfd8b0acb47 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Wed, 18 Dec 2024 00:19:59 +0300 Subject: [PATCH 090/116] Bluetooth: L2CAP: handle NULL sock pointer in l2cap_sock_alloc mainline inclusion from mainline-6.14 commit 5f397409f8ee5bc82901eeaf799e1cbc4f8edcf1 category: bugfix issue: NA CVE: CVE-2024-58009 Signed-off-by: zyf1116 --------------------------------------- A NULL sock pointer is passed into l2cap_sock_alloc() when it is called from l2cap_sock_new_connection_cb() and the error handling paths should also be aware of it. Seemingly a more elegant solution would be to swap bt_sock_alloc() and l2cap_chan_create() calls since they are not interdependent to that moment but then l2cap_chan_create() adds the soon to be deallocated and still dummy-initialized channel to the global list accessible by many L2CAP paths. The channel would be removed from the list in short period of time but be a bit more straight-forward here and just check for NULL instead of changing the order of function calls. Found by Linux Verification Center (linuxtesting.org) with SVACE static analysis tool. Fixes: 7c4f78cdb8e7 ("Bluetooth: L2CAP: do not leave dangling sk pointer on error in l2cap_sock_create()") Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Reviewed-by: Kuniyuki Iwashima Signed-off-by: Luiz Augusto von Dentz Signed-off-by: zyf1116 --- net/bluetooth/l2cap_sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index d878f422fc02..b1663868a600 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1863,7 +1863,8 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, chan = l2cap_chan_create(); if (!chan) { sk_free(sk); - sock->sk = NULL; + if (sock) + sock->sk = NULL; return NULL; } -- Gitee From 3cae6bc856801ff704db8779b8658ebb8194f459 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sat, 28 Sep 2024 19:36:08 +0800 Subject: [PATCH 091/116] printk: Fix signed integer overflow when defining LOG_BUF_LEN_MAX mainline inclusion from mainline-6.14 commit 3d6f83df8ff2d5de84b50377e4f0d45e25311c7a category: bugfix issue: NA CVE: CVE-2024-58017 Signed-off-by: zyf1116 --------------------------------------- Shifting 1 << 31 on a 32-bit int causes signed integer overflow, which leads to undefined behavior. To prevent this, cast 1 to u32 before performing the shift, ensuring well-defined behavior. This change explicitly avoids any potential overflow by ensuring that the shift occurs on an unsigned 32-bit integer. Signed-off-by: Kuan-Wei Chiu Acked-by: Petr Mladek Link: https://lore.kernel.org/r/20240928113608.1438087-1-visitorckw@gmail.com Signed-off-by: Petr Mladek Signed-off-by: zyf1116 --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 17a310dcb6d9..fd351283f5eb 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -420,7 +420,7 @@ static u64 clear_seq; /* record buffer */ #define LOG_ALIGN __alignof__(unsigned long) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) -#define LOG_BUF_LEN_MAX (u32)(1 << 31) +#define LOG_BUF_LEN_MAX ((u32)1 << 31) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; -- Gitee From 552bc2f86c1c3482010a47b75f70fb459d3a971e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 5 Dec 2024 17:16:29 +0000 Subject: [PATCH 092/116] ocfs2: handle a symlink read error correctly mainline inclusion from mainline-6.14 commit 2b4c2094da6d84e69b843dd3317902e977bf64bd category: bugfix issue: NA CVE: CVE-2024-58001 Signed-off-by: zyf1116 --------------------------------------- Patch series "Convert ocfs2 to use folios". Mark did a conversion of ocfs2 to use folios and sent it to me as a giant patch for review ;-) So I've redone it as individual patches, and credited Mark for the patches where his code is substantially the same. It's not a bad way to do it; his patch had some bugs and my patches had some bugs. Hopefully all our bugs were different from each other. And hopefully Mark likes all the changes I made to his code! This patch (of 23): If we can't read the buffer, be sure to unlock the page before returning. Link: https://lkml.kernel.org/r/20241205171653.3179945-1-willy@infradead.org Link: https://lkml.kernel.org/r/20241205171653.3179945-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Mark Tinguely Cc: Signed-off-by: Andrew Morton Signed-off-by: zyf1116 --- fs/ocfs2/symlink.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 94cfacc9bad7..f3e80c00ca69 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -66,7 +66,7 @@ static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page) if (status < 0) { mlog_errno(status); - return status; + goto out; } fe = (struct ocfs2_dinode *) bh->b_data; @@ -77,9 +77,10 @@ static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page) memcpy(kaddr, link, len + 1); kunmap_atomic(kaddr); SetPageUptodate(page); +out: unlock_page(page); brelse(bh); - return 0; + return status; } const struct address_space_operations ocfs2_fast_symlink_aops = { -- Gitee From 896de721ac5afae123a48781dc45b4ac2e9b5449 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 13 Oct 2023 14:10:23 +0200 Subject: [PATCH 093/116] net: treat possible_net_t net pointer as an RCU one and add read_pnet_rcu() mainline inclusion from mainline-v6.7 commit 2034d90ae41ae93e30d492ebcf1f06f97a9cfba6 category: bugfix issue: #IBZA9X CVE: CVE-2025-21764 Signed-off-by: zyf1116 --------------------------------------- Make the net pointer stored in possible_net_t structure annotated as an RCU pointer. Change the access helpers to treat it as such. Introduce read_pnet_rcu() helper to allow caller to dereference the net pointer under RCU read lock. Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: zyf1116 --- include/net/net_namespace.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3c0ba2a91d79..d98524340170 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -327,21 +327,30 @@ static inline int check_net(const struct net *net) typedef struct { #ifdef CONFIG_NET_NS - struct net *net; + struct net __rcu *net; #endif } possible_net_t; static inline void write_pnet(possible_net_t *pnet, struct net *net) { #ifdef CONFIG_NET_NS - pnet->net = net; + rcu_assign_pointer(pnet->net, net); #endif } static inline struct net *read_pnet(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS - return pnet->net; + return rcu_dereference_protected(pnet->net, true); +#else + return &init_net; +#endif +} + +static inline struct net *read_pnet_rcu(possible_net_t *pnet) +{ +#ifdef CONFIG_NET_NS + return rcu_dereference(pnet->net); #else return &init_net; #endif -- Gitee From 6acfff1b892fed5c274b3d3945787d0951a361d5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Feb 2025 15:51:09 +0000 Subject: [PATCH 094/116] net: add dev_net_rcu() helper mainline inclusion from mainline-v6.14 commit 482ad2a4ace2740ca0ff1cbc8f3c7f862f3ab507 category: bugfix issue: NA CVE: CVE-2025-21764 Signed-off-by: zyf1116 --------------------------------------- dev->nd_net can change, readers should either use rcu_read_lock() or RTNL. We currently use a generic helper, dev_net() with no debugging support. We probably have many hidden bugs. Add dev_net_rcu() helper for callers using rcu_read_lock() protection. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250205155120.1676781-2-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 --- include/linux/netdevice.h | 6 ++++++ include/net/net_namespace.h | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a961130297c0..f1d7e00f8604 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2364,6 +2364,12 @@ struct net *dev_net(const struct net_device *dev) return read_pnet(&dev->nd_net); } +static inline +struct net *dev_net_rcu(const struct net_device *dev) +{ + return read_pnet_rcu(&dev->nd_net); +} + static inline void dev_net_set(struct net_device *dev, struct net *net) { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index d98524340170..f8ff4ff0cde7 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -347,7 +347,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif } -static inline struct net *read_pnet_rcu(possible_net_t *pnet) +static inline struct net *read_pnet_rcu(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS return rcu_dereference(pnet->net); -- Gitee From 6644a9ff2a192079254ab27ad7c20296bcfce950 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:34 +0000 Subject: [PATCH 095/116] ndisc: use RCU protection in ndisc_alloc_skb() mainline inclusion from mainline-v6.14 commit 628e6d18930bbd21f2d4562228afe27694f66da9 category: bugfix issue: NA CVE: CVE-2025-21764 Signed-off-by: zyf1116 --------------------------------------- ndisc_alloc_skb() can be called without RTNL or RCU being held. Add RCU protection to avoid possible UAF. Fixes: de09334b9326 ("ndisc: Introduce ndisc_alloc_skb() helper.") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-3-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 --- net/ipv6/ndisc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 14251347c4a5..5134fa7f2888 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -415,15 +415,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); - if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", - __func__); + if (!skb) return NULL; - } skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -434,7 +430,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, /* Manually assign socket ownership as we avoid calling * sock_alloc_send_pskb() to bypass wmem buffer limits */ - skb_set_owner_w(skb, sk); + rcu_read_lock(); + skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk); + rcu_read_unlock(); return skb; } -- Gitee From 36029384209b7e494c89b228e457e550468bbdde Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:39 +0000 Subject: [PATCH 096/116] ndisc: extend RCU protection in ndisc_send_skb() mainline inclusion from mainline-6.14 commit ed6ae1f325d3c43966ec1b62ac1459e2b8e45640 category: bugfix issue: NA CVE: CVE-2025-21760 Signed-off-by: zyf1116 --------------------------------------- ndisc_send_skb() can be called without RTNL or RCU held. Acquire rcu_read_lock() earlier, so that we can use dev_net_rcu() and avoid a potential UAF. Fixes: 1762f7e88eb3 ("[NETNS][IPV6] ndisc - make socket control per namespace") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-8-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 --- net/ipv6/ndisc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 5134fa7f2888..8155fdb7b0f4 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -469,16 +469,20 @@ static void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct icmp6hdr *icmp6h = icmp6_hdr(skb); struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(skb->dev); - struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; + struct net *net; + struct sock *sk; int err; - struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; + rcu_read_lock(); + + net = dev_net_rcu(skb->dev); + sk = net->ipv6.ndisc_sk; if (!dst) { struct flowi6 fl6; int oif = skb->dev->ifindex; @@ -486,6 +490,7 @@ static void ndisc_send_skb(struct sk_buff *skb, icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { + rcu_read_unlock(); kfree_skb(skb); return; } @@ -500,7 +505,6 @@ static void ndisc_send_skb(struct sk_buff *skb, ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); - rcu_read_lock(); idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); -- Gitee From 7d0167a6a9ccd742f2b618e950392fb0c583bed1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:40 +0000 Subject: [PATCH 097/116] ipv6: mcast: extend RCU protection in igmp6_send() mainline inclusion from mainline-v6.14-rc3 commit 087c1faa594fa07a66933d750c0b2610aa1a2946 category: bugfix issue: NA CVE: CVE-2025-21759 Signed-off-by: zyf1116 --------------------------------------- igmp6_send() can be called without RTNL or RCU being held. Extend RCU protection so that we can safely fetch the net pointer and avoid a potential UAF. Note that we no longer can use sock_alloc_send_skb() because ipv6.igmp_sk uses GFP_KERNEL allocations which can sleep. Instead use alloc_skb() and charge the net->ipv6.igmp_sk socket under RCU protection. Fixes: b8ad0cbc58f7 ("[NETNS][IPV6] mcast - handle several network namespace") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-9-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 Conflicts: net/ipv6/mcast.c --- net/ipv6/mcast.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9fb5077f8e9a..c4ac3078455a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1974,21 +1974,21 @@ static void mld_send_cr(struct inet6_dev *idev) static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.igmp_sk; + const struct in6_addr *snd_addr, *saddr; + int err, len, payload_len, full_len; + struct in6_addr addr_buf; struct inet6_dev *idev; struct sk_buff *skb; struct mld_msg *hdr; - const struct in6_addr *snd_addr, *saddr; - struct in6_addr addr_buf; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - int err, len, payload_len, full_len; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; - struct flowi6 fl6; struct dst_entry *dst; + struct flowi6 fl6; + struct net *net; + struct sock *sk; if (type == ICMPV6_MGM_REDUCTION) snd_addr = &in6addr_linklocal_allrouters; @@ -1999,20 +1999,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) payload_len = len + sizeof(ra); full_len = sizeof(struct ipv6hdr) + payload_len; - rcu_read_lock(); - IP6_UPD_PO_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUT, full_len); - rcu_read_unlock(); + skb = alloc_skb(hlen + tlen + full_len, GFP_KERNEL); - skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); + rcu_read_lock(); + net = dev_net_rcu(dev); + idev = __in6_dev_get(dev); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, full_len); if (!skb) { - rcu_read_lock(); - IP6_INC_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return; } + sk = net->ipv6.igmp_sk; + skb_set_owner_w(skb, sk); + skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, hlen); @@ -2037,9 +2038,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPPROTO_ICMPV6, csum_partial(hdr, len, 0)); - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); -- Gitee From 8820eb9355307363aa3b76935fdabd61fb2068ed Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Fri, 27 Dec 2024 14:01:40 +0200 Subject: [PATCH 098/116] usb: xhci: Fix NULL pointer dereference on certain command aborts mainline inclusion from mainline-v6.14-rc1 commit 1e0a19912adb68a4b2b74fd77001c96cd83eb073 category: bugfix issue: NA CVE: CVE-2024-57981 Signed-off-by: zyf1116 --------------------------------------- If a command is queued to the final usable TRB of a ring segment, the enqueue pointer is advanced to the subsequent link TRB and no further. If the command is later aborted, when the abort completion is handled the dequeue pointer is advanced to the first TRB of the next segment. If no further commands are queued, xhci_handle_stopped_cmd_ring() sees the ring pointers unequal and assumes that there is a pending command, so it calls xhci_mod_cmd_timer() which crashes if cur_cmd was NULL. Don't attempt timer setup if cur_cmd is NULL. The subsequent doorbell ring likely is unnecessary too, but it's harmless. Leave it alone. This is probably Bug 219532, but no confirmation has been received. The issue has been independently reproduced and confirmed fixed using a USB MCU programmed to NAK the Status stage of SET_ADDRESS forever. Everything continued working normally after several prevented crashes. Link: https://bugzilla.kernel.org/show_bug.cgi?id=219532 Fixes: c311e391a7ef ("xhci: rework command timeout and cancellation,") CC: stable@vger.kernel.org Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20241227120142.1035206-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 Conflicts: drivers/usb/host/xhci-ring.c --- drivers/usb/host/xhci-ring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 4fa387e447f0..94518180ba78 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -334,7 +334,8 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, if ((xhci->cmd_ring->dequeue != xhci->cmd_ring->enqueue) && !(xhci->xhc_state & XHCI_STATE_DYING)) { xhci->current_cmd = cur_cmd; - xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); + if (cur_cmd) + xhci_mod_cmd_timer(xhci, XHCI_CMD_DEFAULT_TIMEOUT); xhci_ring_cmd_db(xhci); } } -- Gitee From a4b750307fd048b572b950073b71f377c89c08d2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 29 Apr 2022 14:38:01 -0700 Subject: [PATCH 099/116] list: fix a data-race around ep->rdllist stable inclusion from stable-v5.15.46 commit 5d5d993f16be15d124be7b8ec71b28ef7b7dc3af category: bugfix issue: NA CVE: CVE-2022-49443 Signed-off-by: zyf1116 --------------------------------------- [ Upstream commit d679ae94fdd5d3ab00c35078f5af5f37e068b03d ] ep_poll() first calls ep_events_available() with no lock held and checks if ep->rdllist is empty by list_empty_careful(), which reads rdllist->prev. Thus all accesses to it need some protection to avoid store/load-tearing. Note INIT_LIST_HEAD_RCU() already has the annotation for both prev and next. Commit bf3b9f6372c4 ("epoll: Add busy poll support to epoll with socket fds.") added the first lockless ep_events_available(), and commit c5a282e9635e ("fs/epoll: reduce the scope of wq lock in epoll_wait()") made some ep_events_available() calls lockless and added single call under a lock, finally commit e59d3c64cba6 ("epoll: eliminate unnecessary lock for zero timeout") made the last ep_events_available() lockless. BUG: KCSAN: data-race in do_epoll_wait / do_epoll_wait write to 0xffff88810480c7d8 of 8 bytes by task 1802 on cpu 0: INIT_LIST_HEAD include/linux/list.h:38 [inline] list_splice_init include/linux/list.h:492 [inline] ep_start_scan fs/eventpoll.c:622 [inline] ep_send_events fs/eventpoll.c:1656 [inline] ep_poll fs/eventpoll.c:1806 [inline] do_epoll_wait+0x4eb/0xf40 fs/eventpoll.c:2234 do_epoll_pwait fs/eventpoll.c:2268 [inline] __do_sys_epoll_pwait fs/eventpoll.c:2281 [inline] __se_sys_epoll_pwait+0x12b/0x240 fs/eventpoll.c:2275 __x64_sys_epoll_pwait+0x74/0x80 fs/eventpoll.c:2275 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff88810480c7d8 of 8 bytes by task 1799 on cpu 1: list_empty_careful include/linux/list.h:329 [inline] ep_events_available fs/eventpoll.c:381 [inline] ep_poll fs/eventpoll.c:1797 [inline] do_epoll_wait+0x279/0xf40 fs/eventpoll.c:2234 do_epoll_pwait fs/eventpoll.c:2268 [inline] __do_sys_epoll_pwait fs/eventpoll.c:2281 [inline] __se_sys_epoll_pwait+0x12b/0x240 fs/eventpoll.c:2275 __x64_sys_epoll_pwait+0x74/0x80 fs/eventpoll.c:2275 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0xffff88810480c7d0 -> 0xffff888103c15098 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 1799 Comm: syz-fuzzer Tainted: G W 5.17.0-rc7-syzkaller-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Link: https://lkml.kernel.org/r/20220322002653.33865-3-kuniyu@amazon.co.jp Fixes: e59d3c64cba6 ("epoll: eliminate unnecessary lock for zero timeout") Fixes: c5a282e9635e ("fs/epoll: reduce the scope of wq lock in epoll_wait()") Fixes: bf3b9f6372c4 ("epoll: Add busy poll support to epoll with socket fds.") Signed-off-by: Kuniyuki Iwashima Reported-by: syzbot+bdd6e38a1ed5ee58d8bd@syzkaller.appspotmail.com Cc: Al Viro , Andrew Morton Cc: Kuniyuki Iwashima Cc: Kuniyuki Iwashima Cc: "Soheil Hassas Yeganeh" Cc: Davidlohr Bueso Cc: "Sridhar Samudrala" Cc: Alexander Duyck Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin Signed-off-by: zyf1116 --- include/linux/list.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/list.h b/include/linux/list.h index a18c87b63376..8fe6ef2aa966 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -33,7 +33,7 @@ static inline void INIT_LIST_HEAD(struct list_head *list) { WRITE_ONCE(list->next, list); - list->prev = list; + WRITE_ONCE(list->prev, list); } #ifdef CONFIG_DEBUG_LIST @@ -296,7 +296,7 @@ static inline int list_empty(const struct list_head *head) static inline void list_del_init_careful(struct list_head *entry) { __list_del_entry(entry); - entry->prev = entry; + WRITE_ONCE(entry->prev, entry); smp_store_release(&entry->next, entry); } @@ -316,7 +316,7 @@ static inline void list_del_init_careful(struct list_head *entry) static inline int list_empty_careful(const struct list_head *head) { struct list_head *next = smp_load_acquire(&head->next); - return (next == head) && (next == head->prev); + return (next == head) && (next == READ_ONCE(head->prev)); } /** -- Gitee From 885a238496a979d173a1856255d320bafbd62773 Mon Sep 17 00:00:00 2001 From: Yongzhi Liu Date: Fri, 21 Jan 2022 11:26:13 +0000 Subject: [PATCH 100/116] drm/amd/display: Fix memory leak stable inclusion from stable-v5.15.34 commit 7e10369c72db7a0e2f77b2e306aadc07aef6b07a category: bugfix issue: NA CVE: CVE-2022-49135 Signed-off-by: zyf1116 --------------------------------------- [ Upstream commit 5d5c6dba2b43e28845d7d7ed32a36802329a5f52 ] [why] Resource release is needed on the error handling path to prevent memory leak. [how] Fix this by adding kfree on the error handling path. Reviewed-by: Harry Wentland Signed-off-by: Yongzhi Liu Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: zyf1116 Conflicts: drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c --- .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 72 ++++++++++++++----- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index e43f82bcb231..f1e73e20ee7b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -229,8 +229,10 @@ static ssize_t dp_link_settings_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -388,8 +390,10 @@ static ssize_t dp_phy_settings_read(struct file *f, char __user *buf, break; r = put_user((*(rd_buf + result)), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1195,8 +1199,10 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1212,8 +1218,10 @@ static ssize_t dp_dsc_clock_en_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1351,8 +1359,10 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1368,8 +1378,10 @@ static ssize_t dp_dsc_slice_width_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1505,8 +1517,10 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1522,8 +1536,10 @@ static ssize_t dp_dsc_slice_height_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1655,8 +1671,10 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1672,8 +1690,10 @@ static ssize_t dp_dsc_bits_per_pixel_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1800,8 +1820,10 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1817,8 +1839,10 @@ static ssize_t dp_dsc_pic_width_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1857,8 +1881,10 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1874,8 +1900,10 @@ static ssize_t dp_dsc_pic_height_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -1929,8 +1957,10 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -1946,8 +1976,10 @@ static ssize_t dp_dsc_chunk_size_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; @@ -2001,8 +2033,10 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, break; } - if (!pipe_ctx) + if (!pipe_ctx) { + kfree(rd_buf); return -ENXIO; + } dsc = pipe_ctx->stream_res.dsc; if (dsc) @@ -2018,8 +2052,10 @@ static ssize_t dp_dsc_slice_bpg_offset_read(struct file *f, char __user *buf, break; r = put_user(*(rd_buf + result), buf); - if (r) + if (r) { + kfree(rd_buf); return r; /* r = -EFAULT */ + } buf += 1; size -= 1; -- Gitee From 21886f6adce4b117929be6cfb83d0f9b4aeadaaa Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Thu, 6 Feb 2025 12:44:20 -0500 Subject: [PATCH 101/116] arm64: cacheinfo: Avoid out-of-bounds write to cacheinfo array stable inclusion from stable-6.1.129 commit 4ff25f0b18d1d0174c105e4620428bcdc1213860 category: bugfix issue: NA CVE: CVE-2025-21785 Signed-off-by: zyf1116 --------------------------------------- [ Upstream commit 875d742cf5327c93cba1f11e12b08d3cce7a88d2 ] The loop that detects/populates cache information already has a bounds check on the array size but does not account for cache levels with separate data/instructions cache. Fix this by incrementing the index for any populated leaf (instead of any populated level). Fixes: 5d425c186537 ("arm64: kernel: add support for cpu cache information") Signed-off-by: Radu Rendec Link: https://lore.kernel.org/r/20250206174420.2178724-1-rrendec@redhat.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin Signed-off-by: zyf1116 --- arch/arm64/kernel/cacheinfo.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 97c42be71338..1510f457b615 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -87,16 +87,18 @@ int populate_cache_leaves(unsigned int cpu) unsigned int level, idx; enum cache_type type; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct cacheinfo *infos = this_cpu_ci->info_list; for (idx = 0, level = 1; level <= this_cpu_ci->num_levels && - idx < this_cpu_ci->num_leaves; idx++, level++) { + idx < this_cpu_ci->num_leaves; level++) { type = get_cache_type(level); if (type == CACHE_TYPE_SEPARATE) { - ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); - ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (idx + 1 >= this_cpu_ci->num_leaves) + break; + ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level); + ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level); } else { - ci_leaf_init(this_leaf++, type, level); + ci_leaf_init(&infos[idx++], type, level); } } return 0; -- Gitee From 80f03b8997eabac275afbe52bc270a7dfeeb8a24 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 22 Jan 2025 14:26:17 -0500 Subject: [PATCH 102/116] USB: hub: Ignore non-compliant devices with too many configs or interfaces mainline inclusion from mainline-6.14 commit 2240fed37afbcdb5e8b627bc7ad986891100e05d category: bugfix issue: NA CVE: CVE-2025-21776 Signed-off-by: zyf1116 --------------------------------------- Robert Morris created a test program which can cause usb_hub_to_struct_hub() to dereference a NULL or inappropriate pointer: Oops: general protection fault, probably for non-canonical address 0xcccccccccccccccc: 0000 [#1] SMP DEBUG_PAGEALLOC PTI CPU: 7 UID: 0 PID: 117 Comm: kworker/7:1 Not tainted 6.13.0-rc3-00017-gf44d154d6e3d #14 Hardware name: FreeBSD BHYVE/BHYVE, BIOS 14.0 10/17/2021 Workqueue: usb_hub_wq hub_event RIP: 0010:usb_hub_adjust_deviceremovable+0x78/0x110 ... Call Trace: ? die_addr+0x31/0x80 ? exc_general_protection+0x1b4/0x3c0 ? asm_exc_general_protection+0x26/0x30 ? usb_hub_adjust_deviceremovable+0x78/0x110 hub_probe+0x7c7/0xab0 usb_probe_interface+0x14b/0x350 really_probe+0xd0/0x2d0 ? __pfx___device_attach_driver+0x10/0x10 __driver_probe_device+0x6e/0x110 driver_probe_device+0x1a/0x90 __device_attach_driver+0x7e/0xc0 bus_for_each_drv+0x7f/0xd0 __device_attach+0xaa/0x1a0 bus_probe_device+0x8b/0xa0 device_add+0x62e/0x810 usb_set_configuration+0x65d/0x990 usb_generic_driver_probe+0x4b/0x70 usb_probe_device+0x36/0xd0 The cause of this error is that the device has two interfaces, and the hub driver binds to interface 1 instead of interface 0, which is where usb_hub_to_struct_hub() looks. We can prevent the problem from occurring by refusing to accept hub devices that violate the USB spec by having more than one configuration or interface. Reported-and-tested-by: Robert Morris Cc: stable Closes: https://lore.kernel.org/linux-usb/95564.1737394039@localhost/ Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/c27f3bf4-63d8-4fb5-ac82-09e3cd19f61c@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman Signed-off-by: zyf1116 --- drivers/usb/core/hub.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 331f41c6cc75..6d334bb5bb33 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1794,6 +1794,17 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) desc = intf->cur_altsetting; hdev = interface_to_usbdev(intf); + /* + * The USB 2.0 spec prohibits hubs from having more than one + * configuration or interface, and we rely on this prohibition. + * Refuse to accept a device that violates it. + */ + if (hdev->descriptor.bNumConfigurations > 1 || + hdev->actconfig->desc.bNumInterfaces > 1) { + dev_err(&intf->dev, "Invalid hub with more than one config or interface\n"); + return -EINVAL; + } + /* * Set default autosuspend delay as 0 to speedup bus suspend, * based on the below considerations: -- Gitee From a0a1b3828eb142021a087ff4c56beb9f7d03d393 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 7 Feb 2025 13:58:36 +0000 Subject: [PATCH 103/116] arp: use RCU protection in arp_xmit() mainline inclusion from mainline-6.14 commit a42b69f692165ec39db42d595f4f65a4c8f42e44 category: bugfix issue: NA CVE: CVE-2025-21762 Signed-off-by: zyf1116 --------------------------------------- arp_xmit() can be called without RTNL or RCU protection. Use RCU protection to avoid potential UAF. Fixes: 29a26a568038 ("netfilter: Pass struct net into the netfilter hooks") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250207135841.1948589-5-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 --- net/ipv4/arp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8ae9bd6f91c1..6879e0b70c76 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -637,10 +637,12 @@ static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb */ void arp_xmit(struct sk_buff *skb) { + rcu_read_lock(); /* Send it off, maybe filter it using firewalling first. */ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, - dev_net(skb->dev), NULL, skb, NULL, skb->dev, + dev_net_rcu(skb->dev), NULL, skb, NULL, skb->dev, arp_xmit_finish); + rcu_read_unlock(); } EXPORT_SYMBOL(arp_xmit); -- Gitee From 5f67e45a933f110fd5295e4d277fc50f56c27633 Mon Sep 17 00:00:00 2001 From: pangliyuan Date: Tue, 24 Dec 2024 16:18:23 +0800 Subject: [PATCH 104/116] ubifs: skip dumping tnc tree when zroot is null mainline inclusion from mainline-6.14 commit bdb0ca39e0acccf6771db49c3f94ed787d05f2d7 category: bugfix issue: NA CVE: CVE-2024-58058 Signed-off-by: zyf1116 --------------------------------------- Clearing slab cache will free all znode in memory and make c->zroot.znode = NULL, then dumping tnc tree will access c->zroot.znode which cause null pointer dereference. Link: https://bugzilla.kernel.org/show_bug.cgi?id=219624#c0 Fixes: 1e51764a3c2a ("UBIFS: add new flash file system") Signed-off-by: pangliyuan Reviewed-by: Zhihao Cheng Signed-off-by: Richard Weinberger Signed-off-by: zyf1116 --- fs/ubifs/debug.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 89320c89cf0d..8c748bc273e1 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -946,16 +946,20 @@ void ubifs_dump_tnc(struct ubifs_info *c) pr_err("\n"); pr_err("(pid %d) start dumping TNC tree\n", current->pid); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); - level = znode->level; - pr_err("== Level %d ==\n", level); - while (znode) { - if (level != znode->level) { - level = znode->level; - pr_err("== Level %d ==\n", level); + if (c->zroot.znode) { + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, NULL); + level = znode->level; + pr_err("== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; + pr_err("== Level %d ==\n", level); + } + ubifs_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); } - ubifs_dump_znode(c, znode); - znode = ubifs_tnc_levelorder_next(c, c->zroot.znode, znode); + } else { + pr_err("empty TNC tree in memory\n"); } pr_err("(pid %d) finish dumping TNC tree\n", current->pid); } -- Gitee From f7ed2b086c763335ea5998c89406a95dc4694880 Mon Sep 17 00:00:00 2001 From: Charles Han Date: Fri, 15 Nov 2024 14:26:21 +0800 Subject: [PATCH 105/116] HID: multitouch: Add NULL check in mt_input_configured mainline inclusion from mainline-6.14 commit 9b8e2220d3a052a690b1d1b23019673e612494c5 category: bugfix issue: NA CVE: CVE-2024-58020 Signed-off-by: zyf1116 --------------------------------------- devm_kasprintf() can return a NULL pointer on failure,but this returned value in mt_input_configured() is not checked. Add NULL check in mt_input_configured(), to handle kernel NULL pointer dereference error. Fixes: 479439463529 ("HID: multitouch: Correct devm device reference for hidinput input_dev name") Signed-off-by: Charles Han Signed-off-by: Jiri Kosina Signed-off-by: zyf1116 --- drivers/hid/hid-multitouch.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 7d43d62df240..3ef81b07d774 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1594,9 +1594,12 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) break; } - if (suffix) + if (suffix) { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); + if (!hi->input->name) + return -ENOMEM; + } return 0; } -- Gitee From 81cca410de48714d1d7101e177d4cd918f683cc8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Jul 2022 17:15:31 -0700 Subject: [PATCH 106/116] tcp: Fix a data-race around sysctl_tcp_ecn_fallback. mainline inclusion from mainline-5.19 commit 12b8d9ca7e678abc48195294494f1815b555d658 category: bugfix issue: NA CVE: CVE-2022-49630 Signed-off-by: zyf1116 --------------------------------------- While reading sysctl_tcp_ecn_fallback, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader. Fixes: 492135557dc0 ("tcp: add rfc3168, section 6.1.1.1. fallback") Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: zyf1116 --- net/ipv4/sysctl_net_ipv4.c | 2 ++ net/ipv4/tcp_output.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index c6726ace37af..f2a5e5c87584 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -612,6 +612,8 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "ip_dynaddr", diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 231f3ca5ca3f..b381f3a748dd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -344,7 +344,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { - if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)) /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. */ -- Gitee From b2e59b24e77d19a74c9aba642e80ebb3fae286f1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 11 Apr 2022 21:07:51 +0300 Subject: [PATCH 107/116] i2c: dev: check return value when calling dev_set_name() mainline inclusion from mainline-5.18 commit 993eb48fa199b5f476df8204e652eff63dd19361 category: bugfix issue: NA CVE: CVE-2022-49046 Signed-off-by: zyf1116 --------------------------------------- If dev_set_name() fails, the dev_name() is null, check the return value of dev_set_name() to avoid the null-ptr-deref. Fixes: 1413ef638aba ("i2c: dev: Fix the race between the release of i2c_dev and cdev") Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang Signed-off-by: zyf1116 --- drivers/i2c/i2c-dev.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index dafad891998e..f0bd4ae19df6 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -669,17 +669,22 @@ static int i2cdev_attach_adapter(struct device *dev, void *dummy) i2c_dev->dev.class = i2c_dev_class; i2c_dev->dev.parent = &adap->dev; i2c_dev->dev.release = i2cdev_dev_release; - dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); + + res = dev_set_name(&i2c_dev->dev, "i2c-%d", adap->nr); + if (res) + goto err_put_i2c_dev; res = cdev_device_add(&i2c_dev->cdev, &i2c_dev->dev); - if (res) { - put_i2c_dev(i2c_dev, false); - return res; - } + if (res) + goto err_put_i2c_dev; pr_debug("i2c-dev: adapter [%s] registered as minor %d\n", adap->name, adap->nr); return 0; + +err_put_i2c_dev: + put_i2c_dev(i2c_dev, false); + return res; } static int i2cdev_detach_adapter(struct device *dev, void *dummy) -- Gitee From 22b487faec225211f4ad9c92c0ae071672dbeabb Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 24 Dec 2024 02:52:38 +0000 Subject: [PATCH 108/116] memcg: fix soft lockup in the OOM process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.14 commit ade81479c7dda1ce3eedb215c78bc615bbd04f06 category: bugfix issue: NA CVE: CVE-2024-57977 Signed-off-by: zyf1116 --------------------------------------- A soft lockup issue was found in the product with about 56,000 tasks were in the OOM cgroup, it was traversing them when the soft lockup was triggered. watchdog: BUG: soft lockup - CPU#2 stuck for 23s! [VM Thread:1503066] CPU: 2 PID: 1503066 Comm: VM Thread Kdump: loaded Tainted: G Hardware name: Huawei Cloud OpenStack Nova, BIOS RIP: 0010:console_unlock+0x343/0x540 RSP: 0000:ffffb751447db9a0 EFLAGS: 00000247 ORIG_RAX: ffffffffffffff13 RAX: 0000000000000001 RBX: 0000000000000000 RCX: 00000000ffffffff RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000247 RBP: ffffffffafc71f90 R08: 0000000000000000 R09: 0000000000000040 R10: 0000000000000080 R11: 0000000000000000 R12: ffffffffafc74bd0 R13: ffffffffaf60a220 R14: 0000000000000247 R15: 0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2fe6ad91f0 CR3: 00000004b2076003 CR4: 0000000000360ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: vprintk_emit+0x193/0x280 printk+0x52/0x6e dump_task+0x114/0x130 mem_cgroup_scan_tasks+0x76/0x100 dump_header+0x1fe/0x210 oom_kill_process+0xd1/0x100 out_of_memory+0x125/0x570 mem_cgroup_out_of_memory+0xb5/0xd0 try_charge+0x720/0x770 mem_cgroup_try_charge+0x86/0x180 mem_cgroup_try_charge_delay+0x1c/0x40 do_anonymous_page+0xb5/0x390 handle_mm_fault+0xc4/0x1f0 This is because thousands of processes are in the OOM cgroup, it takes a long time to traverse all of them. As a result, this lead to soft lockup in the OOM process. To fix this issue, call 'cond_resched' in the 'mem_cgroup_scan_tasks' function per 1000 iterations. For global OOM, call 'touch_softlockup_watchdog' per 1000 iterations to avoid this issue. Link: https://lkml.kernel.org/r/20241224025238.3768787-1-chenridong@huaweicloud.com Fixes: 9cbb78bb3143 ("mm, memcg: introduce own oom handler to iterate only over its own threads") Signed-off-by: Chen Ridong Acked-by: Michal Hocko Cc: Roman Gushchin Cc: Johannes Weiner Cc: Shakeel Butt Cc: Muchun Song Cc: Michal Koutný Cc: Signed-off-by: Andrew Morton Signed-off-by: zyf1116 --- mm/memcontrol.c | 7 ++++++- mm/oom_kill.c | 8 +++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dc3e442cd911..2ce75eb3034b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1330,6 +1330,7 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, { struct mem_cgroup *iter; int ret = 0; + int i = 0; BUG_ON(memcg == root_mem_cgroup); @@ -1338,8 +1339,12 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg, struct task_struct *task; css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it); - while (!ret && (task = css_task_iter_next(&it))) + while (!ret && (task = css_task_iter_next(&it))) { + /* Avoid potential softlockup warning */ + if ((++i & 1023) == 0) + cond_resched(); ret = fn(task, arg); + } css_task_iter_end(&it); if (ret) { mem_cgroup_iter_break(memcg, iter); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3d7c557fb70c..ba333cc560d8 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include "internal.h" @@ -430,10 +431,15 @@ static void dump_tasks(struct oom_control *oc) mem_cgroup_scan_tasks(oc->memcg, dump_task, oc); else { struct task_struct *p; + int i = 0; rcu_read_lock(); - for_each_process(p) + for_each_process(p) { + /* Avoid potential softlockup warning */ + if ((++i & 1023) == 0) + touch_softlockup_watchdog(); dump_task(p, oc); + } rcu_read_unlock(); } } -- Gitee From 870f371210c717465ecbf07aa4e298e84156a37b Mon Sep 17 00:00:00 2001 From: Ionela Voinescu Date: Mon, 17 Jan 2022 21:06:59 +0800 Subject: [PATCH 109/116] arm64: split counter validation function mainline inclusion from mainline-v5.11-rc1 commit bc3b6562a1ac category: bugfix issue: NA CVE: CVE-2022-48657 Signed-off-by: zyf1116 --------------------------------------- In order for the counter validation function to be reused, split validate_cpu_freq_invariance_counters() into: - freq_counters_valid(cpu) - check cpu for valid cycle counters - freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) - generic function that sets the normalization ratio used by topology_scale_freq_tick() Signed-off-by: Ionela Voinescu Reviewed-by: Sudeep Holla Cc: Will Deacon Link: https://lore.kernel.org/r/20201106125334.21570-3-ionela.voinescu@arm.com Signed-off-by: Catalin Marinas Signed-off-by: Xiongfeng Wang Reviewed-by: Hanjun Guo Signed-off-by: Zheng Zengkai Signed-off-by: zyf1116 Conflicts: arch/arm64/kernel/topology.c --- arch/arm64/kernel/topology.c | 44 +++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index f35af19b7055..bd8fb9de69ad 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -102,45 +102,49 @@ void init_cpu_freq_invariance_counters(void) read_sysreg_s(SYS_AMEVCNTR0_CONST_EL0)); } -static int validate_cpu_freq_invariance_counters(int cpu) +static inline bool freq_counters_valid(int cpu) { - u64 max_freq_hz, ratio; - if (!cpu_has_amu_feat(cpu)) { pr_debug("CPU%d: counters are not supported.\n", cpu); - return -EINVAL; + return false; } if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) || !per_cpu(arch_core_cycles_prev, cpu))) { pr_debug("CPU%d: cycle counters are not enabled.\n", cpu); - return -EINVAL; + return false; } - /* Convert maximum frequency from KHz to Hz and validate */ - max_freq_hz = cpufreq_get_hw_max_freq(cpu) * 1000ULL; - if (unlikely(!max_freq_hz)) { - pr_debug("CPU%d: invalid maximum frequency.\n", cpu); + return true; +} + +static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) +{ + u64 ratio; + + if (unlikely(!max_rate || !ref_rate)) { + pr_debug("CPU%d: invalid maximum or reference frequency.\n", + cpu); return -EINVAL; } /* * Pre-compute the fixed ratio between the frequency of the constant - * counter and the maximum frequency of the CPU. + * reference counter and the maximum frequency of the CPU. * - * const_freq - * arch_max_freq_scale = ---------------- * SCHED_CAPACITY_SCALE² - * cpuinfo_max_freq + * ref_rate + * arch_max_freq_scale = ---------- * SCHED_CAPACITY_SCALE² + * max_rate * * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE² * in order to ensure a good resolution for arch_max_freq_scale for - * very low arch timer frequencies (down to the KHz range which should + * very low reference frequencies (down to the KHz range which should * be unlikely). */ - ratio = (u64)arch_timer_get_rate() << (2 * SCHED_CAPACITY_SHIFT); - ratio = div64_u64(ratio, max_freq_hz); + ratio = ref_rate << (2 * SCHED_CAPACITY_SHIFT); + ratio = div64_u64(ratio, max_rate); if (!ratio) { - WARN_ONCE(1, "System timer frequency too low.\n"); + WARN_ONCE(1, "Reference frequency too low.\n"); return -EINVAL; } @@ -187,8 +191,12 @@ static int __init init_amu_fie(void) } for_each_present_cpu(cpu) { - if (validate_cpu_freq_invariance_counters(cpu)) + if (!freq_counters_valid(cpu) || + freq_inv_set_max_ratio(cpu, + cpufreq_get_hw_max_freq(cpu) * 1000, + arch_timer_get_rate())) continue; + cpumask_set_cpu(cpu, valid_cpus); have_policy |= enable_policy_freq_counters(cpu, valid_cpus); } -- Gitee From dde7b20d89813caa4e294723bcf0532b8d690521 Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 26 Jan 2023 15:08:09 -0600 Subject: [PATCH 110/116] tpm: Use managed allocation for bios event log mainline inclusion from mainline-v6.3 commit 441b7152729f4a2bdb100135a58625fa0aeb69e4 category: bugfix issue: NA CVE: CVE-2024-58005 Signed-off-by: zyf1116 --------------------------------------- Since the bios event log is freed in the device release function, let devres handle the deallocation. This will allow other memory allocation/mapping functions to be used for the bios event log. Signed-off-by: Eddie James Tested-by: Jarkko Sakkinen Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: zyf1116 --- drivers/char/tpm/eventlog/acpi.c | 5 +++-- drivers/char/tpm/eventlog/efi.c | 13 +++++++------ drivers/char/tpm/eventlog/of.c | 3 ++- drivers/char/tpm/tpm-chip.c | 1 - 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/char/tpm/eventlog/acpi.c b/drivers/char/tpm/eventlog/acpi.c index cd266021d010..bd757d836c5c 100644 --- a/drivers/char/tpm/eventlog/acpi.c +++ b/drivers/char/tpm/eventlog/acpi.c @@ -14,6 +14,7 @@ * Access to the event log extended by the TCG BIOS of PC platform */ +#include #include #include #include @@ -135,7 +136,7 @@ int tpm_read_log_acpi(struct tpm_chip *chip) } /* malloc EventLog space */ - log->bios_event_log = kmalloc(len, GFP_KERNEL); + log->bios_event_log = devm_kmalloc(&chip->dev, len, GFP_KERNEL); if (!log->bios_event_log) return -ENOMEM; @@ -164,7 +165,7 @@ int tpm_read_log_acpi(struct tpm_chip *chip) return format; err: - kfree(log->bios_event_log); + devm_kfree(&chip->dev, log->bios_event_log); log->bios_event_log = NULL; return ret; } diff --git a/drivers/char/tpm/eventlog/efi.c b/drivers/char/tpm/eventlog/efi.c index e6cb9d525e30..4e9d7c2bf32e 100644 --- a/drivers/char/tpm/eventlog/efi.c +++ b/drivers/char/tpm/eventlog/efi.c @@ -6,6 +6,7 @@ * Thiebaud Weksteen */ +#include #include #include @@ -55,7 +56,7 @@ int tpm_read_log_efi(struct tpm_chip *chip) } /* malloc EventLog space */ - log->bios_event_log = kmemdup(log_tbl->log, log_size, GFP_KERNEL); + log->bios_event_log = devm_kmemdup(&chip->dev, log_tbl->log, log_size, GFP_KERNEL); if (!log->bios_event_log) { ret = -ENOMEM; goto out; @@ -76,7 +77,7 @@ int tpm_read_log_efi(struct tpm_chip *chip) MEMREMAP_WB); if (!final_tbl) { pr_err("Could not map UEFI TPM final log\n"); - kfree(log->bios_event_log); + devm_kfree(&chip->dev, log->bios_event_log); ret = -ENOMEM; goto out; } @@ -91,11 +92,11 @@ int tpm_read_log_efi(struct tpm_chip *chip) * Allocate memory for the 'combined log' where we will append the * 'final events log' to. */ - tmp = krealloc(log->bios_event_log, - log_size + final_events_log_size, - GFP_KERNEL); + tmp = devm_krealloc(&chip->dev, log->bios_event_log, + log_size + final_events_log_size, + GFP_KERNEL); if (!tmp) { - kfree(log->bios_event_log); + devm_kfree(&chip->dev, log->bios_event_log); ret = -ENOMEM; goto out; } diff --git a/drivers/char/tpm/eventlog/of.c b/drivers/char/tpm/eventlog/of.c index a9ce66d09a75..741ab2204b11 100644 --- a/drivers/char/tpm/eventlog/of.c +++ b/drivers/char/tpm/eventlog/of.c @@ -10,6 +10,7 @@ * Read the event log created by the firmware on PPC64 */ +#include #include #include #include @@ -65,7 +66,7 @@ int tpm_read_log_of(struct tpm_chip *chip) return -EIO; } - log->bios_event_log = kmemdup(__va(base), size, GFP_KERNEL); + log->bios_event_log = devm_kmemdup(&chip->dev, __va(base), size, GFP_KERNEL); if (!log->bios_event_log) return -ENOMEM; diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index ed600473ad7e..1e4f1a5049a5 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -267,7 +267,6 @@ static void tpm_dev_release(struct device *dev) idr_remove(&dev_nums_idr, chip->dev_num); mutex_unlock(&idr_lock); - kfree(chip->log.bios_event_log); kfree(chip->work_space.context_buf); kfree(chip->work_space.session_buf); kfree(chip->allocated_banks); -- Gitee From 2a29ff839e3b22317797bdbb23b5952db7ef8913 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 27 Dec 2024 17:39:09 +0200 Subject: [PATCH 111/116] tpm: Change to kvalloc() in eventlog/acpi.c mainline inclusion from mainline-v6.14 commit a3a860bc0fd6c07332e4911cf9a238d20de90173 category: bugfix issue: #IBZA9X CVE: CVE-2024-58005 Signed-off-by: zyf1116 --------------------------------------- The following failure was reported on HPE ProLiant D320: [ 10.693310][ T1] tpm_tis STM0925:00: 2.0 TPM (device-id 0x3, rev-id 0) [ 10.848132][ T1] ------------[ cut here ]------------ [ 10.853559][ T1] WARNING: CPU: 59 PID: 1 at mm/page_alloc.c:4727 __alloc_pages_noprof+0x2ca/0x330 [ 10.862827][ T1] Modules linked in: [ 10.866671][ T1] CPU: 59 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-lp155.2.g52785e2-default #1 openSUSE Tumbleweed (unreleased) 588cd98293a7c9eba9013378d807364c088c9375 [ 10.882741][ T1] Hardware name: HPE ProLiant DL320 Gen12/ProLiant DL320 Gen12, BIOS 1.20 10/28/2024 [ 10.892170][ T1] RIP: 0010:__alloc_pages_noprof+0x2ca/0x330 [ 10.898103][ T1] Code: 24 08 e9 4a fe ff ff e8 34 36 fa ff e9 88 fe ff ff 83 fe 0a 0f 86 b3 fd ff ff 80 3d 01 e7 ce 01 00 75 09 c6 05 f8 e6 ce 01 01 <0f> 0b 45 31 ff e9 e5 fe ff ff f7 c2 00 00 08 00 75 42 89 d9 80 e1 [ 10.917750][ T1] RSP: 0000:ffffb7cf40077980 EFLAGS: 00010246 [ 10.923777][ T1] RAX: 0000000000000000 RBX: 0000000000040cc0 RCX: 0000000000000000 [ 10.931727][ T1] RDX: 0000000000000000 RSI: 000000000000000c RDI: 0000000000040cc0 The above transcript shows that ACPI pointed a 16 MiB buffer for the log events because RSI maps to the 'order' parameter of __alloc_pages_noprof(). Address the bug by moving from devm_kmalloc() to devm_add_action() and kvmalloc() and devm_add_action(). Suggested-by: Ard Biesheuvel Cc: stable@vger.kernel.org # v2.6.16+ Fixes: 55a82ab3181b ("[PATCH] tpm: add bios measurement log") Reported-by: Andy Liang Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219495 Reviewed-by: Ard Biesheuvel Reviewed-by: Stefan Berger Reviewed-by: Takashi Iwai Tested-by: Andy Liang Signed-off-by: Jarkko Sakkinen Signed-off-by: zyf1116 --- drivers/char/tpm/eventlog/acpi.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/eventlog/acpi.c b/drivers/char/tpm/eventlog/acpi.c index bd757d836c5c..1a5644051d31 100644 --- a/drivers/char/tpm/eventlog/acpi.c +++ b/drivers/char/tpm/eventlog/acpi.c @@ -63,6 +63,11 @@ static bool tpm_is_tpm2_log(void *bios_event_log, u64 len) return n == 0; } +static void tpm_bios_log_free(void *data) +{ + kvfree(data); +} + /* read binary bios log */ int tpm_read_log_acpi(struct tpm_chip *chip) { @@ -136,7 +141,7 @@ int tpm_read_log_acpi(struct tpm_chip *chip) } /* malloc EventLog space */ - log->bios_event_log = devm_kmalloc(&chip->dev, len, GFP_KERNEL); + log->bios_event_log = kvmalloc(len, GFP_KERNEL); if (!log->bios_event_log) return -ENOMEM; @@ -162,10 +167,16 @@ int tpm_read_log_acpi(struct tpm_chip *chip) goto err; } + ret = devm_add_action(&chip->dev, tpm_bios_log_free, log->bios_event_log); + if (ret) { + log->bios_event_log = NULL; + goto err; + } + return format; err: - devm_kfree(&chip->dev, log->bios_event_log); + tpm_bios_log_free(log->bios_event_log); log->bios_event_log = NULL; return ret; } -- Gitee From 0ccc4c79a4470d2a0205e6e76e2a35c5815f3858 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 1 Mar 2025 15:06:24 +0300 Subject: [PATCH 112/116] proc: fix UAF in proc_get_inode() mainline inclusion from mainline-v6.14-rc6 commit 654b33ada4ab5e926cd9c570196fefa7bec7c1df category: bugfix issue: NA CVE: CVE-2025-21999 Signed-off-by: zyf1116 --------------------------------------- Fix race between rmmod and /proc/XXX's inode instantiation. The bug is that pde->proc_ops don't belong to /proc, it belongs to a module, therefore dereferencing it after /proc entry has been registered is a bug unless use_pde/unuse_pde() pair has been used. use_pde/unuse_pde can be avoided (2 atomic ops!) because pde->proc_ops never changes so information necessary for inode instantiation can be saved _before_ proc_register() in PDE itself and used later, avoiding pde->proc_ops->... dereference. rmmod lookup sys_delete_module proc_lookup_de pde_get(de); proc_get_inode(dir->i_sb, de); mod->exit() proc_remove remove_proc_subtree proc_entry_rundown(de); free_module(mod); if (S_ISREG(inode->i_mode)) if (de->proc_ops->proc_read_iter) --> As module is already freed, will trigger UAF BUG: unable to handle page fault for address: fffffbfff80a702b PGD 817fc4067 P4D 817fc4067 PUD 817fc0067 PMD 102ef4067 PTE 0 Oops: Oops: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 26 UID: 0 PID: 2667 Comm: ls Tainted: G Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:proc_get_inode+0x302/0x6e0 RSP: 0018:ffff88811c837998 EFLAGS: 00010a06 RAX: dffffc0000000000 RBX: ffffffffc0538140 RCX: 0000000000000007 RDX: 1ffffffff80a702b RSI: 0000000000000001 RDI: ffffffffc0538158 RBP: ffff8881299a6000 R08: 0000000067bbe1e5 R09: 1ffff11023906f20 R10: ffffffffb560ca07 R11: ffffffffb2b43a58 R12: ffff888105bb78f0 R13: ffff888100518048 R14: ffff8881299a6004 R15: 0000000000000001 FS: 00007f95b9686840(0000) GS:ffff8883af100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffbfff80a702b CR3: 0000000117dd2000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: proc_lookup_de+0x11f/0x2e0 __lookup_slow+0x188/0x350 walk_component+0x2ab/0x4f0 path_lookupat+0x120/0x660 filename_lookup+0x1ce/0x560 vfs_statx+0xac/0x150 __do_sys_newstat+0x96/0x110 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e [adobriyan@gmail.com: don't do 2 atomic ops on the common path] Link: https://lkml.kernel.org/r/3d25ded0-1739-447e-812b-e34da7990dcf@p183 Fixes: 778f3dd5a13c ("Fix procfs compat_ioctl regression") Signed-off-by: Ye Bin Signed-off-by: Alexey Dobriyan Cc: Al Viro Cc: David S. Miller Cc: Signed-off-by: Andrew Morton Signed-off-by: zyf1116 Conflicts: fs/proc/internal.h --- fs/proc/generic.c | 10 +++++++++- fs/proc/inode.c | 6 +++--- fs/proc/internal.h | 14 ++++++++++++++ include/linux/proc_fs.h | 7 +++++-- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 5898761698c2..7b6d9c77b425 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -563,10 +563,16 @@ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, return p; } -static inline void pde_set_flags(struct proc_dir_entry *pde) +static void pde_set_flags(struct proc_dir_entry *pde) { if (pde->proc_ops->proc_flags & PROC_ENTRY_PERMANENT) pde->flags |= PROC_ENTRY_PERMANENT; + if (pde->proc_ops->proc_read_iter) + pde->flags |= PROC_ENTRY_proc_read_iter; +#ifdef CONFIG_COMPAT + if (pde->proc_ops->proc_compat_ioctl) + pde->flags |= PROC_ENTRY_proc_compat_ioctl; +#endif } struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, @@ -630,6 +636,7 @@ struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, p->proc_ops = &proc_seq_ops; p->seq_ops = ops; p->state_size = state_size; + pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_seq_private); @@ -660,6 +667,7 @@ struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, return NULL; p->proc_ops = &proc_single_ops; p->single_show = show; + pde_set_flags(p); return proc_register(parent, p); } EXPORT_SYMBOL(proc_create_single_data); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index bde6b6f69852..ba35ffc426ea 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -684,13 +684,13 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) if (S_ISREG(inode->i_mode)) { inode->i_op = de->proc_iops; - if (de->proc_ops->proc_read_iter) + if (pde_has_proc_read_iter(de)) inode->i_fop = &proc_iter_file_ops; else inode->i_fop = &proc_reg_file_ops; #ifdef CONFIG_COMPAT - if (de->proc_ops->proc_compat_ioctl) { - if (de->proc_ops->proc_read_iter) + if (pde_has_proc_compat_ioctl(de)) { + if (pde_has_proc_read_iter(de)) inode->i_fop = &proc_iter_file_ops_compat; else inode->i_fop = &proc_reg_file_ops_compat; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 1c002424f5b5..356646cf78ff 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -79,6 +79,20 @@ static inline bool pde_is_permanent(const struct proc_dir_entry *pde) return pde->flags & PROC_ENTRY_PERMANENT; } +static inline bool pde_has_proc_read_iter(const struct proc_dir_entry *pde) +{ + return pde->flags & PROC_ENTRY_proc_read_iter; +} + +static inline bool pde_has_proc_compat_ioctl(const struct proc_dir_entry *pde) +{ +#ifdef CONFIG_COMPAT + return pde->flags & PROC_ENTRY_proc_compat_ioctl; +#else + return false; +#endif +} + extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 8c892730a1f1..9f8e0072b30f 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -20,10 +20,13 @@ enum { * If in doubt, ignore this flag. */ #ifdef MODULE - PROC_ENTRY_PERMANENT = 0U, + PROC_ENTRY_PERMANENT = 0U, #else - PROC_ENTRY_PERMANENT = 1U << 0, + PROC_ENTRY_PERMANENT = 1U << 0, #endif + + PROC_ENTRY_proc_read_iter = 1U << 1, + PROC_ENTRY_proc_compat_ioctl = 1U << 2, }; struct proc_ops { -- Gitee From 88372128833d028120ec2ed7f369b907becf59ce Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 1 Feb 2024 10:47:51 +0100 Subject: [PATCH 113/116] net: atlantic: Fix DMA mapping for PTP hwts ring mainline inclusion from mainline-v6.8-rc4 commit 2e7d3b67630dfd8f178c41fa2217aa00e79a5887 category: bugfix issue: NA CVE: CVE-2024-26680 Signed-off-by: zyf1116 --------------------------------------- Function aq_ring_hwts_rx_alloc() maps extra AQ_CFG_RXDS_DEF bytes for PTP HWTS ring but then generic aq_ring_free() does not take this into account. Create and use a specific function to free HWTS ring to fix this issue. Trace: [ 215.351607] ------------[ cut here ]------------ [ 215.351612] DMA-API: atlantic 0000:4b:00.0: device driver frees DMA memory with different size [device address=0x00000000fbdd0000] [map size=34816 bytes] [unmap size=32768 bytes] [ 215.351635] WARNING: CPU: 33 PID: 10759 at kernel/dma/debug.c:988 check_unmap+0xa6f/0x2360 ... [ 215.581176] Call Trace: [ 215.583632] [ 215.585745] ? show_trace_log_lvl+0x1c4/0x2df [ 215.590114] ? show_trace_log_lvl+0x1c4/0x2df [ 215.594497] ? debug_dma_free_coherent+0x196/0x210 [ 215.599305] ? check_unmap+0xa6f/0x2360 [ 215.603147] ? __warn+0xca/0x1d0 [ 215.606391] ? check_unmap+0xa6f/0x2360 [ 215.610237] ? report_bug+0x1ef/0x370 [ 215.613921] ? handle_bug+0x3c/0x70 [ 215.617423] ? exc_invalid_op+0x14/0x50 [ 215.621269] ? asm_exc_invalid_op+0x16/0x20 [ 215.625480] ? check_unmap+0xa6f/0x2360 [ 215.629331] ? mark_lock.part.0+0xca/0xa40 [ 215.633445] debug_dma_free_coherent+0x196/0x210 [ 215.638079] ? __pfx_debug_dma_free_coherent+0x10/0x10 [ 215.643242] ? slab_free_freelist_hook+0x11d/0x1d0 [ 215.648060] dma_free_attrs+0x6d/0x130 [ 215.651834] aq_ring_free+0x193/0x290 [atlantic] [ 215.656487] aq_ptp_ring_free+0x67/0x110 [atlantic] ... [ 216.127540] ---[ end trace 6467e5964dd2640b ]--- [ 216.132160] DMA-API: Mapped at: [ 216.132162] debug_dma_alloc_coherent+0x66/0x2f0 [ 216.132165] dma_alloc_attrs+0xf5/0x1b0 [ 216.132168] aq_ring_hwts_rx_alloc+0x150/0x1f0 [atlantic] [ 216.132193] aq_ptp_ring_alloc+0x1bb/0x540 [atlantic] [ 216.132213] aq_nic_init+0x4a1/0x760 [atlantic] Fixes: 94ad94558b0f ("net: aquantia: add PTP rings infrastructure") Signed-off-by: Ivan Vecera Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20240201094752.883026-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: zyf1116 Conflicts: drivers/net/ethernet/aquantia/atlantic/aq_ring.h Signed-off-by: zyf1116 --- drivers/net/ethernet/aquantia/atlantic/aq_ptp.c | 4 ++-- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 13 +++++++++++++ drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index 06de19f63287..e54357f02205 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -1001,7 +1001,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) return 0; err_exit_hwts_rx: - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); err_exit_ptp_rx: aq_ring_free(&aq_ptp->ptp_rx); err_exit_ptp_tx: @@ -1019,7 +1019,7 @@ void aq_ptp_ring_free(struct aq_nic_s *aq_nic) aq_ring_free(&aq_ptp->ptp_tx); aq_ring_free(&aq_ptp->ptp_rx); - aq_ring_free(&aq_ptp->hwts_rx); + aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); aq_ptp_skb_ring_release(&aq_ptp->skb_ring); } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 98e8997f8036..3a83241aebb9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -587,6 +587,19 @@ void aq_ring_free(struct aq_ring_s *self) } } +void aq_ring_hwts_rx_free(struct aq_ring_s *self) +{ + if (!self) + return; + + if (self->dx_ring) { + dma_free_coherent(aq_nic_get_dev(self->aq_nic), + self->size * self->dx_size + AQ_CFG_RXDS_DEF, + self->dx_ring, self->dx_ring_pa); + self->dx_ring = NULL; + } +} + unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) { unsigned int count; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 93659e58f1ce..9844d5998b7d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -191,6 +191,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self); struct aq_ring_s *aq_ring_hwts_rx_alloc(struct aq_ring_s *self, struct aq_nic_s *aq_nic, unsigned int idx, unsigned int size, unsigned int dx_size); +void aq_ring_hwts_rx_free(struct aq_ring_s *self); void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic); unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data); -- Gitee From 987957fc7e667b85f513806b4f3e9d94f6964627 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 28 Jun 2021 19:36:50 -0700 Subject: [PATCH 114/116] swap: fix do_swap_page() race with swapoff stable inclusion from v5.10.50 commit a5dcdfe4cb4a5311568202289595ed6436212ca9 category: bugfix issue: NA CVE: CVE-2024-26759 Signed-off-by: zyf1116 --------------------------------------- [ Upstream commit 2799e77529c2a25492a4395db93996e3dacd762d ] When I was investigating the swap code, I found the below possible race window: CPU 1 CPU 2 ----- ----- do_swap_page if (data_race(si->flags & SWP_SYNCHRONOUS_IO) swap_readpage if (data_race(sis->flags & SWP_FS_OPS)) { swapoff .. p->swap_file = NULL; .. struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping;[oops!] Note that for the pages that are swapped in through swap cache, this isn't an issue. Because the page is locked, and the swap entry will be marked with SWAP_HAS_CACHE, so swapoff() can not proceed until the page has been unlocked. Fix this race by using get/put_swap_device() to guard against concurrent swapoff. Link: https://lkml.kernel.org/r/20210426123316.806267-3-linmiaohe@huawei.com Fixes: 0bcac06f27d7 ("mm,swap: skip swapcache for swapin of synchronous device") Signed-off-by: Miaohe Lin Reviewed-by: "Huang, Ying" Cc: Alex Shi Cc: David Hildenbrand Cc: Dennis Zhou Cc: Hugh Dickins Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Michal Hocko Cc: Minchan Kim Cc: Tim Chen Cc: Wei Yang Cc: Yang Shi Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: zyf1116 --- include/linux/swap.h | 9 +++++++++ mm/memory.c | 11 +++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 517ab5adb973..aedb5b5b820c 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -521,6 +521,15 @@ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) return NULL; } +static inline struct swap_info_struct *get_swap_device(swp_entry_t entry) +{ + return NULL; +} + +static inline void put_swap_device(struct swap_info_struct *si) +{ +} + #define swap_address_space(entry) (NULL) #define get_nr_swap_pages() 0L #define total_swap_pages 0L diff --git a/mm/memory.c b/mm/memory.c index ecfeda3db9ff..a60677a3eef2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3359,6 +3359,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct page *page = NULL, *swapcache; + struct swap_info_struct *si = NULL; swp_entry_t entry; pte_t pte; int locked; @@ -3400,14 +3401,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) goto out; } + /* Prevent swapoff from happening to us. */ + si = get_swap_device(entry); + if (unlikely(!si)) + goto out; delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry, vma, vmf->address); swapcache = page; if (!page) { - struct swap_info_struct *si = swp_swap_info(entry); - if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { /* skip swapcache */ @@ -3583,6 +3586,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: + if (si) + put_swap_device(si); return ret; out_nomap: pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -3594,6 +3599,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock_page(swapcache); put_page(swapcache); } + if (si) + put_swap_device(si); return ret; } -- Gitee From b028337bee58419f82328604dcefe0231ce7feb2 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 7 Feb 2024 02:25:59 +0800 Subject: [PATCH 115/116] mm/swap: fix race when skipping swapcache mainline inclusion from mainline-v6.8-rc6 commit 13ddaf26be324a7f951891ecd9ccd04466d27458 category: bugfix issue: #IC2PU5 CVE: CVE-2024-26759 Signed-off-by: zyf1116 --------------------------------------- When skipping swapcache for SWP_SYNCHRONOUS_IO, if two or more threads swapin the same entry at the same time, they get different pages (A, B). Before one thread (T0) finishes the swapin and installs page (A) to the PTE, another thread (T1) could finish swapin of page (B), swap_free the entry, then swap out the possibly modified page reusing the same entry. It breaks the pte_same check in (T0) because PTE value is unchanged, causing ABA problem. Thread (T0) will install a stalled page (A) into the PTE and cause data corruption. One possible callstack is like this: CPU0 CPU1 ---- ---- do_swap_page() do_swap_page() with same entry swap_read_folio() <- read to page A swap_read_folio() <- read to page B ... set_pte_at() swap_free() <- entry is free pte_same() <- Check pass, PTE seems unchanged, but page A is stalled! swap_free() <- page B content lost! set_pte_at() <- staled page A installed! And besides, for ZRAM, swap_free() allows the swap device to discard the entry content, so even if page (B) is not modified, if swap_read_folio() on CPU0 happens later than swap_free() on CPU1, it may also cause data loss. To fix this, reuse swapcache_prepare which will pin the swap entry using the cache flag, and allow only one thread to swap it in, also prevent any parallel code from putting the entry in the cache. Release the pin after PT unlocked. Racers just loop and wait since it's a rare and very short event. A schedule_timeout_uninterruptible(1) call is added to avoid repeated page faults wasting too much CPU, causing livelock or adding too much noise to perf statistics. A similar livelock issue was described in commit 029c4628b2eb ("mm: swap: get rid of livelock in swapin readahead") Reproducer: This race issue can be triggered easily using a well constructed reproducer and patched brd (with a delay in read path) [1]: With latest 6.8 mainline, race caused data loss can be observed easily: $ gcc -g -lpthread test-thread-swap-race.c && ./a.out Polulating 32MB of memory region... Keep swapping out... Starting round 0... Spawning 65536 workers... 32746 workers spawned, wait for done... Round 0: Error on 0x5aa00, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x395200, expected 32746, got 32743, 3 data loss! Round 0: Error on 0x3fd000, expected 32746, got 32737, 9 data loss! Round 0 Failed, 15 data loss! This reproducer spawns multiple threads sharing the same memory region using a small swap device. Every two threads updates mapped pages one by one in opposite direction trying to create a race, with one dedicated thread keep swapping out the data out using madvise. The reproducer created a reproduce rate of about once every 5 minutes, so the race should be totally possible in production. After this patch, I ran the reproducer for over a few hundred rounds and no data loss observed. Performance overhead is minimal, microbenchmark swapin 10G from 32G zram: Before: 10934698 us After: 11157121 us Cached: 13155355 us (Dropping SWP_SYNCHRONOUS_IO flag) [kasong@tencent.com: v4] Link: https://lkml.kernel.org/r/20240219082040.7495-1-ryncsn@gmail.com Link: https://lkml.kernel.org/r/20240206182559.32264-1-ryncsn@gmail.com Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device") Reported-by: "Huang, Ying" Closes: https://lore.kernel.org/lkml/87bk92gqpx.fsf_-_@yhuang6-desk2.ccr.corp.intel.com/ Link: https://github.com/ryncsn/emm-test-project/tree/master/swap-stress-race [1] Signed-off-by: Kairui Song Reviewed-by: "Huang, Ying" Acked-by: Yu Zhao Acked-by: David Hildenbrand Acked-by: Chris Li Cc: Hugh Dickins Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Yosry Ahmed Cc: Yu Zhao Cc: Barry Song <21cnbao@gmail.com> Cc: SeongJae Park Cc: Signed-off-by: Andrew Morton Signed-off-by: zyf1116 Conflicts: include/linux/swap.h mm/memory.c --- include/linux/swap.h | 5 +++++ mm/memory.c | 20 ++++++++++++++++++++ mm/swapfile.c | 13 +++++++++++++ 3 files changed, 38 insertions(+) diff --git a/include/linux/swap.h b/include/linux/swap.h index aedb5b5b820c..1d43f93243e4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -436,6 +436,7 @@ extern void __delete_from_swap_cache(struct page *page, extern void delete_from_swap_cache(struct page *); extern void clear_shadow_from_swap_cache(int type, unsigned long begin, unsigned long end); +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry); extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, @@ -591,6 +592,10 @@ static inline int swap_writepage(struct page *p, struct writeback_control *wbc) return 0; } +static inline void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ +} + static inline struct page *lookup_swap_cache(swp_entry_t swp, struct vm_area_struct *vma, unsigned long addr) diff --git a/mm/memory.c b/mm/memory.c index a60677a3eef2..cdd07179695e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3360,6 +3360,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct page *page = NULL, *swapcache; struct swap_info_struct *si = NULL; + bool need_clear_cache = false; swp_entry_t entry; pte_t pte; int locked; @@ -3413,6 +3414,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!page) { if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { + /* + * Prevent parallel swapin from proceeding with + * the cache flag. Otherwise, another thread may + * finish swapin first, free the entry, and swapout + * reusing the same entry. It's undetectable as + * pte_same() returns true due to entry reuse. + */ + if (swapcache_prepare(entry)) { + /* Relax a bit to prevent rapid repeated page faults */ + schedule_timeout_uninterruptible(1); + goto out; + } + need_clear_cache = true; + /* skip swapcache */ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); @@ -3586,6 +3601,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: + /* Clear the swap cache pin for direct swapin after PTL unlock */ + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; @@ -3599,6 +3617,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) unlock_page(swapcache); put_page(swapcache); } + if (need_clear_cache) + swapcache_clear(si, entry); if (si) put_swap_device(si); return ret; diff --git a/mm/swapfile.c b/mm/swapfile.c index 0a067a083116..0202f281d551 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3590,6 +3590,19 @@ int swapcache_prepare(swp_entry_t entry) return __swap_duplicate(entry, SWAP_HAS_CACHE); } +void swapcache_clear(struct swap_info_struct *si, swp_entry_t entry) +{ + struct swap_cluster_info *ci; + unsigned long offset = swp_offset(entry); + unsigned char usage; + + ci = lock_cluster_or_swap_info(si, offset); + usage = __swap_entry_free_locked(si, offset, SWAP_HAS_CACHE); + unlock_cluster_or_swap_info(si, ci); + if (!usage) + free_swap_slot(entry); +} + struct swap_info_struct *swp_swap_info(swp_entry_t entry) { return swap_type_to_swap_info(swp_type(entry)); -- Gitee From 1836088707b3662f3891a510f63fbe73722b1ce5 Mon Sep 17 00:00:00 2001 From: Li Nan Date: Thu, 24 Apr 2025 21:36:04 +0800 Subject: [PATCH 116/116] test mr --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index f72d53957033..bd5eee531a3f 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -151,7 +151,7 @@ static __refdata struct memblock_type *memblock_memory = &memblock.memory; pr_info(fmt, ##__VA_ARGS__); \ } while (0) -static int memblock_debug __initdata_memblock; +static int memblock_debug __initdata_memblock = 1; static bool system_has_some_mirror __initdata_memblock = false; static int memblock_can_resize __initdata_memblock; static int memblock_memory_in_slab __initdata_memblock = 0; -- Gitee