From d8a03e79a20e2ee9691a9c157fafd3bddf098c60 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Mon, 1 Aug 2022 18:25:11 +0000 Subject: [PATCH 001/243] binder: fix UAF of ref->proc caused by race condition stable inclusion from stable-5.10.142 commit 9629f2dfdb1dad294b468038ff8e161e94d0b609 category: bugfix issue: I5VMGP CVE: CVE-2022-20421 Signed-off-by: gaochao --------------------------------------- commit a0e44c64b6061dda7e00b7c458e4523e2331b739 upstream. A transaction of type BINDER_TYPE_WEAK_HANDLE can fail to increment the reference for a node. In this case, the target proc normally releases the failed reference upon close as expected. However, if the target is dying in parallel the call will race with binder_deferred_release(), so the target could have released all of its references by now leaving the cleanup of the new failed reference unhandled. The transaction then ends and the target proc gets released making the ref->proc now a dangling pointer. Later on, ref->node is closed and we attempt to take spin_lock(&ref->proc->inner_lock), which leads to the use-after-free bug reported below. Let's fix this by cleaning up the failed reference on the spot instead of relying on the target to do so. ================================================================== BUG: KASAN: use-after-free in _raw_spin_lock+0xa8/0x150 Write of size 4 at addr ffff5ca207094238 by task kworker/1:0/590 CPU: 1 PID: 590 Comm: kworker/1:0 Not tainted 5.19.0-rc8 #10 Hardware name: linux,dummy-virt (DT) Workqueue: events binder_deferred_func Call trace: dump_backtrace.part.0+0x1d0/0x1e0 show_stack+0x18/0x70 dump_stack_lvl+0x68/0x84 print_report+0x2e4/0x61c kasan_report+0xa4/0x110 kasan_check_range+0xfc/0x1a4 __kasan_check_write+0x3c/0x50 _raw_spin_lock+0xa8/0x150 binder_deferred_func+0x5e0/0x9b0 process_one_work+0x38c/0x5f0 worker_thread+0x9c/0x694 kthread+0x188/0x190 ret_from_fork+0x10/0x20 Acked-by: Christian Brauner (Microsoft) Signed-off-by: Carlos Llamas Cc: stable # 4.14+ Link: https://lore.kernel.org/r/20220801182511.3371447-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 0f6af5919e04..60dba1f2eb16 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1784,6 +1784,18 @@ static int binder_inc_ref_for_node(struct binder_proc *proc, } ret = binder_inc_ref_olocked(ref, strong, target_list); *rdata = ref->data; + if (ret && ref == new_ref) { + /* + * Cleanup the failed reference here as the target + * could now be dead and have already released its + * references by now. Calling on the new reference + * with strong=0 and a tmp_refs will not decrement + * the node. The new_ref gets kfree'd below. + */ + binder_cleanup_ref_olocked(new_ref); + ref = NULL; + } + binder_proc_unlock(proc); if (new_ref && ref != new_ref) /* -- Gitee From 9e174687d415a23492183046859322e4cdfa0aa7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 28 Sep 2022 21:56:15 +0200 Subject: [PATCH 002/243] wifi: cfg80211: fix u8 overflow in cfg80211_update_notlisted_nontrans() stable inclusion from stable-5.10.148 commit a6408e0b694c1bdd8ae7dd0464a86b98518145ec category: bugfix issue: I5WKEU CVE: CVE-2022-41674 Signed-off-by: gaochao --------------------------------------- commit aebe9f4639b13a1f4e9a6b42cdd2e38c617b442d upstream. 
In the copy code of the elements, we do the following calculation to reach the end of the MBSSID element: /* copy the IEs after MBSSID */ cpy_len = mbssid[1] + 2; This looks fine, however, cpy_len is a u8, the same as mbssid[1], so the addition of two can overflow. In this case the subsequent memcpy() will overflow the allocated buffer, since it copies 256 bytes too much due to the way the allocation and memcpy() sizes are calculated. Fix this by using size_t for the cpy_len variable. This fixes CVE-2022-41674. Reported-by: Soenke Huster Tested-by: Soenke Huster Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Reviewed-by: Kees Cook Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index fd614a5a00b4..bde2e647bc3a 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2219,7 +2219,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy, size_t new_ie_len; struct cfg80211_bss_ies *new_ies; const struct cfg80211_bss_ies *old; - u8 cpy_len; + size_t cpy_len; lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock); -- Gitee From 072143ee6f68bd4f5b10bf833b4793196ee00d70 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 14 Oct 2022 18:41:50 +0200 Subject: [PATCH 003/243] wifi: mac80211: fix MBSSID parsing use-after-free stable inclusion from stable-5.10.149 commit 31ce5da48a845bac48930bbde1d45e7449591728 category: bugfix issue: I5WJSU CVE: CVE-2022-42719 Signed-off-by: gaochao --------------------------------------- Commit ff05d4b45dd89b922578dac497dcabf57cf771c6 upstream. This is a different version of the commit, changed to store the non-transmitted profile in the elems, and freeing it in the few places where it's relevant, since that is only the case when the last argument for parsing (the non-tx BSSID) is non-NULL. When we parse a multi-BSSID element, we might point some element pointers into the allocated nontransmitted_profile. However, we free this before returning, causing UAF when the relevant pointers in the parsed elements are accessed. Fix this by not allocating the scratch buffer separately but as part of the returned structure instead, that way, there are no lifetime issues with it. The scratch buffer introduction as part of the returned data here is taken from MLO feature work done by Ilan. This fixes CVE-2022-42719. 
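For illustration only, here is a minimal userspace sketch of the ownership model the fix adopts: the scratch buffer becomes a member of the parsed result, so pointers into it stay valid until the caller frees it. The struct and function names below are invented for the example and are not the mac80211 API.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct parsed_elems {
        const char *ssid;       /* may point into nontx_profile */
        char *nontx_profile;    /* owned by the result, freed by the caller */
    };

    static int parse(const char *frame, struct parsed_elems *out)
    {
        out->nontx_profile = strdup(frame);     /* the scratch buffer */
        if (!out->nontx_profile)
            return -1;
        out->ssid = out->nontx_profile;         /* element pointer into the buffer */
        return 0;
    }

    int main(void)
    {
        struct parsed_elems elems;

        if (parse("example-ssid", &elems))
            return 1;
        printf("%s\n", elems.ssid);     /* valid: the buffer is still alive */
        free(elems.nontx_profile);      /* caller frees, like the added kfree() calls */
        return 0;
    }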
Fixes: 5023b14cf4df ("mac80211: support profile split between elements") Co-developed-by: Ilan Peer Signed-off-by: Ilan Peer Reviewed-by: Kees Cook Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 6 +++++- net/mac80211/scan.c | 2 ++ net/mac80211/util.c | 7 ++++++- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7f2be08b72a5..75deca7014d4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1548,6 +1548,8 @@ struct ieee802_11_elems { u8 country_elem_len; u8 bssid_index_len; + void *nontx_profile; + /* whether a parse error occurred while retrieving these elements */ bool parse_error; }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 778bf262418b..504ae5466759 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3385,6 +3385,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "AP bug: VHT operation missing from AssocResp\n"); } + kfree(bss_elems.nontx_profile); } /* @@ -4030,6 +4031,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_data->timeout = jiffies; ifmgd->assoc_data->timeout_started = true; run_again(sdata, ifmgd->assoc_data->timeout); + kfree(elems.nontx_profile); return; } @@ -4207,7 +4209,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, deauth_buf, sizeof(deauth_buf), true, WLAN_REASON_DEAUTH_LEAVING); - return; + goto free; } if (sta && elems.opmode_notif) @@ -4222,6 +4224,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, elems.cisco_dtpc_elem); ieee80211_bss_info_change_notify(sdata, changed); +free: + kfree(elems.nontx_profile); } void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 6b50cb5e0e3c..ad088324a6d3 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -227,6 +227,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local, rx_status, beacon); } + kfree(elems.nontx_profile); + return bss; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a1f129292ad8..c2eae5270ee5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1485,6 +1485,11 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, nontransmitted_profile, nontransmitted_profile_len); + if (!nontransmitted_profile_len) { + nontransmitted_profile_len = 0; + kfree(nontransmitted_profile); + nontransmitted_profile = NULL; + } } crc = _ieee802_11_parse_elems_crc(start, len, action, elems, filter, @@ -1514,7 +1519,7 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, offsetofend(struct ieee80211_bssid_index, dtim_count)) elems->dtim_count = elems->bssid_index->dtim_count; - kfree(nontransmitted_profile); + elems->nontx_profile = nontransmitted_profile; return crc; } -- Gitee From 85b64629e765731fe493fddf84e3d79e5de7a74d Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Mon, 11 Apr 2022 14:37:51 +0530 Subject: [PATCH 004/243] cfg80211: hold bss_lock while updating nontrans_list stable inclusion from stable-5.10.112 commit 5513f9a0b0684383c1bc120ea37b35ae944a709a category: bugfix issue: I5WJUA CVE: NA Signed-off-by: gaochao --------------------------------------- [ Upstream commit a5199b5626cd6913cf8776a835bc63d40e0686ad ] Synchronize 
additions to nontrans_list of transmitting BSS with bss_lock to avoid races. Also when cfg80211_add_nontrans_list() fails __cfg80211_unlink_bss() needs bss_lock to be held (has lockdep assert on bss_lock). So protect the whole block with bss_lock to avoid races and warnings. Found during code review. Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Signed-off-by: Rameshkumar Sundaram Link: https://lore.kernel.org/r/1649668071-9370-1-git-send-email-quic_ramess@quicinc.com Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/scan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index bde2e647bc3a..26f81a962602 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1961,11 +1961,13 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ + spin_lock_bh(&rdev->bss_lock); if (cfg80211_add_nontrans_list(non_tx_data->tx_bss, &res->pub)) { if (__cfg80211_unlink_bss(rdev, res)) rdev->bss_generation++; } + spin_unlock_bh(&rdev->bss_lock); } trace_cfg80211_return_bss(&res->pub); -- Gitee From 27ef0fd32a5854f031bdb744e703af6eb4c1d27b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 30 Sep 2022 23:44:23 +0200 Subject: [PATCH 005/243] wifi: cfg80211: fix BSS refcounting bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.148 commit 6b944845031356f3e0c0f6695f9252a8ddc8b02f category: bugfix issue: I5WJUA CVE: CVE-2022-42720 Signed-off-by: gaochao --------------------------------------- commit 0b7808818cb9df6680f98996b8e9a439fa7bcc2f upstream. There are multiple refcounting bugs related to multi-BSSID: - In bss_ref_get(), if the BSS has a hidden_beacon_bss, then the bss pointer is overwritten before checking for the transmitted BSS, which is clearly wrong. Fix this by using the bss_from_pub() macro. - In cfg80211_bss_update() we copy the transmitted_bss pointer from tmp into new, but then if we release new, we'll unref it erroneously. We already set the pointer and ref it, but need to NULL it since it was copied from the tmp data. - In cfg80211_inform_single_bss_data(), if adding to the non- transmitted list fails, we unlink the BSS and yet still we return it, but this results in returning an entry without a reference. We shouldn't return it anyway if it was broken enough to not get added there. This fixes CVE-2022-42720. 
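For illustration, a small standalone sketch of the container_of() idea behind the first fix: the related objects get their refcount bumped through a helper, so the local pointer is never overwritten between the two checks. Types and names are invented for the example and are not the cfg80211 ones.

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct pub_bss      { struct pub_bss *hidden, *transmitted; };
    struct internal_bss { int refcount; struct pub_bss pub; };

    static struct internal_bss *from_pub(struct pub_bss *pub)
    {
        return container_of(pub, struct internal_bss, pub);
    }

    static void bss_ref_get(struct internal_bss *bss)
    {
        bss->refcount++;
        /* 'bss' is never reassigned, so both checks still see the original
         * object, unlike the buggy version that overwrote the pointer. */
        if (bss->pub.hidden)
            from_pub(bss->pub.hidden)->refcount++;
        if (bss->pub.transmitted)
            from_pub(bss->pub.transmitted)->refcount++;
    }

    int main(void)
    {
        struct internal_bss tx = { 0 }, bss = { 0 };

        bss.pub.transmitted = &tx.pub;
        bss_ref_get(&bss);
        printf("bss=%d tx=%d\n", bss.refcount, tx.refcount);    /* bss=1 tx=1 */
        return 0;
    }

This mirrors the bss_from_pub() helper used in the diff: converting back to the containing object on demand instead of reusing (and clobbering) the iteration pointer.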
Reported-by: Sönke Huster Tested-by: Sönke Huster Fixes: a3584f56de1c ("cfg80211: Properly track transmitting and non-transmitting BSS") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/scan.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 26f81a962602..d3fbbe172339 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -143,18 +143,12 @@ static inline void bss_ref_get(struct cfg80211_registered_device *rdev, lockdep_assert_held(&rdev->bss_lock); bss->refcount++; - if (bss->pub.hidden_beacon_bss) { - bss = container_of(bss->pub.hidden_beacon_bss, - struct cfg80211_internal_bss, - pub); - bss->refcount++; - } - if (bss->pub.transmitted_bss) { - bss = container_of(bss->pub.transmitted_bss, - struct cfg80211_internal_bss, - pub); - bss->refcount++; - } + + if (bss->pub.hidden_beacon_bss) + bss_from_pub(bss->pub.hidden_beacon_bss)->refcount++; + + if (bss->pub.transmitted_bss) + bss_from_pub(bss->pub.transmitted_bss)->refcount++; } static inline void bss_ref_put(struct cfg80211_registered_device *rdev, @@ -1727,6 +1721,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, new->refcount = 1; INIT_LIST_HEAD(&new->hidden_list); INIT_LIST_HEAD(&new->pub.nontrans_list); + /* we'll set this later if it was non-NULL */ + new->pub.transmitted_bss = NULL; if (rcu_access_pointer(tmp->pub.proberesp_ies)) { hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN); @@ -1964,10 +1960,15 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, spin_lock_bh(&rdev->bss_lock); if (cfg80211_add_nontrans_list(non_tx_data->tx_bss, &res->pub)) { - if (__cfg80211_unlink_bss(rdev, res)) + if (__cfg80211_unlink_bss(rdev, res)) { rdev->bss_generation++; + res = NULL; + } } spin_unlock_bh(&rdev->bss_lock); + + if (!res) + return NULL; } trace_cfg80211_return_bss(&res->pub); -- Gitee From a133ec1a0e7368dd32598ab8d88ba30de27a9f08 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 1 Oct 2022 00:01:44 +0200 Subject: [PATCH 006/243] wifi: cfg80211: avoid nontransmitted BSS list corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.148 commit b0e5c5deb7880be5b8a459d584e13e1f9879d307 category: bugfix issue: I5WJVP CVE: CVE-2022-42721 Signed-off-by: gaochao --------------------------------------- commit bcca852027e5878aec911a347407ecc88d6fff7f upstream. If a non-transmitted BSS shares enough information (both SSID and BSSID!) with another non-transmitted BSS of a different AP, then we can find and update it, and then try to add it to the non-transmitted BSS list. We do a search for it on the transmitted BSS, but if it's not there (but belongs to another transmitted BSS), the list gets corrupted. Since this is an erroneous situation, simply fail the list insertion in this case and free the non-transmitted BSS. This fixes CVE-2022-42721. 
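For illustration, a minimal userspace sketch of the guard this patch adds: refuse to link a node that is already on some other list. The tiny circular list below is hand rolled for the example and is not the kernel's <linux/list.h>.

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void init_list(struct list_head *h) { h->next = h->prev = h; }
    static int  list_empty(const struct list_head *h) { return h->next == h; }

    static void list_add_tail(struct list_head *n, struct list_head *h)
    {
        n->prev = h->prev;
        n->next = h;
        h->prev->next = n;
        h->prev = n;
    }

    static int add_nontrans(struct list_head *node, struct list_head *trans_list)
    {
        if (!list_empty(node))      /* already linked somewhere: refuse */
            return -1;
        list_add_tail(node, trans_list);
        return 0;
    }

    int main(void)
    {
        struct list_head list_a, list_b, node;

        init_list(&list_a);
        init_list(&list_b);
        init_list(&node);
        printf("%d\n", add_nontrans(&node, &list_a));   /* 0: linked into list_a */
        printf("%d\n", add_nontrans(&node, &list_b));   /* -1: refused, no corruption */
        return 0;
    }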
Reported-by: Sönke Huster Tested-by: Sönke Huster Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/scan.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d3fbbe172339..c86b10c20127 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -423,6 +423,15 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, rcu_read_unlock(); + /* + * This is a bit weird - it's not on the list, but already on another + * one! The only way that could happen is if there's some BSSID/SSID + * shared by multiple APs in their multi-BSSID profiles, potentially + * with hidden SSID mixed in ... ignore it. + */ + if (!list_empty(&nontrans_bss->nontrans_list)) + return -EINVAL; + /* add to the list */ list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list); return 0; -- Gitee From 9a296c15d5f6772e51bcbe105b88e9b0781db4e8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2022 21:24:10 +0200 Subject: [PATCH 007/243] wifi: mac80211: fix crash in beacon protection for P2P-device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.148 commit 58c0306d0bcd5f541714bea8765d23111c9af68a category: bugfix issue: I5WJWD CVE: CVE-2022-42722 Signed-off-by: gaochao --------------------------------------- commit b2d03cabe2b2e150ff5a381731ea0355459be09f upstream. If beacon protection is active but the beacon cannot be decrypted or is otherwise malformed, we call the cfg80211 API to report this to userspace, but that uses a netdev pointer, which isn't present for P2P-Device. Fix this to call it only conditionally to ensure cfg80211 won't crash in the case of P2P-Device. This fixes CVE-2022-42722. 
Reported-by: Sönke Huster Fixes: 9eaf183af741 ("mac80211: Report beacon protection failures to user space") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d27c444a19ed..b6bcb34ec6bb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1976,10 +1976,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (mmie_keyidx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS || mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + - NUM_DEFAULT_BEACON_KEYS) { - cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, - skb->data, - skb->len); + NUM_DEFAULT_BEACON_KEYS) { + if (rx->sdata->dev) + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + skb->data, + skb->len); return RX_DROP_MONITOR; /* unexpected BIP keyidx */ } @@ -2127,7 +2128,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) /* either the frame has been decrypted or will be dropped */ status->flag |= RX_FLAG_DECRYPTED; - if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE)) + if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE && + rx->sdata->dev)) cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, skb->data, skb->len); -- Gitee From b0e1be6ed7135607e42b37e07ef60353c2b96655 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 1 Apr 2022 10:33:42 +0300 Subject: [PATCH 008/243] net: ipv4: fix route with nexthop object delete warning stable inclusion from stable-5.10.111 commit 63ea57478aaa3e06a597081a0f537318fc04e49f category: bugfix issue: I5VWNQ CVE: NO Signed-off-by: gaochao --------------------------------------- [ Upstream commit 6bf92d70e690b7ff12b24f4bfff5e5434d019b82 ] FRR folks have hit a kernel warning[1] while deleting routes[2] which is caused by trying to delete a route pointing to a nexthop id without specifying nhid but matching on an interface. That is, a route is found but we hit a warning while matching it. The warning is from fib_info_nh() in include/net/nexthop.h because we run it on a fib_info with nexthop object. The call chain is: inet_rtm_delroute -> fib_table_delete -> fib_nh_match (called with a nexthop fib_info and also with fc_oif set thus calling fib_info_nh on the fib_info and triggering the warning). The fix is to not do any matching in that branch if the fi has a nexthop object because those are managed separately. I.e. 
we should match when deleting without nh spec and should fail when deleting a nexthop route with old-style nh spec because nexthop objects are managed separately, e.g.: $ ip r show 1.2.3.4/32 1.2.3.4 nhid 12 via 192.168.11.2 dev dummy0 $ ip r del 1.2.3.4/32 $ ip r del 1.2.3.4/32 nhid 12 $ ip r del 1.2.3.4/32 dev dummy0 [1] [ 523.462226] ------------[ cut here ]------------ [ 523.462230] WARNING: CPU: 14 PID: 22893 at include/net/nexthop.h:468 fib_nh_match+0x210/0x460 [ 523.462236] Modules linked in: dummy rpcsec_gss_krb5 xt_socket nf_socket_ipv4 nf_socket_ipv6 ip6table_raw iptable_raw bpf_preload xt_statistic ip_set ip_vs_sh ip_vs_wrr ip_vs_rr ip_vs xt_mark nf_tables xt_nat veth nf_conntrack_netlink nfnetlink xt_addrtype br_netfilter overlay dm_crypt nfsv3 nfs fscache netfs vhost_net vhost vhost_iotlb tap tun xt_CHECKSUM xt_MASQUERADE xt_conntrack 8021q garp mrp ipt_REJECT nf_reject_ipv4 ip6table_mangle ip6table_nat iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bridge stp llc rfcomm snd_seq_dummy snd_hrtimer rpcrdma rdma_cm iw_cm ib_cm ib_core ip6table_filter xt_comment ip6_tables vboxnetadp(OE) vboxnetflt(OE) vboxdrv(OE) qrtr bnep binfmt_misc xfs vfat fat squashfs loop nvidia_drm(POE) nvidia_modeset(POE) nvidia_uvm(POE) nvidia(POE) intel_rapl_msr intel_rapl_common snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio snd_hda_codec_hdmi btusb btrtl iwlmvm uvcvideo btbcm snd_hda_intel edac_mce_amd [ 523.462274] videobuf2_vmalloc videobuf2_memops btintel snd_intel_dspcfg videobuf2_v4l2 snd_intel_sdw_acpi bluetooth snd_usb_audio snd_hda_codec mac80211 snd_usbmidi_lib joydev snd_hda_core videobuf2_common kvm_amd snd_rawmidi snd_hwdep snd_seq videodev ccp snd_seq_device libarc4 ecdh_generic mc snd_pcm kvm iwlwifi snd_timer drm_kms_helper snd cfg80211 cec soundcore irqbypass rapl wmi_bmof i2c_piix4 rfkill k10temp pcspkr acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc drm zram ip_tables crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel nvme sp5100_tco r8169 nvme_core wmi ipmi_devintf ipmi_msghandler fuse [ 523.462300] CPU: 14 PID: 22893 Comm: ip Tainted: P OE 5.16.18-200.fc35.x86_64 #1 [ 523.462302] Hardware name: Micro-Star International Co., Ltd. MS-7C37/MPG X570 GAMING EDGE WIFI (MS-7C37), BIOS 1.C0 10/29/2020 [ 523.462303] RIP: 0010:fib_nh_match+0x210/0x460 [ 523.462304] Code: 7c 24 20 48 8b b5 90 00 00 00 e8 bb ee f4 ff 48 8b 7c 24 20 41 89 c4 e8 ee eb f4 ff 45 85 e4 0f 85 2e fe ff ff e9 4c ff ff ff <0f> 0b e9 17 ff ff ff 3c 0a 0f 85 61 fe ff ff 48 8b b5 98 00 00 00 [ 523.462306] RSP: 0018:ffffaa53d4d87928 EFLAGS: 00010286 [ 523.462307] RAX: 0000000000000000 RBX: ffffaa53d4d87a90 RCX: ffffaa53d4d87bb0 [ 523.462308] RDX: ffff9e3d2ee6be80 RSI: ffffaa53d4d87a90 RDI: ffffffff920ed380 [ 523.462309] RBP: ffff9e3d2ee6be80 R08: 0000000000000064 R09: 0000000000000000 [ 523.462310] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000031 [ 523.462310] R13: 0000000000000020 R14: 0000000000000000 R15: ffff9e3d331054e0 [ 523.462311] FS: 00007f245517c1c0(0000) GS:ffff9e492ed80000(0000) knlGS:0000000000000000 [ 523.462313] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 523.462313] CR2: 000055e5dfdd8268 CR3: 00000003ef488000 CR4: 0000000000350ee0 [ 523.462315] Call Trace: [ 523.462316] [ 523.462320] fib_table_delete+0x1a9/0x310 [ 523.462323] inet_rtm_delroute+0x93/0x110 [ 523.462325] rtnetlink_rcv_msg+0x133/0x370 [ 523.462327] ? _copy_to_iter+0xb5/0x6f0 [ 523.462330] ? 
rtnl_calcit.isra.0+0x110/0x110 [ 523.462331] netlink_rcv_skb+0x50/0xf0 [ 523.462334] netlink_unicast+0x211/0x330 [ 523.462336] netlink_sendmsg+0x23f/0x480 [ 523.462338] sock_sendmsg+0x5e/0x60 [ 523.462340] ____sys_sendmsg+0x22c/0x270 [ 523.462341] ? import_iovec+0x17/0x20 [ 523.462343] ? sendmsg_copy_msghdr+0x59/0x90 [ 523.462344] ? __mod_lruvec_page_state+0x85/0x110 [ 523.462348] ___sys_sendmsg+0x81/0xc0 [ 523.462350] ? netlink_seq_start+0x70/0x70 [ 523.462352] ? __dentry_kill+0x13a/0x180 [ 523.462354] ? __fput+0xff/0x250 [ 523.462356] __sys_sendmsg+0x49/0x80 [ 523.462358] do_syscall_64+0x3b/0x90 [ 523.462361] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 523.462364] RIP: 0033:0x7f24552aa337 [ 523.462365] Code: 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 523.462366] RSP: 002b:00007fff7f05a838 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 523.462368] RAX: ffffffffffffffda RBX: 000000006245bf91 RCX: 00007f24552aa337 [ 523.462368] RDX: 0000000000000000 RSI: 00007fff7f05a8a0 RDI: 0000000000000003 [ 523.462369] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [ 523.462370] R10: 0000000000000008 R11: 0000000000000246 R12: 0000000000000001 [ 523.462370] R13: 00007fff7f05ce08 R14: 0000000000000000 R15: 000055e5dfdd1040 [ 523.462373] [ 523.462374] ---[ end trace ba537bc16f6bf4ed ]--- [2] https://github.com/FRRouting/frr/issues/6412 Fixes: 4c7e8084fd46 ("ipv4: Plumb support for nexthop object in a fib_info") Signed-off-by: Nikolay Aleksandrov Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/fib_semantics.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 838a876c168c..c8c7b76c3b2e 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -888,8 +888,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, } if (cfg->fc_oif || cfg->fc_gw_family) { - struct fib_nh *nh = fib_info_nh(fi, 0); + struct fib_nh *nh; + + /* cannot match on nexthop object attributes */ + if (fi->nh) + return 1; + nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, cfg->fc_encap, nh, cfg, extack)) -- Gitee From d69fa6043a6ee28a6eee5b9ca107ffa97f2aaf1e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 6 Oct 2022 10:48:49 -0600 Subject: [PATCH 009/243] ipv4: Handle attempt to delete multipath route when fib_info contains an nh reference mainline inclusion from mainline-6.1-rc1 commit 61b91eb33a69c3be11b259c5ea484505cd79f883 category: bugfix issue: I5VWNQ CVE: CVE-2022-3435 Signed-off-by: gaochao --------------------------------------- Gwangun Jung reported a slab-out-of-bounds access in fib_nh_match: fib_nh_match+0xf98/0x1130 linux-6.0-rc7/net/ipv4/fib_semantics.c:961 fib_table_delete+0x5f3/0xa40 linux-6.0-rc7/net/ipv4/fib_trie.c:1753 inet_rtm_delroute+0x2b3/0x380 linux-6.0-rc7/net/ipv4/fib_frontend.c:874 Separate nexthop objects are mutually exclusive with the legacy multipath spec. Fix fib_nh_match to return if the config for the to be deleted route contains a multipath spec while the fib_info is using a nexthop object. 
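For illustration, a standalone sketch of the reordered guard: when the stored route uses a nexthop object, legacy per-nexthop attributes can never match, so the function bails out before either the single-path or the multipath comparison runs. All types and names below are invented for the example.

    #include <stdbool.h>
    #include <stdio.h>

    struct fib_info_sketch   { bool has_nh_object; int legacy_oif; };
    struct fib_config_sketch { int oif; bool multipath; };

    /* returns 1 for "no match", mirroring the fib_nh_match() convention */
    static int nh_match(const struct fib_config_sketch *cfg,
                        const struct fib_info_sketch *fi)
    {
        /* legacy attributes can never match a nexthop-object route */
        if (fi->has_nh_object)
            return 1;

        if (cfg->multipath) {
            /* walk the legacy nexthops here: safe, the nh object is known absent */
        } else if (cfg->oif && cfg->oif != fi->legacy_oif) {
            return 1;
        }
        return 0;
    }

    int main(void)
    {
        struct fib_info_sketch   fi  = { .has_nh_object = true };
        struct fib_config_sketch cfg = { .oif = 3, .multipath = true };

        printf("%d\n", nh_match(&cfg, &fi));    /* 1: rejected before any nexthop walk */
        return 0;
    }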
Fixes: 493ced1ac47c ("ipv4: Allow routes to use nexthop objects") Fixes: 6bf92d70e690 ("net: ipv4: fix route with nexthop object delete warning") Reported-by: Gwangun Jung Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c8c7b76c3b2e..5e0902af9e20 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -887,13 +887,13 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, return 1; } + /* cannot match on nexthop object attributes */ + if (fi->nh) + return 1; + if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; - /* cannot match on nexthop object attributes */ - if (fi->nh) - return 1; - nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, -- Gitee From 3c5c85e13114d19f826c374cdbf9d61a53db4242 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Oct 2022 13:34:12 +0000 Subject: [PATCH 010/243] kcm: avoid potential race in kcm_tx_work mainline inclusion from mainline-6.1-rc1 commit ec7eede369fe5b0d085ac51fdbb95184f87bfc6c category: bugfix issue: I5WZIT CVE: CVE-2022-3521 Signed-off-by: gaochao --------------------------------------- syzbot found that kcm_tx_work() could crash [1] in: /* Primarily for SOCK_SEQPACKET sockets */ if (likely(sk->sk_socket) && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { <<*>> clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_space(sk); } I think the reason is that another thread might concurrently run in kcm_release() and call sock_orphan(sk) while sk is not locked. kcm_tx_work() find sk->sk_socket being NULL. 
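Before the report below, a userspace analogue of the fix may help: the teardown path takes the same lock as the worker before clearing the shared pointer, so the worker sees either the old value or NULL, never a half-torn-down object. This is only a sketch with made-up names; build it with cc -pthread.

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t sk_lock = PTHREAD_MUTEX_INITIALIZER;
    static int socket_storage = 42;
    static int *sk_socket = &socket_storage;    /* stand-in for sk->sk_socket */

    static void *tx_work(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&sk_lock);
        if (sk_socket)                  /* safe: cannot change under the lock */
            printf("flags=%d\n", *sk_socket);
        pthread_mutex_unlock(&sk_lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, tx_work, NULL);

        pthread_mutex_lock(&sk_lock);   /* lock_sock() before ... */
        sk_socket = NULL;               /* ... sock_orphan() detaches the socket */
        pthread_mutex_unlock(&sk_lock);

        pthread_join(t, NULL);
        return 0;
    }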
[1] BUG: KASAN: null-ptr-deref in instrument_atomic_write include/linux/instrumented.h:86 [inline] BUG: KASAN: null-ptr-deref in clear_bit include/asm-generic/bitops/instrumented-atomic.h:41 [inline] BUG: KASAN: null-ptr-deref in kcm_tx_work+0xff/0x160 net/kcm/kcmsock.c:742 Write of size 8 at addr 0000000000000008 by task kworker/u4:3/53 CPU: 0 PID: 53 Comm: kworker/u4:3 Not tainted 5.19.0-rc3-next-20220621-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: kkcmd kcm_tx_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 kasan_report+0xbe/0x1f0 mm/kasan/report.c:495 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189 instrument_atomic_write include/linux/instrumented.h:86 [inline] clear_bit include/asm-generic/bitops/instrumented-atomic.h:41 [inline] kcm_tx_work+0xff/0x160 net/kcm/kcmsock.c:742 process_one_work+0x996/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e9/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Tom Herbert Link: https://lore.kernel.org/r/20221012133412.519394-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/kcm/kcmsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 56dad9565bc9..977f2de89dce 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1838,10 +1838,10 @@ static int kcm_release(struct socket *sock) kcm = kcm_sk(sk); mux = kcm->mux; + lock_sock(sk); sock_orphan(sk); kfree_skb(kcm->seq_skb); - lock_sock(sk); /* Purge queue under lock to avoid race condition with tx_work trying * to act when queue is nonempty. If tx_work runs after this point * it will just return. -- Gitee From 08e6019d9732263734de21efcaf076ebcb2d1d70 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Oct 2022 11:53:45 -0700 Subject: [PATCH 011/243] tcp/udp: Fix memory leak in ipv6_renew_options(). mainline inclusion from mainline-6.1-rc1 commit 3c52c6bb831f6335c176a0fc7214e26f43adbd11 category: bugfix issue: I5WZIA CVE: CVE-2022-3524 Signed-off-by: gaochao --------------------------------------- syzbot reported a memory leak [0] related to IPV6_ADDRFORM. The scenario is that while one thread is converting an IPv6 socket into IPv4 with IPV6_ADDRFORM, another thread calls do_ipv6_setsockopt() and allocates memory to inet6_sk(sk)->XXX after conversion. Then, the converted sk with (tcp|udp)_prot never frees the IPv6 resources, which inet6_destroy_sock() should have cleaned up. setsockopt(IPV6_ADDRFORM) setsockopt(IPV6_DSTOPTS) +-----------------------+ +----------------------+ - do_ipv6_setsockopt(sk, ...) - sockopt_lock_sock(sk) - do_ipv6_setsockopt(sk, ...) - lock_sock(sk) ^._ called via tcpv6_prot - WRITE_ONCE(sk->sk_prot, &tcp_prot) before WRITE_ONCE() - xchg(&np->opt, NULL) - txopt_put(opt) - sockopt_release_sock(sk) - release_sock(sk) - sockopt_lock_sock(sk) - lock_sock(sk) - ipv6_set_opt_hdr(sk, ...) - ipv6_update_options(sk, opt) - xchg(&inet6_sk(sk)->opt, opt) ^._ opt is never freed. - sockopt_release_sock(sk) - release_sock(sk) Since IPV6_DSTOPTS allocates options under lock_sock(), we can avoid this memory leak by testing whether sk_family is changed by IPV6_ADDRFORM after acquiring the lock. 
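For illustration, a minimal sketch of the re-check pattern applied here: once the lock is finally acquired, the state that may have changed while waiting (the socket family) is validated before any IPv6 state is allocated. Names and constants are invented for the example; build with cc -pthread on older libc.

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define FAMILY_INET6 10
    #define FAMILY_INET   2

    static pthread_mutex_t sock_lock = PTHREAD_MUTEX_INITIALIZER;
    static int sk_family = FAMILY_INET6;
    static char *ipv6_opts;             /* would leak if allocated after conversion */

    static int set_ipv6_option(void)
    {
        int err = 0;

        pthread_mutex_lock(&sock_lock);
        if (sk_family != FAMILY_INET6) {    /* converted by IPV6_ADDRFORM meanwhile */
            err = -1;
            goto unlock;
        }
        ipv6_opts = malloc(16);             /* safe: still an IPv6 socket */
    unlock:
        pthread_mutex_unlock(&sock_lock);
        return err;
    }

    int main(void)
    {
        sk_family = FAMILY_INET;            /* simulate the racing IPV6_ADDRFORM */
        printf("%d\n", set_ipv6_option());  /* -1: nothing allocated, nothing leaked */
        free(ipv6_opts);                    /* NULL here, free(NULL) is a no-op */
        return 0;
    }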
This issue exists from the initial commit between IPV6_ADDRFORM and IPV6_PKTOPTIONS. [0]: BUG: memory leak unreferenced object 0xffff888009ab9f80 (size 96): comm "syz-executor583", pid 328, jiffies 4294916198 (age 13.034s) hex dump (first 32 bytes): 01 00 00 00 48 00 00 00 08 00 00 00 00 00 00 00 ....H........... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000002ee98ae1>] kmalloc include/linux/slab.h:605 [inline] [<000000002ee98ae1>] sock_kmalloc+0xb3/0x100 net/core/sock.c:2566 [<0000000065d7b698>] ipv6_renew_options+0x21e/0x10b0 net/ipv6/exthdrs.c:1318 [<00000000a8c756d7>] ipv6_set_opt_hdr net/ipv6/ipv6_sockglue.c:354 [inline] [<00000000a8c756d7>] do_ipv6_setsockopt.constprop.0+0x28b7/0x4350 net/ipv6/ipv6_sockglue.c:668 [<000000002854d204>] ipv6_setsockopt+0xdf/0x190 net/ipv6/ipv6_sockglue.c:1021 [<00000000e69fdcf8>] tcp_setsockopt+0x13b/0x2620 net/ipv4/tcp.c:3789 [<0000000090da4b9b>] __sys_setsockopt+0x239/0x620 net/socket.c:2252 [<00000000b10d192f>] __do_sys_setsockopt net/socket.c:2263 [inline] [<00000000b10d192f>] __se_sys_setsockopt net/socket.c:2260 [inline] [<00000000b10d192f>] __x64_sys_setsockopt+0xbe/0x160 net/socket.c:2260 [<000000000a80d7aa>] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [<000000000a80d7aa>] do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 [<000000004562b5c6>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski --- net/ipv6/ipv6_sockglue.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 43a894bf9a1b..0a19f58cbc99 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -417,6 +417,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, rtnl_lock(); lock_sock(sk); + /* Another thread has converted the socket into IPv4 with + * IPV6_ADDRFORM concurrently. + */ + if (unlikely(sk->sk_family != AF_INET6)) + goto unlock; + switch (optname) { case IPV6_ADDRFORM: @@ -976,6 +982,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; } +unlock: release_sock(sk); if (needs_rtnl) rtnl_unlock(); -- Gitee From 9d44fd7b97ae0a164d74b14275bbdb3462292e27 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 3 Oct 2022 17:19:27 +0100 Subject: [PATCH 012/243] net: mvpp2: fix mvpp2 debugfs leak mainline inclusion from mainline-6.1-rc1 commit 0152dfee235e87660f52a117fc9f70dc55956bb4 category: bugfix issue: I5WKPR CVE: CVE-2022-3535 Signed-off-by: gaochao --------------------------------------- When mvpp2 is unloaded, the driver specific debugfs directory is not removed, which technically leads to a memory leak. However, this directory is only created when the first device is probed, so the hardware is present. Removing the module is only something a developer would to when e.g. testing out changes, so the module would be reloaded. So this memory leak is minor. The original attempt in commit fe2c9c61f668 ("net: mvpp2: debugfs: fix memory leak when using debugfs_lookup()") that was labelled as a memory leak fix was not, it fixed a refcount leak, but in doing so created a problem when the module is reloaded - the directory already exists, but mvpp2_root is NULL, so we lose all debugfs entries. This fix has been reverted. This is the alternative fix, where we remove the offending directory whenever the driver is unloaded. 
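For illustration, a userspace analogue of the missing teardown: a lazily created, process-global resource needs a matching release when the whole driver goes away, not just per-device cleanup. The names below are invented for the example and are not the mvpp2 or debugfs API.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static char *driver_root;           /* stands in for the shared debugfs dir */

    static int probe_one(const char *name)
    {
        if (!driver_root) {
            driver_root = strdup("mvpp2-root");   /* created on first probe */
            if (!driver_root)
                return -1;
        }
        printf("probed %s under %s\n", name, driver_root);
        return 0;
    }

    static void driver_exit(void)
    {
        free(driver_root);              /* the missing global teardown */
        driver_root = NULL;
    }

    int main(void)
    {
        probe_one("eth0");
        probe_one("eth1");
        driver_exit();                  /* no leak once the "module" goes away */
        return 0;
    }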
Fixes: 21da57a23125 ("net: mvpp2: add a debugfs interface for the Header Parser") Signed-off-by: Russell King (Oracle) Reviewed-by: Greg Kroah-Hartman Reviewed-by: Marcin Wojtas Link: https://lore.kernel.org/r/E1ofOAB-00CzkG-UO@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 1 + drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c | 10 ++++++++-- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 13 ++++++++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index d825eb021b22..e999ac2de34e 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -1434,6 +1434,7 @@ u32 mvpp2_read(struct mvpp2 *priv, u32 offset); void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name); void mvpp2_dbgfs_cleanup(struct mvpp2 *priv); +void mvpp2_dbgfs_exit(void); #ifdef CONFIG_MVPP2_PTP int mvpp22_tai_probe(struct device *dev, struct mvpp2 *priv); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c index 4a3baa7e0142..75e83ea2a926 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_debugfs.c @@ -691,6 +691,13 @@ static int mvpp2_dbgfs_port_init(struct dentry *parent, return 0; } +static struct dentry *mvpp2_root; + +void mvpp2_dbgfs_exit(void) +{ + debugfs_remove(mvpp2_root); +} + void mvpp2_dbgfs_cleanup(struct mvpp2 *priv) { debugfs_remove_recursive(priv->dbgfs_dir); @@ -700,10 +707,9 @@ void mvpp2_dbgfs_cleanup(struct mvpp2 *priv) void mvpp2_dbgfs_init(struct mvpp2 *priv, const char *name) { - struct dentry *mvpp2_dir, *mvpp2_root; + struct dentry *mvpp2_dir; int ret, i; - mvpp2_root = debugfs_lookup(MVPP2_DRIVER_NAME, NULL); if (!mvpp2_root) mvpp2_root = debugfs_create_dir(MVPP2_DRIVER_NAME, NULL); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 542cd6f2c9bd..68c5ed8716c8 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -7155,7 +7155,18 @@ static struct platform_driver mvpp2_driver = { }, }; -module_platform_driver(mvpp2_driver); +static int __init mvpp2_driver_init(void) +{ + return platform_driver_register(&mvpp2_driver); +} +module_init(mvpp2_driver_init); + +static void __exit mvpp2_driver_exit(void) +{ + platform_driver_unregister(&mvpp2_driver); + mvpp2_dbgfs_exit(); +} +module_exit(mvpp2_driver_exit); MODULE_DESCRIPTION("Marvell PPv2 Ethernet Driver - www.marvell.com"); MODULE_AUTHOR("Marcin Wojtas "); -- Gitee From 90e3dcda20db7213b5f3fe06785e22753eacd352 Mon Sep 17 00:00:00 2001 From: Jianglei Nie Date: Fri, 30 Sep 2022 14:28:43 +0800 Subject: [PATCH 013/243] bnx2x: fix potential memory leak in bnx2x_tpa_stop() mainline inclusion from mainline-6.1-rc1 commit b43f9acbb8942b05252be83ac25a81cec70cc192 category: bugfix issue: I5WZJA CVE: CVE-2022-3542 Signed-off-by: gaochao --------------------------------------- bnx2x_tpa_stop() allocates a memory chunk from new_data with bnx2x_frag_alloc(). The new_data should be freed when gets some error. But when "pad + len > fp->rx_buf_size" is true, bnx2x_tpa_stop() returns without releasing the new_data, which will lead to a memory leak. We should free the new_data with bnx2x_frag_free() when "pad + len > fp->rx_buf_size" is true. 
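For illustration, a standalone sketch of the rule this patch enforces: every early exit taken after a successful allocation must release that allocation. Function and variable names are invented for the example.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int tpa_stop(size_t pad, size_t len, size_t rx_buf_size)
    {
        char *new_data = malloc(rx_buf_size);

        if (!new_data)
            return -1;

        if (pad + len > rx_buf_size) {          /* the error path in question */
            fprintf(stderr, "skb_put is about to fail\n");
            free(new_data);                     /* the fix: release before returning */
            return -1;
        }

        memset(new_data, 0, pad + len);         /* normal path consumes the buffer */
        free(new_data);
        return 0;
    }

    int main(void)
    {
        printf("%d\n", tpa_stop(16, 32, 64));   /* 0 */
        printf("%d\n", tpa_stop(48, 32, 64));   /* -1, buffer still freed */
        return 0;
    }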
Fixes: 07b0f00964def8af9321cfd6c4a7e84f6362f728 ("bnx2x: fix possible panic under memory stress") Signed-off-by: Jianglei Nie Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 41ebbb2c7d3a..ac6e6a522ad0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -788,6 +788,7 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, BNX2X_ERR("skb_put is about to fail... pad %d len %d rx_buf_size %d\n", pad, len, fp->rx_buf_size); bnx2x_panic(); + bnx2x_frag_free(fp, new_data); return; } #endif -- Gitee From 5ed0a3a2fad77ede0b4b942ad1df86b01d032d8d Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Wed, 28 Sep 2022 21:39:38 +0800 Subject: [PATCH 014/243] mISDN: fix use-after-free bugs in l1oip timer handlers mainline inclusion from mainline-6.1-rc1 commit 2568a7e0832ee30b0a351016d03062ab4e0e0a3f category: bugfix issue: I5WZJS CVE: CVE-2022-3565 Signed-off-by: gaochao --------------------------------------- The l1oip_cleanup() traverses the l1oip_ilist and calls release_card() to cleanup module and stack. However, release_card() calls del_timer() to delete the timers such as keep_tl and timeout_tl. If the timer handler is running, the del_timer() will not stop it and result in UAF bugs. One of the processes is shown below: (cleanup routine) | (timer handler) release_card() | l1oip_timeout() ... | del_timer() | ... ... | kfree(hc) //FREE | | hc->timeout_on = 0 //USE Fix by calling del_timer_sync() in release_card(), which makes sure the timer handlers have finished before the resources, such as l1oip and so on, have been deallocated. What's more, the hc->workq and hc->socket_thread can kick those timers right back in. We add a bool flag to show if card is released. Then, check this flag in hc->workq and hc->socket_thread. Fixes: 3712b42d4b1b ("Add layer1 over IP support") Signed-off-by: Duoming Zhou Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/isdn/mISDN/l1oip.h | 1 + drivers/isdn/mISDN/l1oip_core.c | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/isdn/mISDN/l1oip.h b/drivers/isdn/mISDN/l1oip.h index 7ea10db20e3a..48133d022812 100644 --- a/drivers/isdn/mISDN/l1oip.h +++ b/drivers/isdn/mISDN/l1oip.h @@ -59,6 +59,7 @@ struct l1oip { int bundle; /* bundle channels in one frm */ int codec; /* codec to use for transmis. 
*/ int limit; /* limit number of bchannels */ + bool shutdown; /* if card is released */ /* timer */ struct timer_list keep_tl; diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index b57dcb834594..aec4f2a69c3b 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -275,7 +275,7 @@ l1oip_socket_send(struct l1oip *hc, u8 localcodec, u8 channel, u32 chanmask, p = frame; /* restart timer */ - if (time_before(hc->keep_tl.expires, jiffies + 5 * HZ)) + if (time_before(hc->keep_tl.expires, jiffies + 5 * HZ) && !hc->shutdown) mod_timer(&hc->keep_tl, jiffies + L1OIP_KEEPALIVE * HZ); else hc->keep_tl.expires = jiffies + L1OIP_KEEPALIVE * HZ; @@ -601,7 +601,9 @@ l1oip_socket_parse(struct l1oip *hc, struct sockaddr_in *sin, u8 *buf, int len) goto multiframe; /* restart timer */ - if (time_before(hc->timeout_tl.expires, jiffies + 5 * HZ) || !hc->timeout_on) { + if ((time_before(hc->timeout_tl.expires, jiffies + 5 * HZ) || + !hc->timeout_on) && + !hc->shutdown) { hc->timeout_on = 1; mod_timer(&hc->timeout_tl, jiffies + L1OIP_TIMEOUT * HZ); } else /* only adjust timer */ @@ -1232,11 +1234,10 @@ release_card(struct l1oip *hc) { int ch; - if (timer_pending(&hc->keep_tl)) - del_timer(&hc->keep_tl); + hc->shutdown = true; - if (timer_pending(&hc->timeout_tl)) - del_timer(&hc->timeout_tl); + del_timer_sync(&hc->keep_tl); + del_timer_sync(&hc->timeout_tl); cancel_work_sync(&hc->workq); -- Gitee From 63191f56bce4b73565cc43b14f1bcb8e8c1f5a85 Mon Sep 17 00:00:00 2001 From: Andrew Gaul Date: Sun, 2 Oct 2022 12:41:28 +0900 Subject: [PATCH 015/243] r8152: Rate limit overflow messages mainline inclusion from mainline-6.1-rc1 commit 93e2be344a7db169b7119de21ac1bf253b8c6907 category: bugfix issue: I5WZK0 CVE: CVE-2022-3594 Signed-off-by: gaochao --------------------------------------- My system shows almost 10 million of these messages over a 24-hour period which pollutes my logs. Signed-off-by: Andrew Gaul Link: https://lore.kernel.org/r/20221002034128.2026653-1-gaul@google.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0bb5b1c78654..a526242a3e36 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1689,7 +1689,9 @@ static void intr_callback(struct urb *urb) "Stop submitting intr, status %d\n", status); return; case -EOVERFLOW: - netif_info(tp, intr, tp->netdev, "intr status -EOVERFLOW\n"); + if (net_ratelimit()) + netif_info(tp, intr, tp->netdev, + "intr status -EOVERFLOW\n"); goto resubmit; /* -EPIPE: should clear the halt */ default: -- Gitee From 622596c4c248084830499ba8c5eba0d99158bf39 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sun, 25 Sep 2022 06:32:43 -0700 Subject: [PATCH 016/243] fbdev: smscufx: Fix use-after-free in ufx_ops_open() mainline inclusion from mainline-6.1-rc1 commit 5610bcfe8693c02e2e4c8b31427f1bdbdecc839c category: bugfix issue: I5VMG5 CVE: CVE-2022-41849 Signed-off-by: gaochao --------------------------------------- A race condition may occur if the user physically removes the USB device while calling open() for this device node. This is a race condition between the ufx_ops_open() function and the ufx_usb_disconnect() function, which may eventually result in UAF. So, add a mutex to the ufx_ops_open() and ufx_usb_disconnect() functions to avoid race contidion of krefs. 
Signed-off-by: Hyunwoo Kim Cc: stable@vger.kernel.org Signed-off-by: Helge Deller --- drivers/video/fbdev/smscufx.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c index bfac3ee4a642..7b26c95369f2 100644 --- a/drivers/video/fbdev/smscufx.c +++ b/drivers/video/fbdev/smscufx.c @@ -137,6 +137,8 @@ static int ufx_submit_urb(struct ufx_data *dev, struct urb * urb, size_t len); static int ufx_alloc_urb_list(struct ufx_data *dev, int count, size_t size); static void ufx_free_urb_list(struct ufx_data *dev); +static DEFINE_MUTEX(disconnect_mutex); + /* reads a control register */ static int ufx_reg_read(struct ufx_data *dev, u32 index, u32 *data) { @@ -1070,9 +1072,13 @@ static int ufx_ops_open(struct fb_info *info, int user) if (user == 0 && !console) return -EBUSY; + mutex_lock(&disconnect_mutex); + /* If the USB device is gone, we don't accept new opens */ - if (dev->virtualized) + if (dev->virtualized) { + mutex_unlock(&disconnect_mutex); return -ENODEV; + } dev->fb_count++; @@ -1096,6 +1102,8 @@ static int ufx_ops_open(struct fb_info *info, int user) pr_debug("open /dev/fb%d user=%d fb_info=%p count=%d", info->node, user, info, dev->fb_count); + mutex_unlock(&disconnect_mutex); + return 0; } @@ -1741,6 +1749,8 @@ static void ufx_usb_disconnect(struct usb_interface *interface) { struct ufx_data *dev; + mutex_lock(&disconnect_mutex); + dev = usb_get_intfdata(interface); pr_debug("USB disconnect starting\n"); @@ -1761,6 +1771,8 @@ static void ufx_usb_disconnect(struct usb_interface *interface) kref_put(&dev->kref, ufx_free); /* consider ufx_data freed */ + + mutex_unlock(&disconnect_mutex); } static struct usb_driver ufx_driver = { -- Gitee From 09df9451d8cc54000ced6fdf11a891bfc2245811 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 11 Oct 2022 08:01:03 -0400 Subject: [PATCH 017/243] libbpf: Fix use-after-free in btf_dump_name_dups maillist inclusion category: bugfix issue: I5WZIN CVE: CVE-2022-3534 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=93c660ca40b5d2f7c1b1626e955a8e9fa30e0749 Signed-off-by: gaochao --------------------------------------- ASAN reports an use-after-free in btf_dump_name_dups: ERROR: AddressSanitizer: heap-use-after-free on address 0xffff927006db at pc 0xaaaab5dfb618 bp 0xffffdd89b890 sp 0xffffdd89b928 READ of size 2 at 0xffff927006db thread T0 #0 0xaaaab5dfb614 in __interceptor_strcmp.part.0 (test_progs+0x21b614) #1 0xaaaab635f144 in str_equal_fn tools/lib/bpf/btf_dump.c:127 #2 0xaaaab635e3e0 in hashmap_find_entry tools/lib/bpf/hashmap.c:143 #3 0xaaaab635e72c in hashmap__find tools/lib/bpf/hashmap.c:212 #4 0xaaaab6362258 in btf_dump_name_dups tools/lib/bpf/btf_dump.c:1525 #5 0xaaaab636240c in btf_dump_resolve_name tools/lib/bpf/btf_dump.c:1552 #6 0xaaaab6362598 in btf_dump_type_name tools/lib/bpf/btf_dump.c:1567 #7 0xaaaab6360b48 in btf_dump_emit_struct_def tools/lib/bpf/btf_dump.c:912 #8 0xaaaab6360630 in btf_dump_emit_type tools/lib/bpf/btf_dump.c:798 #9 0xaaaab635f720 in btf_dump__dump_type tools/lib/bpf/btf_dump.c:282 #10 0xaaaab608523c in test_btf_dump_incremental tools/testing/selftests/bpf/prog_tests/btf_dump.c:236 #11 0xaaaab6097530 in test_btf_dump tools/testing/selftests/bpf/prog_tests/btf_dump.c:875 #12 0xaaaab6314ed0 in run_one_test tools/testing/selftests/bpf/test_progs.c:1062 #13 0xaaaab631a0a8 in main tools/testing/selftests/bpf/test_progs.c:1697 #14 0xffff9676d214 in __libc_start_main ../csu/libc-start.c:308 #15 
0xaaaab5d65990 (test_progs+0x185990) 0xffff927006db is located 11 bytes inside of 16-byte region [0xffff927006d0,0xffff927006e0) freed by thread T0 here: #0 0xaaaab5e2c7c4 in realloc (test_progs+0x24c7c4) #1 0xaaaab634f4a0 in libbpf_reallocarray tools/lib/bpf/libbpf_internal.h:191 #2 0xaaaab634f840 in libbpf_add_mem tools/lib/bpf/btf.c:163 #3 0xaaaab636643c in strset_add_str_mem tools/lib/bpf/strset.c:106 #4 0xaaaab6366560 in strset__add_str tools/lib/bpf/strset.c:157 #5 0xaaaab6352d70 in btf__add_str tools/lib/bpf/btf.c:1519 #6 0xaaaab6353e10 in btf__add_field tools/lib/bpf/btf.c:2032 #7 0xaaaab6084fcc in test_btf_dump_incremental tools/testing/selftests/bpf/prog_tests/btf_dump.c:232 #8 0xaaaab6097530 in test_btf_dump tools/testing/selftests/bpf/prog_tests/btf_dump.c:875 #9 0xaaaab6314ed0 in run_one_test tools/testing/selftests/bpf/test_progs.c:1062 #10 0xaaaab631a0a8 in main tools/testing/selftests/bpf/test_progs.c:1697 #11 0xffff9676d214 in __libc_start_main ../csu/libc-start.c:308 #12 0xaaaab5d65990 (test_progs+0x185990) previously allocated by thread T0 here: #0 0xaaaab5e2c7c4 in realloc (test_progs+0x24c7c4) #1 0xaaaab634f4a0 in libbpf_reallocarray tools/lib/bpf/libbpf_internal.h:191 #2 0xaaaab634f840 in libbpf_add_mem tools/lib/bpf/btf.c:163 #3 0xaaaab636643c in strset_add_str_mem tools/lib/bpf/strset.c:106 #4 0xaaaab6366560 in strset__add_str tools/lib/bpf/strset.c:157 #5 0xaaaab6352d70 in btf__add_str tools/lib/bpf/btf.c:1519 #6 0xaaaab6353ff0 in btf_add_enum_common tools/lib/bpf/btf.c:2070 #7 0xaaaab6354080 in btf__add_enum tools/lib/bpf/btf.c:2102 #8 0xaaaab6082f50 in test_btf_dump_incremental tools/testing/selftests/bpf/prog_tests/btf_dump.c:162 #9 0xaaaab6097530 in test_btf_dump tools/testing/selftests/bpf/prog_tests/btf_dump.c:875 #10 0xaaaab6314ed0 in run_one_test tools/testing/selftests/bpf/test_progs.c:1062 #11 0xaaaab631a0a8 in main tools/testing/selftests/bpf/test_progs.c:1697 #12 0xffff9676d214 in __libc_start_main ../csu/libc-start.c:308 #13 0xaaaab5d65990 (test_progs+0x185990) The reason is that the key stored in hash table name_map is a string address, and the string memory is allocated by realloc() function, when the memory is resized by realloc() later, the old memory may be freed, so the address stored in name_map references to a freed memory, causing use-after-free. Fix it by storing duplicated string address in name_map. 
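For illustration, a small standalone program showing why the key has to be duplicated: a buffer grown with realloc() may move, so only a strdup()'d copy is a stable hash key. This is a userspace sketch, not the libbpf code.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        char *strs = malloc(16);        /* stands in for the BTF string section */
        char *stable_key, *grown;

        if (!strs)
            return 1;
        strcpy(strs, "my_type");

        stable_key = strdup(strs);      /* what the fix stores in name_map */
        if (!stable_key) {
            free(strs);
            return 1;
        }

        /* Growing the section may relocate it; a raw pointer into the old
         * block would now be dangling, but the duplicated key is unaffected. */
        grown = realloc(strs, 1 << 20);
        if (!grown) {
            free(strs);
            free(stable_key);
            return 1;
        }
        strs = grown;

        printf("%s\n", stable_key);
        free(strs);
        free(stable_key);
        return 0;
    }

The duplicated keys then have to be freed when the map is torn down, which is what the added btf_dump_free_names() helper in the diff takes care of.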
Fixes: 919d2b1dbb07 ("libbpf: Allow modification of BTF and add btf__add_str API") Signed-off-by: Xu Kuohai Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20221011120108.782373-2-xukuohai@huaweicloud.com --- tools/lib/bpf/btf_dump.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 0911aea4cdbe..a664ab776946 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -188,6 +188,17 @@ static int btf_dump_resize(struct btf_dump *d) return 0; } +static void btf_dump_free_names(struct hashmap *map) +{ + size_t bkt; + struct hashmap_entry *cur; + + hashmap__for_each_entry(map, cur, bkt) + free((void *)cur->key); + + hashmap__free(map); +} + void btf_dump__free(struct btf_dump *d) { int i; @@ -206,8 +217,8 @@ void btf_dump__free(struct btf_dump *d) free(d->cached_names); free(d->emit_queue); free(d->decl_stack); - hashmap__free(d->type_names); - hashmap__free(d->ident_names); + btf_dump_free_names(d->type_names); + btf_dump_free_names(d->ident_names); free(d); } @@ -1392,11 +1403,23 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, const char *orig_name) { + char *old_name, *new_name; size_t dup_cnt = 0; + int err; + + new_name = strdup(orig_name); + if (!new_name) + return 1; hashmap__find(name_map, orig_name, (void **)&dup_cnt); dup_cnt++; - hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL); + + err = hashmap__set(name_map, new_name, (void *)dup_cnt, + (const void **)&old_name, NULL); + if (err) + free(new_name); + + free(old_name); return dup_cnt; } -- Gitee From 55b9c88c45e518d90a06757acb5e2ade17a5b2ae Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Tue, 19 Jul 2022 12:52:51 +0100 Subject: [PATCH 018/243] io_uring: Use original task for req identity in io_identity_cow() stable inclusion from stable-5.10.134 commit 2ee0cab11f6626071f8a64c7792406dabdd94c8d category: bugfix issue: I5XKLP CVE: CVE-2022-20409 Signed-off-by: gaochao --------------------------------------- This issue is conceptually identical to the one fixed in 29f077d07051 ("io_uring: always use original task when preparing req identity"), so rather than reinvent the wheel, I'm shamelessly quoting the commit message from that patch - thanks Jens: "If the ring is setup with IORING_SETUP_IOPOLL and we have more than one task doing submissions on a ring, we can up in a situation where we assign the context from the current task rather than the request originator. Always use req->task rather than assume it's the same as current. No upstream patch exists for this issue, as only older kernels with the non-native workers have this problem." 
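For illustration, a minimal userspace sketch of the rule applied here: per-request state must come from the task that created the request (req->task), not from whichever thread happens to be running when the request is prepared. All types below are invented stand-ins, not the io_uring structures.

    #include <stdio.h>

    struct task_ctx { int creds; };
    struct request  { struct task_ctx *task; };

    static struct task_ctx *current_task;      /* stand-in for "current" */

    static int identity_cow(const struct request *req)
    {
        struct task_ctx *tctx = req->task;      /* originator, not current_task */

        return tctx->creds;
    }

    int main(void)
    {
        struct task_ctx submitter = { .creds = 1000 };
        struct task_ctx other     = { .creds = 0 };
        struct request  req       = { .task = &submitter };

        current_task = &other;                  /* a different task runs the request */
        printf("creds=%d\n", identity_cow(&req));   /* 1000: the submitter's identity */
        return 0;
    }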
Cc: Jens Axboe Cc: Pavel Begunkov Cc: Alexander Viro Cc: io-uring@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Fixes: 5c3462cfd123b ("io_uring: store io_identity in io_uring_task") Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 01432143ab5d..e5eddd008651 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1325,7 +1325,7 @@ static void io_req_clean_work(struct io_kiocb *req) */ static bool io_identity_cow(struct io_kiocb *req) { - struct io_uring_task *tctx = current->io_uring; + struct io_uring_task *tctx = req->task->io_uring; const struct cred *creds = NULL; struct io_identity *id; -- Gitee From 893894ca9896bbd63d324a65bc177c0807d08c41 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 9 Sep 2022 08:54:47 +0200 Subject: [PATCH 019/243] scsi: stex: Properly zero out the passthrough command structure stable inclusion from stable-5.10.148 commit 36b33c63515a93246487691046d18dd37a9f589b category: bugfix issue: I5UDXD CVE: CVE-2022-40768 Signed-off-by: gaochao --------------------------------------- commit 6022f210461fef67e6e676fd8544ca02d1bcfa7a upstream. The passthrough structure is declared off of the stack, so it needs to be set to zero before copied back to userspace to prevent any unintentional data leakage. Switch things to be statically allocated which will fill the unused fields with 0 automatically. Link: https://lore.kernel.org/r/YxrjN3OOw2HHl9tx@kroah.com Cc: stable@kernel.org Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Dan Carpenter Reported-by: hdthky Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/stex.c | 17 +++++++++-------- include/scsi/scsi_cmnd.h | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c index d4f10c0d813c..a3bce11ed4b4 100644 --- a/drivers/scsi/stex.c +++ b/drivers/scsi/stex.c @@ -668,16 +668,17 @@ stex_queuecommand_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) return 0; case PASSTHRU_CMD: if (cmd->cmnd[1] == PASSTHRU_GET_DRVVER) { - struct st_drvver ver; + const struct st_drvver ver = { + .major = ST_VER_MAJOR, + .minor = ST_VER_MINOR, + .oem = ST_OEM, + .build = ST_BUILD_VER, + .signature[0] = PASSTHRU_SIGNATURE, + .console_id = host->max_id - 1, + .host_no = hba->host->host_no, + }; size_t cp_len = sizeof(ver); - ver.major = ST_VER_MAJOR; - ver.minor = ST_VER_MINOR; - ver.oem = ST_OEM; - ver.build = ST_BUILD_VER; - ver.signature[0] = PASSTHRU_SIGNATURE; - ver.console_id = host->max_id - 1; - ver.host_no = hba->host->host_no; cp_len = scsi_sg_copy_from_buffer(cmd, &ver, cp_len); cmd->result = sizeof(ver) == cp_len ? 
DID_OK << 16 | COMMAND_COMPLETE << 8 : diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 69ade4fb71aa..4d272e834ca2 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -205,7 +205,7 @@ static inline unsigned int scsi_get_resid(struct scsi_cmnd *cmd) for_each_sg(scsi_sglist(cmd), sg, nseg, __i) static inline int scsi_sg_copy_from_buffer(struct scsi_cmnd *cmd, - void *buf, int buflen) + const void *buf, int buflen) { return sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, buflen); -- Gitee From 650c16c1570e698d9b42e4485ff6915d07e716e3 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Oct 2022 11:53:49 -0700 Subject: [PATCH 020/243] tcp: Fix data races around icsk->icsk_af_ops. mainline inclusion from mainline-6.1-rc1 commit f49cd2f4d6170d27a2c61f1fecb03d8a70c91f57 category: bugfix issue: I5XKMF CVE: CVE-2022-3566 Signed-off-by: gaochao --------------------------------------- setsockopt(IPV6_ADDRFORM) and tcp_v6_connect() change icsk->icsk_af_ops under lock_sock(), but tcp_(get|set)sockopt() read it locklessly. To avoid load/store tearing, we need to add READ_ONCE() and WRITE_ONCE() for the reads and writes. Thanks to Eric Dumazet for providing the syzbot report: BUG: KCSAN: data-race in tcp_setsockopt / tcp_v6_connect write to 0xffff88813c624518 of 8 bytes by task 23936 on cpu 0: tcp_v6_connect+0x5b3/0xce0 net/ipv6/tcp_ipv6.c:240 __inet_stream_connect+0x159/0x6d0 net/ipv4/af_inet.c:660 inet_stream_connect+0x44/0x70 net/ipv4/af_inet.c:724 __sys_connect_file net/socket.c:1976 [inline] __sys_connect+0x197/0x1b0 net/socket.c:1993 __do_sys_connect net/socket.c:2003 [inline] __se_sys_connect net/socket.c:2000 [inline] __x64_sys_connect+0x3d/0x50 net/socket.c:2000 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read to 0xffff88813c624518 of 8 bytes by task 23937 on cpu 1: tcp_setsockopt+0x147/0x1c80 net/ipv4/tcp.c:3789 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3585 __sys_setsockopt+0x212/0x2b0 net/socket.c:2252 __do_sys_setsockopt net/socket.c:2263 [inline] __se_sys_setsockopt net/socket.c:2260 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2260 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0xffffffff8539af68 -> 0xffffffff8539aff8 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 23937 Comm: syz-executor.5 Not tainted 6.0.0-rc4-syzkaller-00331-g4ed9c1e971b1-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot Reported-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 10 ++++++---- net/ipv6/ipv6_sockglue.c | 3 ++- net/ipv6/tcp_ipv6.c | 6 ++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 63c81af41b43..9ec269435e77 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3443,8 +3443,9 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->setsockopt(sk, level, optname, - optval, optlen); + /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */ + return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname, + optval, optlen); return 
do_tcp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_setsockopt); @@ -3988,8 +3989,9 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->getsockopt(sk, level, optname, - optval, optlen); + /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */ + return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname, + optval, optlen); return do_tcp_getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_getsockopt); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0a19f58cbc99..b19eedff6bf5 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -480,7 +480,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, &tcp_prot, 1); local_bh_enable(); sk->sk_prot = &tcp_prot; - icsk->icsk_af_ops = &ipv4_specific; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific); sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index df33145b876c..dfb13b528057 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -237,7 +237,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - icsk->icsk_af_ops = &ipv6_mapped; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -249,7 +250,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) { icsk->icsk_ext_hdr_len = exthdrlen; - icsk->icsk_af_ops = &ipv6_specific; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; -- Gitee From 45ec20736616425178274b2d7053314282692006 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 23 Aug 2022 11:53:39 +0200 Subject: [PATCH 021/243] binderfs: rework superblock destruction mainline inclusion from mainline-6.1-rc1 commit 9d64d2405f7d30d49818f6682acd0392348f0fdb category: bugfix issue: I5XD95 CVE: CVE-2022-3577 Signed-off-by: gaochao --------------------------------------- So far we relied on .put_super = binderfs_put_super() to destroy info we stashed in sb->s_fs_info. This gave us the required ordering between ->evict_inode() and sb->s_fs_info destruction. But the current implementation of binderfs_fill_super() has a memory leak in the rare circumstance that d_make_root() fails because ->put_super() is only called when sb->s_root is initialized. Fix this by removing ->put_super() and simply do all that work in binderfs_kill_super(). 
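As an illustration only (not part of the patch), a minimal fill_super() error path shows the leak window: generic superblock shutdown calls ->put_super() only when sb->s_root has been set, so a d_make_root() failure never reaches it and whatever hangs off sb->s_fs_info is lost unless ->kill_sb() frees it. The names example_fill_super and struct example_info are invented for this sketch:

    static int example_fill_super(struct super_block *sb, struct fs_context *fc)
    {
            struct example_info *info;
            struct inode *inode;

            info = kzalloc(sizeof(*info), GFP_KERNEL);
            if (!info)
                    return -ENOMEM;
            sb->s_fs_info = info;

            inode = new_inode(sb);
            if (!inode)
                    return -ENOMEM;         /* sb->s_root is still NULL here... */

            sb->s_root = d_make_root(inode);
            if (!sb->s_root)
                    return -ENOMEM;         /* ...so ->put_super() will never run;
                                             * only ->kill_sb() can free info. */
            return 0;
    }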
Reported-by: Dongliang Mu Signed-off-by: Al Viro Signed-off-by: Christian Brauner (Microsoft) Link: https://lore.kernel.org/r/20220823095339.853371-1-brauner@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 7b4f154f07e6..a3beb685748c 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -330,22 +330,10 @@ static int binderfs_show_options(struct seq_file *seq, struct dentry *root) return 0; } -static void binderfs_put_super(struct super_block *sb) -{ - struct binderfs_info *info = sb->s_fs_info; - - if (info && info->ipc_ns) - put_ipc_ns(info->ipc_ns); - - kfree(info); - sb->s_fs_info = NULL; -} - static const struct super_operations binderfs_super_ops = { .evict_inode = binderfs_evict_inode, .show_options = binderfs_show_options, .statfs = simple_statfs, - .put_super = binderfs_put_super, }; static inline bool is_binderfs_control_device(const struct dentry *dentry) @@ -763,11 +751,27 @@ static int binderfs_init_fs_context(struct fs_context *fc) return 0; } +static void binderfs_kill_super(struct super_block *sb) +{ + struct binderfs_info *info = sb->s_fs_info; + + /* + * During inode eviction struct binderfs_info is needed. + * So first wipe the super_block then free struct binderfs_info. + */ + kill_litter_super(sb); + + if (info && info->ipc_ns) + put_ipc_ns(info->ipc_ns); + + kfree(info); +} + static struct file_system_type binder_fs_type = { .name = "binder", .init_fs_context = binderfs_init_fs_context, .parameters = binderfs_fs_parameters, - .kill_sb = kill_litter_super, + .kill_sb = binderfs_kill_super, .fs_flags = FS_USERNS_MOUNT, }; -- Gitee From 2eb014731955e395a4e555786ddbe947ca8563c9 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Thu, 9 Jun 2022 08:35:28 +0100 Subject: [PATCH 022/243] media: pvrusb2: fix memory leak in pvr_probe mainline inclusion from mainline-6.0-rc1 commit 945a9a8e448b65bec055d37eba58f711b39f66f0 category: bugfix issue: I5XD95 CVE: CVE-2022-3577 Signed-off-by: gaochao --------------------------------------- The error handling code in pvr2_hdw_create forgets to unregister the v4l2 device. When pvr2_hdw_create returns back to pvr2_context_create, it calls pvr2_context_destroy to destroy context, but mp->hdw is NULL, which leads to that pvr2_hdw_destroy directly returns. Fix this by adding v4l2_device_unregister to decrease the refcount of usb interface. 
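As a sketch of the rule being applied (invented names, not the driver code): every registration done on the way into the create path needs a matching unregistration on the error path, in reverse order, or the reference it took, here on the USB interface via the v4l2 device, is never dropped:

    static int example_hdw_create(struct example_hdw *hdw,
                                  struct usb_interface *intf)
    {
            int ret;

            ret = v4l2_device_register(&intf->dev, &hdw->v4l2_dev);
            if (ret)
                    return ret;

            ret = example_setup_encoder(hdw);
            if (ret)
                    goto err_unregister;    /* this unwind step was missing */

            return 0;

    err_unregister:
            v4l2_device_unregister(&hdw->v4l2_dev);
            return ret;
    }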
Reported-by: syzbot+77b432d57c4791183ed4@syzkaller.appspotmail.com Signed-off-by: Dongliang Mu Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/pvrusb2/pvrusb2-hdw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c index 3915d551d59e..c21168a1aa96 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c @@ -2607,6 +2607,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf, del_timer_sync(&hdw->encoder_run_timer); del_timer_sync(&hdw->encoder_wait_timer); flush_work(&hdw->workpoll); + v4l2_device_unregister(&hdw->v4l2_dev); usb_free_urb(hdw->ctl_read_urb); usb_free_urb(hdw->ctl_write_urb); kfree(hdw->ctl_read_buffer); -- Gitee From 907a6cd5404b5920e7e31c50f788270361eb67fc Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 5 Oct 2022 00:27:18 +0300 Subject: [PATCH 023/243] Bluetooth: L2CAP: Fix use-after-free caused by l2cap_reassemble_sdu maillist inclusion category: bugfix issue: I5XKL7 CVE: CVE-2022-3564 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git/commit/?id=89f9f3cb86b1c63badaf392a83dd661d56cc50b1 Signed-off-by: gaochao --------------------------------------- Fix the race condition between the following two flows that run in parallel: 1. l2cap_reassemble_sdu -> chan->ops->recv (l2cap_sock_recv_cb) -> __sock_queue_rcv_skb. 2. bt_sock_recvmsg -> skb_recv_datagram, skb_free_datagram. An SKB can be queued by the first flow and immediately dequeued and freed by the second flow, therefore the callers of l2cap_reassemble_sdu can't use the SKB after that function returns. However, some places continue accessing struct l2cap_ctrl that resides in the SKB's CB for a short time after l2cap_reassemble_sdu returns, leading to a use-after-free condition (the stack trace is below, line numbers for kernel 5.19.8). Fix it by keeping a local copy of struct l2cap_ctrl. BUG: KASAN: use-after-free in l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth Read of size 1 at addr ffff88812025f2f0 by task kworker/u17:3/43169 Workqueue: hci0 hci_rx_work [bluetooth] Call Trace: dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4)) print_report.cold (mm/kasan/report.c:314 mm/kasan/report.c:429) ? l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth kasan_report (mm/kasan/report.c:162 mm/kasan/report.c:493) ? 
l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth l2cap_rx_state_recv (net/bluetooth/l2cap_core.c:6906) bluetooth l2cap_rx (net/bluetooth/l2cap_core.c:7236 net/bluetooth/l2cap_core.c:7271) bluetooth ret_from_fork (arch/x86/entry/entry_64.S:306) Allocated by task 43169: kasan_save_stack (mm/kasan/common.c:39) __kasan_slab_alloc (mm/kasan/common.c:45 mm/kasan/common.c:436 mm/kasan/common.c:469) kmem_cache_alloc_node (mm/slab.h:750 mm/slub.c:3243 mm/slub.c:3293) __alloc_skb (net/core/skbuff.c:414) l2cap_recv_frag (./include/net/bluetooth/bluetooth.h:425 net/bluetooth/l2cap_core.c:8329) bluetooth l2cap_recv_acldata (net/bluetooth/l2cap_core.c:8442) bluetooth hci_rx_work (net/bluetooth/hci_core.c:3642 net/bluetooth/hci_core.c:3832) bluetooth process_one_work (kernel/workqueue.c:2289) worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2437) kthread (kernel/kthread.c:376) ret_from_fork (arch/x86/entry/entry_64.S:306) Freed by task 27920: kasan_save_stack (mm/kasan/common.c:39) kasan_set_track (mm/kasan/common.c:45) kasan_set_free_info (mm/kasan/generic.c:372) ____kasan_slab_free (mm/kasan/common.c:368 mm/kasan/common.c:328) slab_free_freelist_hook (mm/slub.c:1780) kmem_cache_free (mm/slub.c:3536 mm/slub.c:3553) skb_free_datagram (./include/net/sock.h:1578 ./include/net/sock.h:1639 net/core/datagram.c:323) bt_sock_recvmsg (net/bluetooth/af_bluetooth.c:295) bluetooth l2cap_sock_recvmsg (net/bluetooth/l2cap_sock.c:1212) bluetooth sock_read_iter (net/socket.c:1087) new_sync_read (./include/linux/fs.h:2052 fs/read_write.c:401) vfs_read (fs/read_write.c:482) ksys_read (fs/read_write.c:620) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) Link: https://lore.kernel.org/linux-bluetooth/CAKErNvoqga1WcmoR3-0875esY6TVWFQDandbVZncSiuGPBQXLA@mail.gmail.com/T/#u Fixes: d2a7ac5d5d3a ("Bluetooth: Add the ERTM receive state machine") Fixes: 4b51dae96731 ("Bluetooth: Add streaming mode receive and incoming packet classifier") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 48 ++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0ddbc415ce15..d840985501e2 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6831,6 +6831,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb, u8 event) { + struct l2cap_ctrl local_control; int err = 0; bool skb_in_use = false; @@ -6855,15 +6856,32 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan, chan->buffer_seq = chan->expected_tx_seq; skb_in_use = true; + /* l2cap_reassemble_sdu may free skb, hence invalidate + * control, so make a copy in advance to use it after + * l2cap_reassemble_sdu returns and to avoid the race + * condition, for example: + * + * The current thread calls: + * l2cap_reassemble_sdu + * chan->ops->recv == l2cap_sock_recv_cb + * __sock_queue_rcv_skb + * Another thread calls: + * bt_sock_recvmsg + * skb_recv_datagram + * skb_free_datagram + * Then the current thread tries to access control, but + * it was freed by skb_free_datagram. 
+ */ + local_control = *control; err = l2cap_reassemble_sdu(chan, skb, control); if (err) break; - if (control->final) { + if (local_control.final) { if (!test_and_clear_bit(CONN_REJ_ACT, &chan->conn_state)) { - control->final = 0; - l2cap_retransmit_all(chan, control); + local_control.final = 0; + l2cap_retransmit_all(chan, &local_control); l2cap_ertm_send(chan); } } @@ -7243,11 +7261,27 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff *skb) { + /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store + * the txseq field in advance to use it after l2cap_reassemble_sdu + * returns and to avoid the race condition, for example: + * + * The current thread calls: + * l2cap_reassemble_sdu + * chan->ops->recv == l2cap_sock_recv_cb + * __sock_queue_rcv_skb + * Another thread calls: + * bt_sock_recvmsg + * skb_recv_datagram + * skb_free_datagram + * Then the current thread tries to access control, but it was freed by + * skb_free_datagram. + */ + u16 txseq = control->txseq; + BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb, chan->rx_state); - if (l2cap_classify_txseq(chan, control->txseq) == - L2CAP_TXSEQ_EXPECTED) { + if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) { l2cap_pass_to_tx(chan, control); BT_DBG("buffer_seq %d->%d", chan->buffer_seq, @@ -7270,8 +7304,8 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control, } } - chan->last_acked_seq = control->txseq; - chan->expected_tx_seq = __next_seq(chan, control->txseq); + chan->last_acked_seq = txseq; + chan->expected_tx_seq = __next_seq(chan, txseq); return 0; } -- Gitee From 68b3e3f5dff8fa4beb88d6184786ecb0823609db Mon Sep 17 00:00:00 2001 From: Shung-Hsi Yu Date: Wed, 12 Oct 2022 10:23:53 +0800 Subject: [PATCH 024/243] libbpf: Fix null-pointer dereference in find_prog_by_sec_insn() maillist inclusion category: bugfix issue: I5XKKJ CVE: CVE-2022-3606 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/tools/lib/bpf/libbpf.c?h=next-20221024&id=d0d382f95a9270dcf803539d6781d6bd67e3f5b2 Signed-off-by: gaochao --------------------------------------- When there are no program sections, obj->programs is left unallocated, and find_prog_by_sec_insn()'s search lands on &obj->programs[0] == NULL, and will cause null-pointer dereference in the following access to prog->sec_idx. Guard the search with obj->nr_programs similar to what's being done in __bpf_program__iter() to prevent null-pointer access from happening. 
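A small, self-contained sketch of the failure mode in plain C (not the libbpf source): when nr is zero the array was never allocated, the loop body never executes, and the final element access dereferences a NULL array, which is exactly what the added guard prevents:

    /* assumes progs is sorted by sec_idx */
    struct prog { int sec_idx; };

    static struct prog *find_prog(struct prog *progs, int nr, int sec_idx)
    {
            int l = 0, r = nr - 1;

            if (!nr)                        /* the guard this patch adds */
                    return NULL;

            while (l < r) {
                    int m = l + (r - l + 1) / 2;

                    if (progs[m].sec_idx <= sec_idx)
                            l = m;
                    else
                            r = m - 1;
            }
            return progs[l].sec_idx == sec_idx ? &progs[l] : NULL;
    }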
Fixes: db2b8b06423c ("libbpf: Support CO-RE relocations for multi-prog sections") Signed-off-by: Shung-Hsi Yu Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20221012022353.7350-4-shung-hsi.yu@suse.com --- tools/lib/bpf/libbpf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b337d6f29098..0027e856eb81 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3479,6 +3479,9 @@ static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj, int l = 0, r = obj->nr_programs - 1, m; struct bpf_program *prog; + if (!obj->nr_programs) + return NULL; + while (l < r) { m = l + (r - l + 1) / 2; prog = &obj->programs[m]; -- Gitee From c406a99a9117ac7ebc8c0c18fd2f1807c780258e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 4 Oct 2022 00:05:19 +0900 Subject: [PATCH 025/243] nilfs2: fix use-after-free bug of struct nilfs_root stable inclusion from stable-5.10.148 commit 21ee3cffed8fbabb669435facfd576ba18ac8652 category: bugfix issue: I5Y24I CVE: CVE-2022-3649 Signed-off-by: gaochao --------------------------------------- commit d325dc6eb763c10f591c239550b8c7e5466a5d09 upstream. If the beginning of the inode bitmap area is corrupted on disk, an inode with the same inode number as the root inode can be allocated and fail soon after. In this case, the subsequent call to nilfs_clear_inode() on that bogus root inode will wrongly decrement the reference counter of struct nilfs_root, and this will erroneously free struct nilfs_root, causing kernel oopses. This fixes the problem by changing nilfs_new_inode() to skip reserved inode numbers while repairing the inode bitmap. Link: https://lkml.kernel.org/r/20221003150519.39789-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+b8c672b0e22615c80fe0@syzkaller.appspotmail.com Reported-by: Khalid Masum Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/inode.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 745d371d6fea..7068117e36b6 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -327,6 +327,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; + struct buffer_head *bh; int err = -ENOMEM; ino_t ino; @@ -342,11 +343,25 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) ii->i_state = BIT(NILFS_I_NEW); ii->i_root = root; - err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); + err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ + if (unlikely(ino < NILFS_USER_INO)) { + nilfs_warn(sb, + "inode bitmap is inconsistent for reserved inodes"); + do { + brelse(bh); + err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); + if (unlikely(err)) + goto failed_ifile_create_inode; + } while (ino < NILFS_USER_INO); + + nilfs_info(sb, "repaired inode bitmap for reserved inodes"); + } + ii->i_bh = bh; + atomic64_inc(&root->inodes_count); inode_init_owner(inode, dir, mode); inode->i_ino = ino; -- Gitee From d6b78bf39e2c1d738db0d4d9b9480e6130ba5291 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 3 Oct 2022 13:59:47 +0100 Subject: [PATCH 026/243] io_uring/af_unix: defer registered files gc to io_uring release mainline inclusion from mainline-6.1-rc1 commit 
0091bfc81741b8d3aeb3b7ab8636f911b2de6e80 category: bugfix issue: I5XD8P CVE: CVE-2022-2602 Signed-off-by: gaochao --------------------------------------- Instead of putting io_uring's registered files in unix_gc() we want it to be done by io_uring itself. The trick here is to consider io_uring registered files for cycle detection but not actually putting them down. Because io_uring can't register other ring instances, this will remove all refs to the ring file triggering the ->release path and clean up with io_ring_ctx_free(). Cc: stable@vger.kernel.org Fixes: 6b06314c47e1 ("io_uring: add file set registration") Reported-and-tested-by: David Bouman Signed-off-by: Pavel Begunkov Signed-off-by: Thadeu Lima de Souza Cascardo [axboe: add kerneldoc comment to skb, fold in skb leak fix] Signed-off-by: Jens Axboe --- fs/io_uring.c | 1 + include/linux/skbuff.h | 2 ++ net/unix/garbage.c | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index e5eddd008651..e41956f2fc01 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7286,6 +7286,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) } skb->sk = sk; + skb->scm_io_uring = 1; nr_files = 0; fpl->user = get_uid(ctx->user); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index acbf1875ad50..a77b38e738e9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -681,6 +681,7 @@ typedef unsigned char *sk_buff_data_t; * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) + * @scm_io_uring: SKB holds io_uring registered files * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @napi_id: id of the NAPI struct this skb came from @@ -858,6 +859,7 @@ struct sk_buff { #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; #endif + __u8 scm_io_uring:1; #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d45d5366115a..dc2763540393 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -204,6 +204,7 @@ void wait_for_unix_gc(void) /* The external entry point: unix_gc() */ void unix_gc(void) { + struct sk_buff *next_skb, *skb; struct unix_sock *u; struct unix_sock *next; struct sk_buff_head hitlist; @@ -297,11 +298,30 @@ void unix_gc(void) spin_unlock(&unix_gc_lock); + /* We need io_uring to clean its registered files, ignore all io_uring + * originated skbs. It's fine as io_uring doesn't keep references to + * other io_uring instances and so killing all other files in the cycle + * will put all io_uring references forcing it to go through normal + * release.path eventually putting registered files. + */ + skb_queue_walk_safe(&hitlist, skb, next_skb) { + if (skb->scm_io_uring) { + __skb_unlink(skb, &hitlist); + skb_queue_tail(&skb->sk->sk_receive_queue, skb); + } + } + /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); spin_lock(&unix_gc_lock); + /* There could be io_uring registered files, just push them back to + * the inflight list + */ + list_for_each_entry_safe(u, next, &gc_candidates, link) + list_move_tail(&u->link, &gc_inflight_list); + /* All candidates should have been detached by now. 
*/ BUG_ON(!list_empty(&gc_candidates)); -- Gitee From d787b9fa3feaf37ca4630c96d0e3854aaf13e9f7 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Tue, 16 Aug 2022 12:08:58 +0800 Subject: [PATCH 027/243] fs: fix UAF/GPF bug in nilfs_mdt_destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-6.1-rc1 commit 2e488f13755ffbb60f307e991b27024716a33b29 category: bugfix issue: I5OHXC CVE: CVE-2022-2978 Signed-off-by: gaochao --------------------------------------- In alloc_inode, inode_init_always() could return -ENOMEM if security_inode_alloc() fails, which causes inode->i_private uninitialized. Then nilfs_is_metadata_file_inode() returns true and nilfs_free_inode() wrongly calls nilfs_mdt_destroy(), which frees the uninitialized inode->i_private and leads to crashes(e.g., UAF/GPF). Fix this by moving security_inode_alloc just prior to this_cpu_inc(nr_inodes) Link: https://lkml.kernel.org/r/CAFcO6XOcf1Jj2SeGt=jJV59wmhESeSKpfR0omdFRq+J9nD1vfQ@mail.gmail.com Reported-by: butt3rflyh4ck Reported-by: Hao Sun Reported-by: Jiacheng Xu Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Dongliang Mu Cc: Al Viro Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/inode.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 638d5d5bf42d..9f49e0bdc2f7 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -168,8 +168,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_wb_frn_history = 0; #endif - if (security_inode_alloc(inode)) - goto out; spin_lock_init(&inode->i_lock); lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); @@ -202,11 +200,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif inode->i_flctx = NULL; + + if (unlikely(security_inode_alloc(inode))) + return -ENOMEM; this_cpu_inc(nr_inodes); return 0; -out: - return -ENOMEM; } EXPORT_SYMBOL(inode_init_always); -- Gitee From 8310baae3cb9539099c42050b53401b806b24571 Mon Sep 17 00:00:00 2001 From: Jialiang Wang Date: Wed, 10 Aug 2022 15:30:57 +0800 Subject: [PATCH 028/243] nfp: fix use-after-free in area_cache_get() mainline inclusion from mainline-6.0-rc1 commit 02e1a114fdb71e59ee6770294166c30d437bf86a category: bugfix issue: I5XD7Q CVE: CVE-2022-3545 Signed-off-by: gaochao --------------------------------------- area_cache_get() is used to distribute cache->area and set cache->id, and if cache->id is not 0 and cache->area->kref refcount is 0, it will release the cache->area by nfp_cpp_area_release(). area_cache_get() set cache->id before cpp->op->area_init() and nfp_cpp_area_acquire(). But if area_init() or nfp_cpp_area_acquire() fails, the cache->id is is already set but the refcount is not increased as expected. At this time, calling the nfp_cpp_area_release() will cause use-after-free. To avoid the use-after-free, set cache->id after area_init() and nfp_cpp_area_acquire() complete successfully. Note: This vulnerability is triggerable by providing emulated device equipped with specified configuration. 
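In sketch form (helper names invented), the ordering the fix establishes is that nothing which can still fail runs after the entry's id is published, so a failed re-init or acquire leaves the cache entry exactly as it was instead of advertising an area that holds no reference:

    static int example_cache_rebind(struct example_cache *cache, u32 id, u64 addr)
    {
            /* align the start address to the cache size, as before */
            cache->addr = addr & ~(u64)(cache->size - 1);

            if (example_area_init(cache->area, id, cache->addr, cache->size))
                    return -EIO;            /* cache->id deliberately untouched */

            if (example_area_acquire(cache->area))
                    return -EIO;

            cache->id = id;                 /* publish only after both succeed */
            return 0;
    }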
BUG: KASAN: use-after-free in nfp6000_area_init (drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c:760) Write of size 4 at addr ffff888005b7f4a0 by task swapper/0/1 Call Trace: nfp6000_area_init (drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c:760) area_cache_get.constprop.8 (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:884) Allocated by task 1: nfp_cpp_area_alloc_with_name (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:303) nfp_cpp_area_cache_add (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:802) nfp6000_init (drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c:1230) nfp_cpp_from_operations (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:1215) nfp_pci_probe (drivers/net/ethernet/netronome/nfp/nfp_main.c:744) Freed by task 1: kfree (mm/slub.c:4562) area_cache_get.constprop.8 (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:873) nfp_cpp_read (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:924 drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c:973) nfp_cpp_readl (drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c:48) Signed-off-by: Jialiang Wang Reviewed-by: Yinjun Zhang Acked-by: Simon Horman Link: https://lore.kernel.org/r/20220810073057.4032-1-wangjialiang0806@163.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index 6ef48eb3a77d..b163489489e9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -874,7 +874,6 @@ area_cache_get(struct nfp_cpp *cpp, u32 id, } /* Adjust the start address to be cache size aligned */ - cache->id = id; cache->addr = addr & ~(u64)(cache->size - 1); /* Re-init to the new ID and address */ @@ -894,6 +893,8 @@ area_cache_get(struct nfp_cpp *cpp, u32 id, return NULL; } + cache->id = id; + exit: /* Adjust offset */ *offset = addr - cache->addr; -- Gitee From 9fcec24ec29a7914d8baff875f28b41103382d7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 31 Aug 2022 23:52:18 +0200 Subject: [PATCH 029/243] sch_sfb: Don't assume the skb is still around after enqueueing to child MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.143 commit 2ee85ac1b29dbd2ebd2d8e5ac1dd5793235d516b category: bugfix issue: I5XEAK CVE: CVE-2022-3586 Signed-off-by: gaochao --------------------------------------- [ Upstream commit 9efd23297cca530bb35e1848665805d3fcdd7889 ] The sch_sfb enqueue() routine assumes the skb is still alive after it has been enqueued into a child qdisc, using the data in the skb cb field in the increment_qlen() routine after enqueue. However, the skb may in fact have been freed, causing a use-after-free in this case. In particular, this happens if sch_cake is used as a child of sfb, and the GSO splitting mode of CAKE is enabled (in which case the skb will be split into segments and the original skb freed). Fix this by copying the sfb cb data to the stack before enqueueing the skb, and using this stack copy in increment_qlen() instead of the skb pointer itself. Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-18231 Fixes: e13e02a3c68d ("net_sched: SFB flow scheduler") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- net/sched/sch_sfb.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index da047a37a3bf..f180cf95cfc9 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -135,15 +135,15 @@ static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) } } -static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) +static void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q) { u32 sfbhash; - sfbhash = sfb_hash(skb, 0); + sfbhash = cb->hashes[0]; if (sfbhash) increment_one_qlen(sfbhash, 0, q); - sfbhash = sfb_hash(skb, 1); + sfbhash = cb->hashes[1]; if (sfbhash) increment_one_qlen(sfbhash, 1, q); } @@ -283,6 +283,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sfb_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; struct tcf_proto *fl; + struct sfb_skb_cb cb; int i; u32 p_min = ~0; u32 minqlen = ~0; @@ -399,11 +400,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } enqueue: + memcpy(&cb, sfb_skb_cb(skb), sizeof(cb)); ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; - increment_qlen(skb, q); + increment_qlen(&cb, q); } else if (net_xmit_drop_count(ret)) { q->stats.childdrop++; qdisc_qstats_drop(sch); -- Gitee From 992b6d887d530f08ce8fba7bf3dd54e531675cf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 5 Sep 2022 21:21:36 +0200 Subject: [PATCH 030/243] sch_sfb: Also store skb len before calling child enqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.143 commit 2ead78fbe6b523e6232ad286e3c13d2a410de22a category: bugfix issue: I5XEAK CVE: CVE-2022-3586 Signed-off-by: gaochao --------------------------------------- [ Upstream commit 2f09707d0c972120bf794cfe0f0c67e2c2ddb252 ] Cong Wang noticed that the previous fix for sch_sfb accessing the queued skb after enqueueing it to a child qdisc was incomplete: the SFB enqueue function was also calling qdisc_qstats_backlog_inc() after enqueue, which reads the pkt len from the skb cb field. Fix this by also storing the skb len, and using the stored value to increment the backlog after enqueueing. 
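Taken together, the two sch_sfb changes implement a single rule: copy everything still needed for accounting (the classification hashes in the cb and the packet length) out of the skb before qdisc_enqueue(), because a child such as sch_cake may segment and free the original. Condensed, the enqueue tail ends up with this shape (same names as the driver):

    unsigned int len = qdisc_pkt_len(skb);          /* read before enqueue */
    struct sfb_skb_cb cb;
    int ret;

    memcpy(&cb, sfb_skb_cb(skb), sizeof(cb));       /* stack copy of the cb */
    ret = qdisc_enqueue(skb, child, to_free);       /* skb may be gone after this */
    if (likely(ret == NET_XMIT_SUCCESS)) {
            sch->qstats.backlog += len;             /* not qdisc_pkt_len(skb) */
            sch->q.qlen++;
            increment_qlen(&cb, q);                 /* not increment_qlen(skb, q) */
    }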
Fixes: 9efd23297cca ("sch_sfb: Don't assume the skb is still around after enqueueing to child") Signed-off-by: Toke Høiland-Jørgensen Acked-by: Cong Wang Link: https://lore.kernel.org/r/20220905192137.965549-1-toke@toke.dk Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/sched/sch_sfb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index f180cf95cfc9..b2724057629f 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -281,6 +281,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct sfb_sched_data *q = qdisc_priv(sch); + unsigned int len = qdisc_pkt_len(skb); struct Qdisc *child = q->qdisc; struct tcf_proto *fl; struct sfb_skb_cb cb; @@ -403,7 +404,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, memcpy(&cb, sfb_skb_cb(skb), sizeof(cb)); ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; increment_qlen(&cb, q); } else if (net_xmit_drop_count(ret)) { -- Gitee From 3e15f379ad3b1c86c550ccb36368b0abd0ab9efc Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 2 Oct 2022 12:08:04 +0900 Subject: [PATCH 031/243] nilfs2: fix NULL pointer dereference at nilfs_bmap_lookup_at_level() mainline inclusion from mainline-6.1-rc1 commit 21a87d88c2253350e115029f14fe2a10a7e6c856 category: bugfix issue: I5YPM3 CVE: CVE-2022-3621 Signed-off-by: gaochao --------------------------------------- If the i_mode field in inode of metadata files is corrupted on disk, it can cause the initialization of bmap structure, which should have been called from nilfs_read_inode_common(), not to be called. This causes a lockdep warning followed by a NULL pointer dereference at nilfs_bmap_lookup_at_level(). This patch fixes these issues by adding a missing sanitiy check for the i_mode field of metadata file's inode. Link: https://lkml.kernel.org/r/20221002030804.29978-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+2b32eb36c1a825b7a74c@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7068117e36b6..50849307eae6 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -454,6 +454,8 @@ int nilfs_read_inode_common(struct inode *inode, inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode)) + return -EIO; /* this inode is for metadata and corrupted */ if (inode->i_nlink == 0) return -ESTALE; /* this inode is deleted */ -- Gitee From 08ab58da8001597c577675a26ebf769b9c422461 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 1 Sep 2022 18:41:31 +0800 Subject: [PATCH 032/243] mm/hugetlb: fix races when looking up a CONT-PTE/PMD size hugetlb page mainline inclusion from mainline-6.1-rc1 commit fac35ba763ed07ba93154c95ffc0c4a55023707f category: bugfix issue: I5XLH5 CVE: CVE-2022-3623 Signed-off-by: gaochao --------------------------------------- On some architectures (like ARM64), it can support CONT-PTE/PMD size hugetlb, which means it can support not only PMD/PUD size hugetlb (2M and 1G), but also CONT-PTE/PMD size(64K and 32M) if a 4K page size specified. 
So when looking up a CONT-PTE size hugetlb page by follow_page(), it will use pte_offset_map_lock() to get the pte entry lock for the CONT-PTE size hugetlb in follow_page_pte(). However this pte entry lock is incorrect for the CONT-PTE size hugetlb, since we should use huge_pte_lock() to get the correct lock, which is mm->page_table_lock. That means the pte entry of the CONT-PTE size hugetlb under current pte lock is unstable in follow_page_pte(), we can continue to migrate or poison the pte entry of the CONT-PTE size hugetlb, which can cause some potential race issues, even though they are under the 'pte lock'. For example, suppose thread A is trying to look up a CONT-PTE size hugetlb page by move_pages() syscall under the lock, however antoher thread B can migrate the CONT-PTE hugetlb page at the same time, which will cause thread A to get an incorrect page, if thread A also wants to do page migration, then data inconsistency error occurs. Moreover we have the same issue for CONT-PMD size hugetlb in follow_huge_pmd(). To fix above issues, rename the follow_huge_pmd() as follow_huge_pmd_pte() to handle PMD and PTE level size hugetlb, which uses huge_pte_lock() to get the correct pte entry lock to make the pte entry stable. Mike said: Support for CONT_PMD/_PTE was added with bb9dd3df8ee9 ("arm64: hugetlb: refactor find_num_contig()"). Patch series "Support for contiguous pte hugepages", v4. However, I do not believe these code paths were executed until migration support was added with 5480280d3f2d ("arm64/mm: enable HugeTLB migration for contiguous bit HugeTLB pages") I would go with 5480280d3f2d for the Fixes: targe. Link: https://lkml.kernel.org/r/635f43bdd85ac2615a58405da82b4d33c6e5eb05.1662017562.git.baolin.wang@linux.alibaba.com Fixes: 5480280d3f2d ("arm64/mm: enable HugeTLB migration for contiguous bit HugeTLB pages") Signed-off-by: Baolin Wang Suggested-by: Mike Kravetz Reviewed-by: Mike Kravetz Cc: David Hildenbrand Cc: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 8 ++++---- mm/gup.c | 14 +++++++++++++- mm/hugetlb.c | 27 +++++++++++++-------------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b9fbb6d4150e..955b19dc28a8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -174,8 +174,8 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, struct page *follow_huge_pd(struct vm_area_struct *vma, unsigned long address, hugepd_t hpd, int flags, int pdshift); -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int flags); +struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, + int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int flags); struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address, @@ -261,8 +261,8 @@ static inline struct page *follow_huge_pd(struct vm_area_struct *vma, return NULL; } -static inline struct page *follow_huge_pmd(struct mm_struct *mm, - unsigned long address, pmd_t *pmd, int flags) +static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, + unsigned long address, int flags) { return NULL; } diff --git a/mm/gup.c b/mm/gup.c index 6cb7d8ae56f6..fcd3f8ad81e6 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -405,6 +405,18 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == (FOLL_PIN | FOLL_GET))) return 
ERR_PTR(-EINVAL); + + /* + * Considering PTE level hugetlb, like continuous-PTE hugetlb on + * ARM64 architecture. + */ + if (is_vm_hugetlb_page(vma)) { + page = follow_huge_pmd_pte(vma, address, flags); + if (page) + return page; + return no_page_table(vma, flags); + } + retry: if (unlikely(pmd_bad(*pmd))) return no_page_table(vma, flags); @@ -560,7 +572,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, if (pmd_none(pmdval)) return no_page_table(vma, flags); if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) { - page = follow_huge_pmd(mm, address, pmd, flags); + page = follow_huge_pmd_pte(vma, address, flags); if (page) return page; return no_page_table(vma, flags); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4ecf9a0622df..5d6e590580df 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5596,12 +5596,13 @@ follow_huge_pd(struct vm_area_struct *vma, } struct page * __weak -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int flags) +follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags) { + struct hstate *h = hstate_vma(vma); + struct mm_struct *mm = vma->vm_mm; struct page *page = NULL; spinlock_t *ptl; - pte_t pte; + pte_t *ptep, pte; /* FOLL_GET and FOLL_PIN are mutually exclusive. */ if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) == @@ -5609,17 +5610,15 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; retry: - ptl = pmd_lockptr(mm, pmd); - spin_lock(ptl); - /* - * make sure that the address range covered by this pmd is not - * unmapped from other threads. - */ - if (!pmd_huge(*pmd)) - goto out; - pte = huge_ptep_get((pte_t *)pmd); + ptep = huge_pte_offset(mm, address, huge_page_size(h)); + if (!ptep) + return NULL; + + ptl = huge_pte_lock(h, mm, ptep); + pte = huge_ptep_get(ptep); if (pte_present(pte)) { - page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT); + page = pte_page(pte) + + ((address & ~huge_page_mask(h)) >> PAGE_SHIFT); /* * try_grab_page() should always succeed here, because: a) we * hold the pmd (ptl) lock, and b) we've just checked that the @@ -5635,7 +5634,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, } else { if (is_hugetlb_entry_migration(pte)) { spin_unlock(ptl); - __migration_entry_wait(mm, (pte_t *)pmd, ptl); + __migration_entry_wait(mm, ptep, ptl); goto retry; } /* -- Gitee From 7e7a7d287633a235fe9ce7c42307fe05498977bf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 9 Aug 2022 14:35:06 +0300 Subject: [PATCH 033/243] devlink: Fix use-after-free after a failed reload stable inclusion from stable-5.10.138 commit 0e28678a770df7989108327cfe86f835d8760c33 category: bugfix issue: I5XV72 CVE: CVE-2022-3625 Signed-off-by: gaochao --------------------------------------- commit 6b4db2e528f650c7fb712961aac36455468d5902 upstream. After a failed devlink reload, devlink parameters are still registered, which means user space can set and get their values. In the case of the mlxsw "acl_region_rehash_interval" parameter, these operations will trigger a use-after-free [1]. Fix this by rejecting set and get operations while in the failed state. Return the "-EOPNOTSUPP" error code which does not abort the parameters dump, but instead causes it to skip over the problematic parameter. Another possible fix is to perform these checks in the mlxsw parameter callbacks, but other drivers might be affected by the same problem and I am not aware of scenarios where these stricter checks will cause a regression. 
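The choice of -EOPNOTSUPP matters because the parameter dump path already treats it as "skip this parameter" rather than as a fatal error, so stale parameters simply drop out of the dump after a failed reload. A simplified sketch of that caller-side convention, with an invented helper name:

    /* example_dump_one() stands in for the real per-parameter fill helper */
    list_for_each_entry(item, &devlink->param_list, list) {
            err = example_dump_one(msg, devlink, item);
            if (err == -EOPNOTSUPP)
                    continue;               /* parameter skipped, dump goes on */
            if (err)
                    return err;             /* any other error aborts the dump */
    }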
[1] mlxsw_spectrum3 0000:00:10.0: Port 125: Failed to register netdev mlxsw_spectrum3 0000:00:10.0: Failed to create ports ================================================================== BUG: KASAN: use-after-free in mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get+0xbd/0xd0 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c:904 Read of size 4 at addr ffff8880099dcfd8 by task kworker/u4:4/777 CPU: 1 PID: 777 Comm: kworker/u4:4 Not tainted 5.19.0-rc7-custom-126601-gfe26f28c586d #1 Hardware name: QEMU MSN4700, BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: netns cleanup_net Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x92/0xbd lib/dump_stack.c:106 print_address_description mm/kasan/report.c:313 [inline] print_report.cold+0x5e/0x5cf mm/kasan/report.c:429 kasan_report+0xb9/0xf0 mm/kasan/report.c:491 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report_generic.c:306 mlxsw_sp_acl_tcam_vregion_rehash_intrvl_get+0xbd/0xd0 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c:904 mlxsw_sp_acl_region_rehash_intrvl_get+0x49/0x60 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c:1106 mlxsw_sp_params_acl_region_rehash_intrvl_get+0x33/0x80 drivers/net/ethernet/mellanox/mlxsw/spectrum.c:3854 devlink_param_get net/core/devlink.c:4981 [inline] devlink_nl_param_fill+0x238/0x12d0 net/core/devlink.c:5089 devlink_param_notify+0xe5/0x230 net/core/devlink.c:5168 devlink_ns_change_notify net/core/devlink.c:4417 [inline] devlink_ns_change_notify net/core/devlink.c:4396 [inline] devlink_reload+0x15f/0x700 net/core/devlink.c:4507 devlink_pernet_pre_exit+0x112/0x1d0 net/core/devlink.c:12272 ops_pre_exit_list net/core/net_namespace.c:152 [inline] cleanup_net+0x494/0xc00 net/core/net_namespace.c:582 process_one_work+0x9fc/0x1710 kernel/workqueue.c:2289 worker_thread+0x675/0x10b0 kernel/workqueue.c:2436 kthread+0x30c/0x3d0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 The buggy address belongs to the physical page: page:ffffea0000267700 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x99dc flags: 0x100000000000000(node=0|zone=1) raw: 0100000000000000 0000000000000000 dead000000000122 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880099dce80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff8880099dcf00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff8880099dcf80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff8880099dd000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff8880099dd080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Fixes: 98bbf70c1c41 ("mlxsw: spectrum: add "acl_region_rehash_interval" devlink param") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- net/core/devlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 646d90f63daf..72047750dcd9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3620,7 +3620,7 @@ static int devlink_param_get(struct devlink *devlink, const struct devlink_param *param, struct devlink_param_gset_ctx *ctx) { - if (!param->get) + if (!param->get || devlink->reload_failed) return -EOPNOTSUPP; return param->get(devlink, param->id, ctx); } @@ -3629,7 +3629,7 @@ static int devlink_param_set(struct devlink *devlink, const struct devlink_param *param, struct devlink_param_gset_ctx *ctx) { - if (!param->set) + if (!param->set || devlink->reload_failed) return -EOPNOTSUPP; return param->set(devlink, param->id, ctx); } -- Gitee From 82814d0e24586e40cdf0b427cc5329cc83317010 Mon Sep 17 00:00:00 2001 From: Dokyung Song Date: Thu, 3 Nov 2022 18:49:18 +0800 Subject: [PATCH 034/243] wifi: Fix potential buffer overflow in 'brcmf_fweh_event_worker' maillist inclusion category: bugfix ISSUE:I60AC2 CVE: CVE-2022-3628 Reference: https://patchwork.kernel.org/project/linux-wireless/patch/20221021061359.GA550858@laguna/ Signed-off-by: gaochao -------------------------------- This patch fixes an intra-object buffer overflow in brcmfmac that occurs when the device provides a 'bsscfgidx' equal to or greater than the buffer size. The patch adds a check that leads to a safe failure if that is the case. This fixes CVE-2022-3628. UBSAN: array-index-out-of-bounds in drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 52 is out of range for type 'brcmf_if *[16]' CPU: 0 PID: 1898 Comm: kworker/0:2 Tainted: G O 5.14.0+ #132 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Workqueue: events brcmf_fweh_event_worker Call Trace: dump_stack_lvl+0x57/0x7d ubsan_epilogue+0x5/0x40 __ubsan_handle_out_of_bounds+0x69/0x80 ? memcpy+0x39/0x60 brcmf_fweh_event_worker+0xae1/0xc00 ? brcmf_fweh_call_event_handler.isra.0+0x100/0x100 ? rcu_read_lock_sched_held+0xa1/0xd0 ? rcu_read_lock_bh_held+0xb0/0xb0 ? lockdep_hardirqs_on_prepare+0x273/0x3e0 process_one_work+0x873/0x13e0 ? lock_release+0x640/0x640 ? pwq_dec_nr_in_flight+0x320/0x320 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x8b/0xd10 ? __kthread_parkme+0xd9/0x1d0 ? process_one_work+0x13e0/0x13e0 kthread+0x379/0x450 ? _raw_spin_unlock_irq+0x24/0x30 ? 
set_kthread_struct+0x100/0x100 ret_from_fork+0x1f/0x30 ================================================================================ general protection fault, probably for non-canonical address 0xe5601c0020023fff: 0000 [#1] SMP KASAN KASAN: maybe wild-memory-access in range [0x2b0100010011fff8-0x2b0100010011ffff] CPU: 0 PID: 1898 Comm: kworker/0:2 Tainted: G O 5.14.0+ #132 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Workqueue: events brcmf_fweh_event_worker RIP: 0010:brcmf_fweh_call_event_handler.isra.0+0x42/0x100 Code: 89 f5 53 48 89 fb 48 83 ec 08 e8 79 0b 38 fe 48 85 ed 74 7e e8 6f 0b 38 fe 48 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 8b 00 00 00 4c 8b 7d 00 44 89 e0 48 ba 00 00 00 RSP: 0018:ffffc9000259fbd8 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffff888115d8cd50 RCX: 0000000000000000 RDX: 0560200020023fff RSI: ffffffff8304bc91 RDI: ffff888115d8cd50 RBP: 2b0100010011ffff R08: ffff888112340050 R09: ffffed1023549809 R10: ffff88811aa4c047 R11: ffffed1023549808 R12: 0000000000000045 R13: ffffc9000259fca0 R14: ffff888112340050 R15: ffff888112340000 FS: 0000000000000000(0000) GS:ffff88811aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000004053ccc0 CR3: 0000000112740000 CR4: 0000000000750ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: brcmf_fweh_event_worker+0x117/0xc00 ? brcmf_fweh_call_event_handler.isra.0+0x100/0x100 ? rcu_read_lock_sched_held+0xa1/0xd0 ? rcu_read_lock_bh_held+0xb0/0xb0 ? lockdep_hardirqs_on_prepare+0x273/0x3e0 process_one_work+0x873/0x13e0 ? lock_release+0x640/0x640 ? pwq_dec_nr_in_flight+0x320/0x320 ? rwlock_bug.part.0+0x90/0x90 worker_thread+0x8b/0xd10 ? __kthread_parkme+0xd9/0x1d0 ? process_one_work+0x13e0/0x13e0 kthread+0x379/0x450 ? _raw_spin_unlock_irq+0x24/0x30 ? 
set_kthread_struct+0x100/0x100 ret_from_fork+0x1f/0x30 Modules linked in: 88XXau(O) 88x2bu(O) ---[ end trace 41d302138f3ff55a ]--- RIP: 0010:brcmf_fweh_call_event_handler.isra.0+0x42/0x100 Code: 89 f5 53 48 89 fb 48 83 ec 08 e8 79 0b 38 fe 48 85 ed 74 7e e8 6f 0b 38 fe 48 89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 8b 00 00 00 4c 8b 7d 00 44 89 e0 48 ba 00 00 00 RSP: 0018:ffffc9000259fbd8 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffff888115d8cd50 RCX: 0000000000000000 RDX: 0560200020023fff RSI: ffffffff8304bc91 RDI: ffff888115d8cd50 RBP: 2b0100010011ffff R08: ffff888112340050 R09: ffffed1023549809 R10: ffff88811aa4c047 R11: ffffed1023549808 R12: 0000000000000045 R13: ffffc9000259fca0 R14: ffff888112340050 R15: ffff888112340000 FS: 0000000000000000(0000) GS:ffff88811aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000004053ccc0 CR3: 0000000112740000 CR4: 0000000000750ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Reported-by: Dokyung Song Reported-by: Jisoo Jang Reported-by: Minsuk Kang Reviewed-by: Arend van Spriel Signed-off-by: Dokyung Song Signed-off-by: Liu Jian Reviewed-by: Yue Haibing Reviewed-by: Xiu Jianfeng Signed-off-by: Zheng Zengkai --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 430d2cca98b3..1285d3685c4f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -228,6 +228,10 @@ static void brcmf_fweh_event_worker(struct work_struct *work) brcmf_fweh_event_name(event->code), event->code, event->emsg.ifidx, event->emsg.bsscfgidx, event->emsg.addr); + if (event->emsg.bsscfgidx >= BRCMF_MAX_IFS) { + bphy_err(drvr, "invalid bsscfg index: %u\n", event->emsg.bsscfgidx); + goto event_free; + } /* convert event message */ emsg_be = &event->emsg; -- Gitee From f621a5e1a702fe2281e2e6a0bcb8e98c6403ee00 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Mon, 8 Aug 2022 11:04:47 -0700 Subject: [PATCH 035/243] vsock: Fix memory leak in vsock_connect() stable inclusion from stable-5.10.138 commit 38ddccbda5e8b762c8ee06670bb1f64f1be5ee50 category: bugfix issue: I5YPMI CVE: CVE-2022-3629 Signed-off-by: gaochao --------------------------------------- commit 7e97cfed9929eaabc41829c395eb0d1350fccb9d upstream. An O_NONBLOCK vsock_connect() request may try to reschedule @connect_work. Imagine the following sequence of vsock_connect() requests: 1. The 1st, non-blocking request schedules @connect_work, which will expire after 200 jiffies. Socket state is now SS_CONNECTING; 2. Later, the 2nd, blocking request gets interrupted by a signal after a few jiffies while waiting for the connection to be established. Socket state is back to SS_UNCONNECTED, but @connect_work is still pending, and will expire after 100 jiffies. 3. Now, the 3rd, non-blocking request tries to schedule @connect_work again. Since @connect_work is already scheduled, schedule_delayed_work() silently returns. 
sock_hold() is called twice, but sock_put() will only be called once in vsock_connect_timeout(), causing a memory leak reported by syzbot: BUG: memory leak unreferenced object 0xffff88810ea56a40 (size 1232): comm "syz-executor756", pid 3604, jiffies 4294947681 (age 12.350s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 28 00 07 40 00 00 00 00 00 00 00 00 00 00 00 00 (..@............ backtrace: [] sk_prot_alloc+0x3e/0x1b0 net/core/sock.c:1930 [] sk_alloc+0x32/0x2e0 net/core/sock.c:1989 [] __vsock_create.constprop.0+0x38/0x320 net/vmw_vsock/af_vsock.c:734 [] vsock_create+0xc1/0x2d0 net/vmw_vsock/af_vsock.c:2203 [] __sock_create+0x1ab/0x2b0 net/socket.c:1468 [] sock_create net/socket.c:1519 [inline] [] __sys_socket+0x6f/0x140 net/socket.c:1561 [] __do_sys_socket net/socket.c:1570 [inline] [] __se_sys_socket net/socket.c:1568 [inline] [] __x64_sys_socket+0x1a/0x20 net/socket.c:1568 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x35/0x80 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x44/0xae <...> Use mod_delayed_work() instead: if @connect_work is already scheduled, reschedule it, and undo sock_hold() to keep the reference count balanced. Reported-and-tested-by: syzbot+b03f55bf128f9a38f064@syzkaller.appspotmail.com Fixes: d021c344051a ("VSOCK: Introduce VM Sockets") Co-developed-by: Stefano Garzarella Signed-off-by: Stefano Garzarella Reviewed-by: Stefano Garzarella Signed-off-by: Peilin Ye Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/vmw_vsock/af_vsock.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7fe36dbcbe18..36488f952d9a 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1342,7 +1342,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, * timeout fires. */ sock_hold(sk); - schedule_delayed_work(&vsk->connect_work, timeout); + + /* If the timeout function is already scheduled, + * reschedule it, then ungrab the socket refcount to + * keep it balanced. + */ + if (mod_delayed_work(system_wq, &vsk->connect_work, + timeout)) + sock_put(sk); /* Skip ahead to preserve error code set above. */ goto out_wait; -- Gitee From db50a9bc6f75a2235d444702cb93ab03a038c74b Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Fri, 5 Aug 2022 18:02:16 +0300 Subject: [PATCH 036/243] can: j1939: j1939_session_destroy(): fix memory leak of skbs stable inclusion from stable-5.10.138 commit a220ff343396bae8d3b6abee72ab51f1f34b3027 category: bugfix issue: I5YPLT CVE: CVE-2022-3633 Signed-off-by: gaochao --------------------------------------- commit 8c21c54a53ab21842f5050fa090f26b03c0313d6 upstream. We need to drop skb references taken in j1939_session_skb_queue() when destroying a session in j1939_session_destroy(). Otherwise those skbs would be lost. Link to Syzkaller info and repro: https://forge.ispras.ru/issues/11743. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. 
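For context, the imbalance comes from the queueing side, which takes its own reference on every skb it puts on the session queue (roughly the shape below, simplified); skb_queue_purge() at destroy time drops only one reference per skb, so the extra one has to be dropped explicitly:

    /* queue side (simplified): the session keeps its own reference */
    skb_get(skb);
    skb_queue_tail(&session->skb_queue, skb);

    /* destroy side: balance that reference before freeing */
    while ((skb = skb_dequeue(&session->skb_queue)) != NULL) {
            skb_unref(skb);         /* drop the ref taken at queue time */
            kfree_skb(skb);
    }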
V1: https://lore.kernel.org/all/20220708175949.539064-1-pchelkin@ispras.ru Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Suggested-by: Oleksij Rempel Signed-off-by: Fedor Pchelkin Signed-off-by: Alexey Khoroshilov Acked-by: Oleksij Rempel Link: https://lore.kernel.org/all/20220805150216.66313-1-pchelkin@ispras.ru Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/j1939/transport.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 9c39b0f5d6e0..2830a12a4dd1 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -260,6 +260,8 @@ static void __j1939_session_drop(struct j1939_session *session) static void j1939_session_destroy(struct j1939_session *session) { + struct sk_buff *skb; + if (session->err) j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT); else @@ -270,7 +272,11 @@ static void j1939_session_destroy(struct j1939_session *session) WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry)); WARN_ON_ONCE(!list_empty(&session->active_session_list_entry)); - skb_queue_purge(&session->skb_queue); + while ((skb = skb_dequeue(&session->skb_queue)) != NULL) { + /* drop ref taken in j1939_session_skb_queue() */ + skb_unref(skb); + kfree_skb(skb); + } __j1939_session_drop(session); j1939_priv_put(session->priv); kfree(session); -- Gitee From d06143856e46e7bd9f132892e001f9a6a467579c Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 5 Aug 2022 15:00:08 +0800 Subject: [PATCH 037/243] atm: idt77252: fix use-after-free bugs caused by tst_timer stable inclusion from stable-5.10.138 commit a0ae122e9aeccbff75014c4d36d11a9d32e7fb5e category: bugfix issue: I5YPMB CVE: CVE-2022-3635 Signed-off-by: gaochao --------------------------------------- commit 3f4093e2bf4673f218c0bf17d8362337c400e77b upstream. There are use-after-free bugs caused by tst_timer. The root cause is that there are no functions to stop tst_timer in idt77252_exit(). One of the possible race conditions is shown below: (thread 1) | (thread 2) | idt77252_init_one | init_card | fill_tst | mod_timer(&card->tst_timer, ...) idt77252_exit | (wait a time) | tst_timer | | ... kfree(card) // FREE | | card->soft_tst[e] // USE The idt77252_dev is deallocated in idt77252_exit() and used in timer handler. This patch adds del_timer_sync() in idt77252_exit() in order that the timer handler could be stopped before the idt77252_dev is deallocated. 
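Reduced to its essence (names invented), the teardown ordering being enforced is: stop the timer and wait for any handler that is already running, and only then free the structure that handler dereferences:

    static void example_card_free(struct example_card *card)
    {
            /* no tst_timer handler can be running once this returns */
            del_timer_sync(&card->tst_timer);

            kfree(card);            /* now safe: the handler cannot touch card */
    }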
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Duoming Zhou Link: https://lore.kernel.org/r/20220805070008.18007-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/atm/idt77252.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 5f0472c18bcb..82f6f1fbe9e7 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3767,6 +3767,7 @@ static void __exit idt77252_exit(void) card = idt77252_chain; dev = card->atmdev; idt77252_chain = card->next; + del_timer_sync(&card->tst_timer); if (dev->phy->stop) dev->phy->stop(dev); -- Gitee From a6748667ed84fb24cb69281aba3f066dc53385bf Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 7 Oct 2022 17:52:26 +0900 Subject: [PATCH 038/243] nilfs2: fix leak of nilfs_root in case of writer thread creation failure mainline inclusion from mainline-6.1-rc1 commit d0d51a97063db4704a5ef6bc978dddab1636a306 category: bugfix issue: I5YPLN CVE: CVE-2022-3646 Signed-off-by: gaochao --------------------------------------- If nilfs_attach_log_writer() failed to create a log writer thread, it frees a data structure of the log writer without any cleanup. After commit e912a5b66837 ("nilfs2: use root object to get ifile"), this causes a leak of struct nilfs_root, which started to leak an ifile metadata inode and a kobject on that struct. In addition, if the kernel is booted with panic_on_warn, the above ifile metadata inode leak will cause the following panic when the nilfs2 kernel module is removed: kmem_cache_destroy nilfs2_inode_cache: Slab cache still has objects when called from nilfs_destroy_cachep+0x16/0x3a [nilfs2] WARNING: CPU: 8 PID: 1464 at mm/slab_common.c:494 kmem_cache_destroy+0x138/0x140 ... RIP: 0010:kmem_cache_destroy+0x138/0x140 Code: 00 20 00 00 e8 a9 55 d8 ff e9 76 ff ff ff 48 8b 53 60 48 c7 c6 20 70 65 86 48 c7 c7 d8 69 9c 86 48 8b 4c 24 28 e8 ef 71 c7 00 <0f> 0b e9 53 ff ff ff c3 48 81 ff ff 0f 00 00 77 03 31 c0 c3 53 48 ... Call Trace: ? nilfs_palloc_freev.cold.24+0x58/0x58 [nilfs2] nilfs_destroy_cachep+0x16/0x3a [nilfs2] exit_nilfs_fs+0xa/0x1b [nilfs2] __x64_sys_delete_module+0x1d9/0x3a0 ? __sanitizer_cov_trace_pc+0x1a/0x50 ? syscall_trace_enter.isra.19+0x119/0x190 do_syscall_64+0x34/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd ... Kernel panic - not syncing: panic_on_warn set ... This patch fixes these issues by calling nilfs_detach_log_writer() cleanup function if spawning the log writer thread fails. 
Link: https://lkml.kernel.org/r/20221007085226.57667-1-konishi.ryusuke@gmail.com Fixes: e912a5b66837 ("nilfs2: use root object to get ifile") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+7381dc4ad60658ca4c05@syzkaller.appspotmail.com Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index e3726aca28ed..6f5ccfc8819e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2788,10 +2788,9 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); err = nilfs_segctor_start_thread(nilfs->ns_writer); - if (err) { - kfree(nilfs->ns_writer); - nilfs->ns_writer = NULL; - } + if (unlikely(err)) + nilfs_detach_log_writer(sb); + return err; } -- Gitee From 48496867e7f3e9dea265e6af0f0867f69c5e9582 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 7 Sep 2022 10:26:18 +0200 Subject: [PATCH 039/243] netfilter: nfnetlink_osf: fix possible bogus match in nf_osf_find() stable inclusion from stable-5.10.146 commit 5d75fef3e61e797fab5c3fbba88caa74ab92ad47 category: bugfix issue: I5YPLH CVE: CVE-2022-42432 Signed-off-by: gaochao --------------------------------------- [ Upstream commit 559c36c5a8d730c49ef805a72b213d3bba155cc8 ] nf_osf_find() incorrectly returns true on mismatch, this leads to copying uninitialized memory area in nft_osf which can be used to leak stale kernel stack data to userspace. Fixes: 22c7652cdaa8 ("netfilter: nft_osf: Add version option support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin --- net/netfilter/nfnetlink_osf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 79fbf37291f3..51e3953b414c 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -269,6 +269,7 @@ bool nf_osf_find(const struct sk_buff *skb, struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; struct tcphdr _tcph; + bool found = false; memset(&ctx, 0, sizeof(ctx)); @@ -283,10 +284,11 @@ bool nf_osf_find(const struct sk_buff *skb, data->genre = f->genre; data->version = f->version; + found = true; break; } - return true; + return found; } EXPORT_SYMBOL_GPL(nf_osf_find); -- Gitee From 866e22e0fdd1100ccb6e492a9cea6fd2c0cdd7ca Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 19 Sep 2022 14:59:57 -0700 Subject: [PATCH 040/243] usb: mon: make mmapped memory read only mainline inclusion from mainline-6.1-rc1 commit a659daf63d16aa883be42f3f34ff84235c302198 category: bugfix issue: I5Z39B CVE: CVE-2022-43750 Signed-off-by: gaochao --------------------------------------- Syzbot found an issue in usbmon module, where the user space client can corrupt the monitor's internal memory, causing the usbmon module to crash the kernel with segfault, UAF, etc. The reproducer mmaps the /dev/usbmon memory to user space, and overwrites it with arbitrary data, which causes all kinds of issues. Return an -EPERM error from mon_bin_mmap() if the flag VM_WRTIE is set. Also clear VM_MAYWRITE to make it impossible to change it to writable later. 
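
From user space the fix is visible in two ways: an mmap() that asks for PROT_WRITE is refused, and a read-only mapping can no longer be upgraded with mprotect() once VM_MAYWRITE is cleared. A small probe along these lines can be used to confirm the behaviour; the device path and the exact errno values are assumptions for illustration (usbmon exposes /dev/usbmon<N> when its binary interface is available):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    size_t len = 4096;
    int fd = open("/dev/usbmon0", O_RDWR);      /* assumed device node */

    if (fd < 0) {
        perror("open");
        return 1;
    }

    /* With the fix, a writable mapping is rejected (EPERM expected). */
    if (mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) == MAP_FAILED)
        printf("rw mmap rejected: %s\n", strerror(errno));

    /* A read-only mapping still works... */
    void *p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
    if (p == MAP_FAILED) {
        perror("ro mmap");
        close(fd);
        return 1;
    }

    /* ...but cannot be made writable later (EACCES expected). */
    if (mprotect(p, len, PROT_READ | PROT_WRITE) != 0)
        printf("mprotect upgrade rejected: %s\n", strerror(errno));

    munmap(p, len);
    close(fd);
    return 0;
}
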
Cc: "Dmitry Vyukov" Cc: stable Fixes: 6f23ee1fefdc ("USB: add binary API to usbmon") Suggested-by: PaX Team # for the VM_MAYRITE portion Link: https://syzkaller.appspot.com/bug?id=2eb1f35d6525fa4a74d75b4244971e5b1411c95a Reported-by: syzbot+23f57c5ae902429285d7@syzkaller.appspotmail.com Signed-off-by: Tadeusz Struk Link: https://lore.kernel.org/r/20220919215957.205681-1-tadeusz.struk@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mon/mon_bin.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index f48a23adbc35..094e812e9e69 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1268,6 +1268,11 @@ static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma) { /* don't do anything here: "fault" will set up page table entries */ vma->vm_ops = &mon_bin_vm_ops; + + if (vma->vm_flags & VM_WRITE) + return -EPERM; + + vma->vm_flags &= ~VM_MAYWRITE; vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; vma->vm_private_data = filp->private_data; mon_bin_vma_open(vma); -- Gitee From 82a6a74bb04803d0b38a7decd31b3730082feda0 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Thu, 3 Nov 2022 18:49:10 +0800 Subject: [PATCH 041/243] mm/memory.c: fix race when faulting a device private page MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.1-rc1 commit 16ce101db85db694a91380aa4c89b25530871d33 category: bugfix issue: I5XD82 CVE: CVE-2022-3523 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=16ce101db85db694a91380aa4c89b25530871d33 Signed-off-by: gaochao -------------------------------- Patch series "Fix several device private page reference counting issues", v2 This series aims to fix a number of page reference counting issues in drivers dealing with device private ZONE_DEVICE pages. These result in use-after-free type bugs, either from accessing a struct page which no longer exists because it has been removed or accessing fields within the struct page which are no longer valid because the page has been freed. During normal usage it is unlikely these will cause any problems. However without these fixes it is possible to crash the kernel from userspace. These crashes can be triggered either by unloading the kernel module or unbinding the device from the driver prior to a userspace task exiting. In modules such as Nouveau it is also possible to trigger some of these issues by explicitly closing the device file-descriptor prior to the task exiting and then accessing device private memory. This involves some minor changes to both PowerPC and AMD GPU code. Unfortunately I lack hardware to test either of those so any help there would be appreciated. The changes mimic what is done in for both Nouveau and hmm-tests though so I doubt they will cause problems. This patch (of 8): When the CPU tries to access a device private page the migrate_to_ram() callback associated with the pgmap for the page is called. However no reference is taken on the faulting page. Therefore a concurrent migration of the device private page can free the page and possibly the underlying pgmap. This results in a race which can crash the kernel due to the migrate_to_ram() function pointer becoming invalid. It also means drivers can't reliably read the zone_device_data field because the page may have been freed with memunmap_pages(). 
Close the race by getting a reference on the page while holding the ptl to ensure it has not been freed. Unfortunately the elevated reference count will cause the migration required to handle the fault to fail. To avoid this failure pass the faulting page into the migrate_vma functions so that if an elevated reference count is found it can be checked to see if it's expected or not. [mpe@ellerman.id.au: fix build] Link: https://lkml.kernel.org/r/87fsgbf3gh.fsf@mpe.ellerman.id.au Link: https://lkml.kernel.org/r/cover.60659b549d8509ddecafad4f498ee7f03bb23c69.1664366292.git-series.apopple@nvidia.com Link: https://lkml.kernel.org/r/d3e813178a59e565e8d78d9b9a4e2562f6494f90.1664366292.git-series.apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: Felix Kuehling Cc: Jason Gunthorpe Cc: John Hubbard Cc: Ralph Campbell Cc: Michael Ellerman Cc: Lyude Paul Cc: Alex Deucher Cc: Alex Sierra Cc: Ben Skeggs Cc: Christian König Cc: Dan Williams Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Matthew Wilcox Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton Conflicts: arch/powerpc/kvm/book3s_hv_uvmem.c include/linux/migrate.h lib/test_hmm.c mm/migrate.c Signed-off-by: Ma Wupeng Reviewed-by: tong tiangen Reviewed-by: Xiu Jianfeng Signed-off-by: Zheng Zengkai --- arch/powerpc/kvm/book3s_hv_uvmem.c | 19 ++++++++------ include/linux/migrate.h | 9 +++++++ lib/test_hmm.c | 5 ++-- mm/memory.c | 16 +++++++++++- mm/migrate.c | 42 ++++++++++++++++++++---------- 5 files changed, 66 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 84e5a2dc8be5..9168238860b8 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -504,10 +504,10 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm) static int __kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) + struct kvm *kvm, unsigned long gpa, struct page *fault_page) { unsigned long src_pfn, dst_pfn = 0; - struct migrate_vma mig; + struct migrate_vma mig = { 0 }; struct page *dpage, *spage; struct kvmppc_uvmem_page_pvt *pvt; unsigned long pfn; @@ -521,6 +521,7 @@ static int __kvmppc_svm_page_out(struct vm_area_struct *vma, mig.dst = &dst_pfn; mig.pgmap_owner = &kvmppc_uvmem_pgmap; mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + mig.fault_page = fault_page; /* The requested page is already paged-out, nothing to do */ if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL)) @@ -576,12 +577,14 @@ static int __kvmppc_svm_page_out(struct vm_area_struct *vma, static inline int kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long page_shift, - struct kvm *kvm, unsigned long gpa) + struct kvm *kvm, unsigned long gpa, + struct page *fault_page) { int ret; mutex_lock(&kvm->arch.uvmem_lock); - ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa); + ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, + fault_page); mutex_unlock(&kvm->arch.uvmem_lock); return ret; @@ -630,7 +633,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot, pvt->remove_gfn = true; if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE, - PAGE_SHIFT, kvm, pvt->gpa)) + PAGE_SHIFT, kvm, pvt->gpa, NULL)) pr_err("Can't page out gpa:0x%lx addr:0x%lx\n", pvt->gpa, addr); } else { @@ -733,7 +736,7 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma, bool pagein) { unsigned long src_pfn, dst_pfn = 0; - struct 
migrate_vma mig; + struct migrate_vma mig = { 0 }; struct page *spage; unsigned long pfn; struct page *dpage; @@ -991,7 +994,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf) if (kvmppc_svm_page_out(vmf->vma, vmf->address, vmf->address + PAGE_SIZE, PAGE_SHIFT, - pvt->kvm, pvt->gpa)) + pvt->kvm, pvt->gpa, vmf->page)) return VM_FAULT_SIGBUS; else return 0; @@ -1062,7 +1065,7 @@ kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa, if (!vma || vma->vm_start > start || vma->vm_end < end) goto out; - if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa)) + if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, NULL)) ret = H_SUCCESS; out: mmap_read_unlock(kvm->mm); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 0f8d1583fa8e..a9de6d3ae07d 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -36,6 +36,9 @@ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); +extern int migrate_page_extra(struct address_space *mapping, + struct page *newpage, struct page *page, + enum migrate_mode mode, int extra_count); extern int migrate_page(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode); @@ -190,6 +193,12 @@ struct migrate_vma { */ void *pgmap_owner; unsigned long flags; + + /* + * Set to vmf->page if this is being called to migrate a page as part of + * a migrate_to_ram() callback. + */ + struct page *fault_page; }; int migrate_vma_setup(struct migrate_vma *args); diff --git a/lib/test_hmm.c b/lib/test_hmm.c index a85613068d60..58d1e8c41889 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -671,7 +671,7 @@ static int dmirror_migrate(struct dmirror *dmirror, unsigned long src_pfns[64]; unsigned long dst_pfns[64]; struct dmirror_bounce bounce; - struct migrate_vma args; + struct migrate_vma args = { 0 }; unsigned long next; int ret; @@ -1048,7 +1048,7 @@ static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args, static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) { - struct migrate_vma args; + struct migrate_vma args = { 0 }; unsigned long src_pfns; unsigned long dst_pfns; struct page *rpage; @@ -1071,6 +1071,7 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) args.dst = &dst_pfns; args.pgmap_owner = dmirror->mdevice; args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + args.fault_page = vmf->page; if (migrate_vma_setup(&args)) return VM_FAULT_SIGBUS; diff --git a/mm/memory.c b/mm/memory.c index 582776bec124..824bd57169d6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3329,7 +3329,21 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) vmf->address); } else if (is_device_private_entry(entry)) { vmf->page = device_private_entry_to_page(entry); - ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte))) { + spin_unlock(vmf->ptl); + goto out; + } + + /* + * Get a page reference while we know the page can't be + * freed. 
+ */ + get_page(vmf->page); + pte_unmap_unlock(vmf->pte, vmf->ptl); + vmf->page->pgmap->ops->migrate_to_ram(vmf); + put_page(vmf->page); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; } else { diff --git a/mm/migrate.c b/mm/migrate.c index 278e6f3fa62c..0758aa3836f7 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -690,21 +690,15 @@ EXPORT_SYMBOL(migrate_page_copy); * Migration functions ***********************************************************/ -/* - * Common logic to directly migrate a single LRU page suitable for - * pages that do not use PagePrivate/PagePrivate2. - * - * Pages are locked upon entry and exit. - */ -int migrate_page(struct address_space *mapping, +int migrate_page_extra(struct address_space *mapping, struct page *newpage, struct page *page, - enum migrate_mode mode) + enum migrate_mode mode, int extra_count) { int rc; BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page, 0); + rc = migrate_page_move_mapping(mapping, newpage, page, extra_count); if (rc != MIGRATEPAGE_SUCCESS) return rc; @@ -715,6 +709,19 @@ int migrate_page(struct address_space *mapping, migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } + +/* + * Common logic to directly migrate a single LRU page suitable for + * pages that do not use PagePrivate/PagePrivate2. + * + * Pages are locked upon entry and exit. + */ +int migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, + enum migrate_mode mode) +{ + return migrate_page_extra(mapping, newpage, page, mode, 0); +} EXPORT_SYMBOL(migrate_page); #ifdef CONFIG_BLOCK @@ -2522,14 +2529,14 @@ static void migrate_vma_collect(struct migrate_vma *migrate) * migrate_page_move_mapping(), except that here we allow migration of a * ZONE_DEVICE page. */ -static bool migrate_vma_check_page(struct page *page) +static bool migrate_vma_check_page(struct page *page, struct page *fault_page) { /* * One extra ref because caller holds an extra reference, either from * isolate_lru_page() for a regular page, or migrate_vma_collect() for * a device page. 
*/ - int extra = 1; + int extra = 1 + (page == fault_page); /* * FIXME support THP (transparent huge page), it is bit more complex to @@ -2637,7 +2644,7 @@ static void migrate_vma_prepare(struct migrate_vma *migrate) put_page(page); } - if (!migrate_vma_check_page(page)) { + if (!migrate_vma_check_page(page, migrate->fault_page)) { if (remap) { migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; migrate->cpages--; @@ -2705,7 +2712,7 @@ static void migrate_vma_unmap(struct migrate_vma *migrate) goto restore; } - if (migrate_vma_check_page(page)) + if (migrate_vma_check_page(page, migrate->fault_page)) continue; restore: @@ -2815,6 +2822,8 @@ int migrate_vma_setup(struct migrate_vma *args) return -EINVAL; if (!args->src || !args->dst) return -EINVAL; + if (args->fault_page && !is_device_private_page(args->fault_page)) + return -EINVAL; memset(args->src, 0, sizeof(*args->src) * nr_pages); args->cpages = 0; @@ -3045,7 +3054,12 @@ void migrate_vma_pages(struct migrate_vma *migrate) } } - r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY); + if (migrate->fault_page == page) + r = migrate_page_extra(mapping, newpage, page, + MIGRATE_SYNC_NO_COPY, 1); + else + r = migrate_page(mapping, newpage, page, + MIGRATE_SYNC_NO_COPY); if (r != MIGRATEPAGE_SUCCESS) migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; } -- Gitee From fabc1269c2156f35420ab740c9f61e2c6fe5f5c3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 21 May 2022 12:19:15 +0800 Subject: [PATCH 042/243] ipv6: annotate some data-races around sk->sk_prot mainline inclusion from mainline-v5.18-rc1 commit 086d49058cd8471046ae9927524708820f5fd1c7 category: bugfix ISSUE:I5YPMQ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=086d49058cd8471046ae9927524708820f5fd1c7 Signed-off-by: gaochao -------------------------------- IPv6 has this hack changing sk->sk_prot when an IPv6 socket is 'converted' to an IPv4 one with IPV6_ADDRFORM option. This operation is only performed for TCP and UDP, knowing their 'struct proto' for the two network families are populated in the same way, and can not disappear while a reader might use and dereference sk->sk_prot. If we think about it all reads of sk->sk_prot while either socket lock or RTNL is not acquired should be using READ_ONCE(). Also note that other layers like MPTCP, XFRM, CHELSIO_TLS also write over sk->sk_prot. 
BUG: KCSAN: data-race in inet6_recvmsg / ipv6_setsockopt write to 0xffff8881386f7aa8 of 8 bytes by task 26932 on cpu 0: do_ipv6_setsockopt net/ipv6/ipv6_sockglue.c:492 [inline] ipv6_setsockopt+0x3758/0x3910 net/ipv6/ipv6_sockglue.c:1019 udpv6_setsockopt+0x85/0x90 net/ipv6/udp.c:1649 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3489 __sys_setsockopt+0x209/0x2a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff8881386f7aa8 of 8 bytes by task 26911 on cpu 1: inet6_recvmsg+0x7a/0x210 net/ipv6/af_inet6.c:659 ____sys_recvmsg+0x16c/0x320 ___sys_recvmsg net/socket.c:2674 [inline] do_recvmmsg+0x3f5/0xae0 net/socket.c:2768 __sys_recvmmsg net/socket.c:2847 [inline] __do_sys_recvmmsg net/socket.c:2870 [inline] __se_sys_recvmmsg net/socket.c:2863 [inline] __x64_sys_recvmmsg+0xde/0x160 net/socket.c:2863 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0xffffffff85e0e980 -> 0xffffffff85e01580 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 26911 Comm: syz-executor.3 Not tainted 5.17.0-rc2-syzkaller-00316-g0457e5153e0e-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Reported-by: syzbot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller conflicts: net/ipv6/af_inet6.c Signed-off-by: Xu Jia Reviewed-by: Wei Yongjun Reviewed-by: Yue Haibing Signed-off-by: Zheng Zengkai --- net/ipv6/af_inet6.c | 23 +++++++++++++++++------ net/ipv6/ipv6_sockglue.c | 6 ++++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 090575346daf..8ba9dceb361f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -439,11 +439,13 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; + const struct proto *prot; int err = 0; /* If the socket has its own bind function then use it. */ - if (sk->sk_prot->bind) - return sk->sk_prot->bind(sk, uaddr, addr_len); + prot = READ_ONCE(sk->sk_prot); + if (prot->bind) + return prot->bind(sk, uaddr, addr_len); if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; @@ -550,6 +552,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); + const struct proto *prot; switch (cmd) { case SIOCADDRT: @@ -567,9 +570,11 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCSIFDSTADDR: return addrconf_set_dstaddr(net, argp); default: - if (!sk->sk_prot->ioctl) + /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ + prot = READ_ONCE(sk->sk_prot); + if (!prot->ioctl) return -ENOIOCTLCMD; - return sk->sk_prot->ioctl(sk, cmd, arg); + return prot->ioctl(sk, cmd, arg); } /*NOTREACHED*/ return 0; @@ -631,11 +636,14 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; + const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; - return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, sk, msg, size); } @@ -645,13 +653,16 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; int addr_len = 0; int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); - err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, sk, msg, size, flags & MSG_DONTWAIT, flags & ~MSG_DONTWAIT, &addr_len); if (err >= 0) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index b19eedff6bf5..6cfb4042994e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -479,7 +479,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); local_bh_enable(); - sk->sk_prot = &tcp_prot; + /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + WRITE_ONCE(sk->sk_prot, &tcp_prot); /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific); sk->sk_socket->ops = &inet_stream_ops; @@ -494,7 +495,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); local_bh_enable(); - sk->sk_prot = prot; + /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + WRITE_ONCE(sk->sk_prot, prot); sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } -- Gitee From cc50aedbba37454ad4f85b99ced5c50812fc2d96 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 27 Oct 2022 18:36:29 +0800 Subject: [PATCH 043/243] ipv6: Fix data races around sk->sk_prot. mainline inclusion from mainline-6.1-rc1 commit 364f997b5cfe1db0d63a390fe7c801fa2b3115f6 category: bugfix issue: I5YPMQ CVE: CVE-2022-3567 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=364f997b5cfe1db0d63a390fe7c801fa2b3115f6 Signed-off-by: gaochao --------------------------- Commit 086d49058cd8 ("ipv6: annotate some data-races around sk->sk_prot") fixed some data-races around sk->sk_prot but it was not enough. Some functions in inet6_(stream|dgram)_ops still access sk->sk_prot without lock_sock() or rtnl_lock(), so they need READ_ONCE() to avoid load tearing. 
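
The rule both sk_prot patches implement is the standard one for a pointer that one path rewrites under a lock while others read it locklessly: the writer publishes the new value with WRITE_ONCE(), and each lockless reader loads the pointer exactly once with READ_ONCE() into a local and only uses that local, so the compiler can neither tear nor reload the access. A generic sketch of the pattern with hypothetical types (not the socket code itself):

#include <linux/compiler.h>
#include <linux/errno.h>
#include <linux/spinlock.h>

struct ops {
    int (*do_ioctl)(void *priv, unsigned int cmd);
};

struct obj {
    spinlock_t lock;
    const struct ops *ops;      /* rewritten at runtime, read locklessly */
    void *priv;
};

/* Writer side: swap the ops pointer under the lock. */
static void obj_switch_ops(struct obj *o, const struct ops *new_ops)
{
    spin_lock(&o->lock);
    WRITE_ONCE(o->ops, new_ops);        /* paired with READ_ONCE() below */
    spin_unlock(&o->lock);
}

/* Reader side: no lock held, so load the pointer exactly once. */
static int obj_ioctl(struct obj *o, unsigned int cmd)
{
    const struct ops *ops = READ_ONCE(o->ops);

    if (!ops->do_ioctl)
        return -ENOTTY;
    return ops->do_ioctl(o->priv, cmd);
}

The important property is that the reader never dereferences o->ops (or sk->sk_prot in the real code) twice while expecting both loads to see the same structure.
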
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski Signed-off-by: Liu Jian Conflicts: net/ipv6/ipv6_sockglue.c Reviewed-by: Yue Haibing Reviewed-by: Xiu Jianfeng Signed-off-by: Zheng Zengkai --- net/core/sock.c | 6 ++++-- net/ipv4/af_inet.c | 23 ++++++++++++++++------- net/ipv6/ipv6_sockglue.c | 4 ++-- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index bee3c320dbfe..d75b3af50864 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3223,7 +3223,8 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; - return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_getsockopt); @@ -3250,7 +3251,8 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; - return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_setsockopt); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e2f85a16fad9..5f2eb6d95097 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -559,22 +559,27 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; int err; if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; + + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + if (uaddr->sa_family == AF_UNSPEC) - return sk->sk_prot->disconnect(sk, flags); + return prot->disconnect(sk, flags); if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { - err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); + err = prot->pre_connect(sk, uaddr, addr_len); if (err) return err; } if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; - return sk->sk_prot->connect(sk, uaddr, addr_len); + return prot->connect(sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_dgram_connect); @@ -735,10 +740,11 @@ EXPORT_SYMBOL(inet_stream_connect); int inet_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { - struct sock *sk1 = sock->sk; + struct sock *sk1 = sock->sk, *sk2; int err = -EINVAL; - struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern); if (!sk2) goto do_err; @@ -823,12 +829,15 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; - if (sk->sk_prot->sendpage) - return sk->sk_prot->sendpage(sk, page, offset, size, flags); + /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ + prot = READ_ONCE(sk->sk_prot); + if (prot->sendpage) + return prot->sendpage(sk, page, offset, size, flags); return sock_no_sendpage(sock, page, offset, size, flags); } EXPORT_SYMBOL(inet_sendpage); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6cfb4042994e..65f83ce728fa 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -479,7 +479,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); local_bh_enable(); - /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + /* Paired with READ_ONCE(sk->sk_prot) in inet6_stream_ops */ WRITE_ONCE(sk->sk_prot, &tcp_prot); /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific); @@ -495,7 +495,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); local_bh_enable(); - /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + /* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */ WRITE_ONCE(sk->sk_prot, prot); sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; -- Gitee From 04773d48efa883dc87970e4dea37d3cb9036ff87 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:52 -0700 Subject: [PATCH 044/243] Bluetooth: L2CAP: Fix attempting to access uninitialized memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.154 commit 26ca2ac091b49281d73df86111d16e5a76e43bd7 category: bugfix issue: I61IRV CVE: CVE-2022-42895 Signed-off-by: gaochao --------------------------------------- commit b1a2cd50c0357f243b7435a732b4e62ba3157a2e upstream. On l2cap_parse_conf_req the variable efs is only initialized if remote_efs has been set. CVE: CVE-2022-42895 CC: stable@vger.kernel.org Reported-by: Tamás Koczka Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d840985501e2..b133ede71ec1 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3732,7 +3732,8 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc, endptr - ptr); - if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { + if (remote_efs && + test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->remote_id = efs.id; chan->remote_stype = efs.stype; chan->remote_msdu = le16_to_cpu(efs.msdu); -- Gitee From 9d93e6a41dfbecb20e979dafae5e4b890ffb62eb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 31 Oct 2022 16:10:32 -0700 Subject: [PATCH 045/243] Bluetooth: L2CAP: Fix accepting connection request for invalid SPSM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.154 commit 6b6f94fb9a74dd2891f11de4e638c6202bc89476 category: bugfix issue: I61IRG CVE: CVE-2022-42896 Signed-off-by: gaochao --------------------------------------- commit 711f8c3fb3db61897080468586b970c87c61d9e4 upstream. 
The Bluetooth spec states that the valid range for SPSM is from 0x0001-0x00ff so it is invalid to accept values outside of this range: BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A page 1059: Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges CVE: CVE-2022-42896 CC: stable@vger.kernel.org Reported-by: Tamás Koczka Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Tedd Ho-Jeong An Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/l2cap_core.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b133ede71ec1..46c00ad3f6e4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5767,6 +5767,19 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm), scid, mtu, mps); + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A + * page 1059: + * + * Valid range: 0x0001-0x00ff + * + * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges + */ + if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { + result = L2CAP_CR_LE_BAD_PSM; + chan = NULL; + goto response; + } + /* Check if we have socket listening on psm */ pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src, &conn->hcon->dst, LE_LINK); @@ -5946,6 +5959,18 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, psm = req->psm; + /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A + * page 1059: + * + * Valid range: 0x0001-0x00ff + * + * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges + */ + if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) { + result = L2CAP_CR_LE_BAD_PSM; + goto response; + } + BT_DBG("psm 0x%2.2x mtu %u mps %u", __le16_to_cpu(psm), mtu, mps); memset(&pdu, 0, sizeof(pdu)); -- Gitee From b7f26c41206fe5cd868e6853829e0407844bc913 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 1 Sep 2022 16:16:10 +0100 Subject: [PATCH 046/243] io_uring: disable polling pollfree files stable inclusion from stable-5.10.141 commit 28d8d2737e82fc29ff9e788597661abecc7f7994 category: bugfix issue: I62LR8 CVE: CVE-2022-3176 Signed-off-by: gaochao --------------------------------------- Older kernels lack io_uring POLLFREE handling. As only affected files are signalfd and android binder the safest option would be to disable polling those files via io_uring and hope there are no users. 
Fixes: 221c5eb233823 ("io_uring: add support for IORING_OP_POLL") Signed-off-by: Pavel Begunkov Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 1 + fs/io_uring.c | 5 +++++ fs/signalfd.c | 1 + include/linux/fs.h | 1 + 4 files changed, 8 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 60dba1f2eb16..e310641c78eb 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -6190,6 +6190,7 @@ const struct file_operations binder_fops = { .open = binder_open, .flush = binder_flush, .release = binder_release, + .may_pollfree = true, }; #ifdef CONFIG_BINDER_TRANSACTION_PROC_BRIEF diff --git a/fs/io_uring.c b/fs/io_uring.c index e41956f2fc01..d3df30098241 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5185,6 +5185,11 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, struct io_ring_ctx *ctx = req->ctx; bool cancel = false; + if (req->file->f_op->may_pollfree) { + spin_lock_irq(&ctx->completion_lock); + return -EOPNOTSUPP; + } + INIT_HLIST_NODE(&req->hash_node); io_init_poll_iocb(poll, mask, wake_func); poll->file = req->file; diff --git a/fs/signalfd.c b/fs/signalfd.c index b94fb5f81797..41dc597b78cc 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -248,6 +248,7 @@ static const struct file_operations signalfd_fops = { .poll = signalfd_poll, .read = signalfd_read, .llseek = noop_llseek, + .may_pollfree = true, }; static int do_signalfd4(int ufd, sigset_t *mask, int flags) diff --git a/include/linux/fs.h b/include/linux/fs.h index b7f42d3dce26..3e548c0d3e07 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1860,6 +1860,7 @@ struct file_operations { struct file *file_out, loff_t pos_out, loff_t len, unsigned int remap_flags); int (*fadvise)(struct file *, loff_t, loff_t, int); + bool may_pollfree; } __randomize_layout; struct inode_operations { -- Gitee From 1d22d2f7845b627b8aabff2b426ea900daf33476 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 22 Sep 2022 08:13:47 -0700 Subject: [PATCH 047/243] nvme: ensure subsystem reset is single threaded mainline inclusion from mainline-6.1-rc1 commit 1e866afd4bcdd01a70a5eddb4371158d3035ce03 category: bugfix issue: I5VMGU CVE: CVE-2022-3169 Signed-off-by: gaochao --------------------------------------- The subsystem reset writes to a register, so we have to ensure the device state is capable of handling that otherwise the driver may access unmapped registers. Use the state machine to ensure the subsystem reset doesn't try to write registers on a device already undergoing this type of reset. 
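
The ordering the fix enforces is a common one for "touch the hardware, then reset" paths: claim the reset through the state machine first, and only then write the register and schedule the reset work. The sketch below restates that idea with a hypothetical compare-and-swap state variable; the real driver goes through its nvme_ctrl state machine (nvme_wait_reset()/nvme_try_sched_reset()) rather than this helper:

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

enum ctrl_state { CTRL_LIVE, CTRL_RESETTING, CTRL_DEAD };

struct ctrl {
    _Atomic enum ctrl_state state;
};

static void write_reset_register(struct ctrl *c)
{
    /* Stand-in for the NSSR register write; only legal while RESETTING. */
    printf("register write while state=%d\n", (int)atomic_load(&c->state));
}

static int subsystem_reset(struct ctrl *c)
{
    enum ctrl_state expected = CTRL_LIVE;

    /* Claim the reset first; bail out if another reset or teardown owns the device. */
    if (!atomic_compare_exchange_strong(&c->state, &expected, CTRL_RESETTING))
        return -EBUSY;

    write_reset_register(c);    /* safe: the claimed state implies registers are mapped */
    /* ...schedule the actual reset work here... */
    return 0;
}

int main(void)
{
    struct ctrl c = { .state = CTRL_LIVE };

    printf("first reset:  %d\n", subsystem_reset(&c));
    printf("second reset: %d\n", subsystem_reset(&c));   /* rejected with -EBUSY */
    return 0;
}
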
Link: https://bugzilla.kernel.org/show_bug.cgi?id=214771 Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ad46208eac76..160c23d12a54 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -538,11 +538,23 @@ static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj) static inline void nvme_should_fail(struct request *req) {} #endif +bool nvme_wait_reset(struct nvme_ctrl *ctrl); +int nvme_try_sched_reset(struct nvme_ctrl *ctrl); + static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) { + int ret; + if (!ctrl->subsystem) return -ENOTTY; - return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (!nvme_wait_reset(ctrl)) + return -EBUSY; + + ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (ret) + return ret; + + return nvme_try_sched_reset(ctrl); } /* @@ -629,7 +641,6 @@ void nvme_cancel_tagset(struct nvme_ctrl *ctrl); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); -bool nvme_wait_reset(struct nvme_ctrl *ctrl); int nvme_disable_ctrl(struct nvme_ctrl *ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); @@ -682,7 +693,6 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); -int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, -- Gitee From 167f3595b39cc305b8b952d3702a37e8faa5f16b Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 8 Dec 2022 21:45:20 +0800 Subject: [PATCH 048/243] l2tp: Serialize access to sk_user_data with sk_callback_lock mainline inclusion from mainline-v6.1-rc6 commit b68777d54fac21fc833ec26ea1a2a84f975ab035 category: bugfix ISSUE:I656U4 CVE: CVE-2022-4129 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b68777d54fac21fc833ec26ea1a2a84f975ab035 Signed-off-by: gaochao -------------------------------- sk->sk_user_data has multiple users, which are not compatible with each other. Writers must synchronize by grabbing the sk->sk_callback_lock. l2tp currently fails to grab the lock when modifying the underlying tunnel socket fields. Fix it by adding appropriate locking. We err on the side of safety and grab the sk_callback_lock also inside the sk_destruct callback overridden by l2tp, even though there should be no refs allowing access to the sock at the time when sk_destruct gets called. v4: - serialize write to sk_user_data in l2tp sk_destruct v3: - switch from sock lock to sk_callback_lock - document write-protection for sk_user_data v2: - update Fixes to point to origin of the bug - use real names in Reported/Tested-by tags Cc: Tom Parkin Fixes: 3557baabf280 ("[L2TP]: PPP over L2TP driver core") Reported-by: Haowei Yan Signed-off-by: Jakub Sitnicki Signed-off-by: David S. 
Miller Signed-off-by: Lu Wei Reviewed-by: Yue Haibing Reviewed-by: Wang Weiyang Signed-off-by: Zheng Zengkai --- include/net/sock.h | 2 +- net/l2tp/l2tp_core.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 477e7bc6831e..7ba93f164d6b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -326,7 +326,7 @@ struct bpf_local_storage; * @sk_tskey: counter to disambiguate concurrent tstamp requests * @sk_zckey: counter to order MSG_ZEROCOPY notifications * @sk_socket: Identd and reporting IO signals - * @sk_user_data: RPC layer private data + * @sk_user_data: RPC layer private data. Write-protected by @sk_callback_lock. * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index dc8987ed08ad..e89852bc5309 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1150,8 +1150,10 @@ static void l2tp_tunnel_destruct(struct sock *sk) } /* Remove hooks into tunnel socket */ + write_lock_bh(&sk->sk_callback_lock); sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); /* Call the original destructor */ if (sk->sk_destruct) @@ -1471,16 +1473,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, sock = sockfd_lookup(tunnel->fd, &ret); if (!sock) goto err; - - ret = l2tp_validate_socket(sock->sk, net, tunnel->encap); - if (ret < 0) - goto err_sock; } + sk = sock->sk; + write_lock(&sk->sk_callback_lock); + + ret = l2tp_validate_socket(sk, net, tunnel->encap); + if (ret < 0) + goto err_sock; + tunnel->l2tp_net = net; pn = l2tp_pernet(net); - sk = sock->sk; sock_hold(sk); tunnel->sock = sk; @@ -1506,7 +1510,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, setup_udp_tunnel_sock(net, sock, &udp_cfg); } else { - sk->sk_user_data = tunnel; + rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1520,6 +1524,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); + write_unlock(&sk->sk_callback_lock); return 0; err_sock: @@ -1527,6 +1532,8 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, sock_release(sock); else sockfd_put(sock); + + write_unlock(&sk->sk_callback_lock); err: return ret; } -- Gitee From 79e4b1648c003332e79952388c3e00ee4344078a Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 8 Dec 2022 21:45:21 +0800 Subject: [PATCH 049/243] l2tp: Don't sleep and disable BH under writer-side sk_callback_lock mainline inclusion from mainline-v6.1-rc7 commit af295e854a4e3813ffbdef26dbb6a4d6226c3ea1 category: bugfix ISSUE:I656U4 CVE: CVE-2022-4129 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=af295e854a4e3813ffbdef26dbb6a4d6226c3ea1 Signed-off-by: gaochao -------------------------------- When holding a reader-writer spin lock we cannot sleep. 
Calling setup_udp_tunnel_sock() with write lock held violates this rule, because we end up calling percpu_down_read(), which might sleep, as syzbot reports [1]: __might_resched.cold+0x222/0x26b kernel/sched/core.c:9890 percpu_down_read include/linux/percpu-rwsem.h:49 [inline] cpus_read_lock+0x1b/0x140 kernel/cpu.c:310 static_key_slow_inc+0x12/0x20 kernel/jump_label.c:158 udp_tunnel_encap_enable include/net/udp_tunnel.h:187 [inline] setup_udp_tunnel_sock+0x43d/0x550 net/ipv4/udp_tunnel_core.c:81 l2tp_tunnel_register+0xc51/0x1210 net/l2tp/l2tp_core.c:1509 pppol2tp_connect+0xcdc/0x1a10 net/l2tp/l2tp_ppp.c:723 Trim the writer-side critical section for sk_callback_lock down to the minimum, so that it covers only operations on sk_user_data. Also, when grabbing the sk_callback_lock, we always need to disable BH, as Eric points out. Failing to do so leads to deadlocks because we acquire sk_callback_lock in softirq context, which can get stuck waiting on us if: 1) it runs on the same CPU, or CPU0 ---- lock(clock-AF_INET6); lock(clock-AF_INET6); 2) lock ordering leads to priority inversion CPU0 CPU1 ---- ---- lock(clock-AF_INET6); local_irq_disable(); lock(&tcp_hashinfo.bhash[i].lock); lock(clock-AF_INET6); lock(&tcp_hashinfo.bhash[i].lock); ... as syzbot reports [2,3]. Use the _bh variants for write_(un)lock. [1] https://lore.kernel.org/netdev/0000000000004e78ec05eda79749@google.com/ [2] https://lore.kernel.org/netdev/000000000000e38b6605eda76f98@google.com/ [3] https://lore.kernel.org/netdev/000000000000dfa31e05eda76f75@google.com/ v2: - Check and set sk_user_data while holding sk_callback_lock for both L2TP encapsulation types (IP and UDP) (Tetsuo) Cc: Tom Parkin Cc: Tetsuo Handa Fixes: b68777d54fac ("l2tp: Serialize access to sk_user_data with sk_callback_lock") Reported-by: Eric Dumazet Reported-by: syzbot+703d9e154b3b58277261@syzkaller.appspotmail.com Reported-by: syzbot+50680ced9e98a61f7698@syzkaller.appspotmail.com Reported-by: syzbot+de987172bb74a381879b@syzkaller.appspotmail.com Signed-off-by: Jakub Sitnicki Signed-off-by: David S. 
Miller Signed-off-by: Lu Wei Reviewed-by: Yue Haibing Reviewed-by: Wang Weiyang Signed-off-by: Zheng Zengkai --- net/l2tp/l2tp_core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index e89852bc5309..d6bb1795329a 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1476,11 +1476,12 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, } sk = sock->sk; - write_lock(&sk->sk_callback_lock); - + write_lock_bh(&sk->sk_callback_lock); ret = l2tp_validate_socket(sk, net, tunnel->encap); if (ret < 0) - goto err_sock; + goto err_inval_sock; + rcu_assign_sk_user_data(sk, tunnel); + write_unlock_bh(&sk->sk_callback_lock); tunnel->l2tp_net = net; pn = l2tp_pernet(net); @@ -1509,8 +1510,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, }; setup_udp_tunnel_sock(net, sock, &udp_cfg); - } else { - rcu_assign_sk_user_data(sk, tunnel); } tunnel->old_sk_destruct = sk->sk_destruct; @@ -1524,16 +1523,18 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, if (tunnel->fd >= 0) sockfd_put(sock); - write_unlock(&sk->sk_callback_lock); return 0; err_sock: + write_lock_bh(&sk->sk_callback_lock); + rcu_assign_sk_user_data(sk, NULL); +err_inval_sock: + write_unlock_bh(&sk->sk_callback_lock); + if (tunnel->fd < 0) sock_release(sock); else sockfd_put(sock); - - write_unlock(&sk->sk_callback_lock); err: return ret; } -- Gitee From edc7502e1196199cee06bb22b6af0f00fe71977d Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 16 Dec 2022 17:40:17 +0800 Subject: [PATCH 050/243] proc: avoid integer type confusion in get_proc_long stable inclusion from stable-v5.10.157 commit 4aa32aaef6c1b5e39ae2508ec596bd7b67871043 category: bugfix ISSUE:I66BMO CVE: CVE-2022-4378 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4aa32aaef6c1b5e39ae2508ec596bd7b67871043 Signed-off-by: gaochao -------------------------------- commit e6cfaf34be9fcd1a8285a294e18986bfc41a409c upstream. proc_get_long() is passed a size_t, but then assigns it to an 'int' variable for the length. Let's not do that, even if our IO paths are limited to MAX_RW_COUNT (exactly because of these kinds of type errors). So do the proper test in the rigth type. 
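
The hazard is easy to reproduce in isolation: a size_t length silently truncates, and can change sign, when stored into an int, so a later zero-or-negative test on the wrong type no longer means what it appears to mean. A small standalone demonstration (typical LP64 behaviour; the exact wrapped value is implementation-defined):

#include <limits.h>
#include <stdio.h>
#include <sys/types.h>

int main(void)
{
    size_t size = (size_t)INT_MAX + 2;  /* an oversized length */
    int bad_len = size;                 /* truncates: typically becomes negative */
    ssize_t good_len = size;            /* keeps the magnitude on LP64 */

    printf("size     = %zu\n", size);
    printf("bad_len  = %d\n", bad_len);
    printf("good_len = %zd\n", good_len);

    /* Old-style check: 'size != 0' passes even though bad_len is unusable. */
    if (size != 0 && bad_len <= 0)
        puts("old check accepted a length the code would then mishandle");

    /* Fixed check: test the signed, full-width value before using it. */
    if (good_len <= 0)
        puts("rejected");
    else
        puts("new check: length validated in the right type");
    return 0;
}
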
Reported-by: Kyle Zeng Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Long Li Reviewed-by: Zhang Yi Reviewed-by: Xiu Jianfeng Signed-off-by: Zheng Zengkai --- kernel/sysctl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a5be63fcb4ca..7acfa131d7eb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -466,13 +466,12 @@ static int proc_get_long(char **buf, size_t *size, unsigned long *val, bool *neg, const char *perm_tr, unsigned perm_tr_len, char *tr) { - int len; char *p, tmp[TMPBUFLEN]; + ssize_t len = *size; - if (!*size) + if (len <= 0) return -EINVAL; - len = *size; if (len > TMPBUFLEN - 1) len = TMPBUFLEN - 1; -- Gitee From bac68a79d8e754a142f9645314ac01718294e212 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 16 Dec 2022 17:40:18 +0800 Subject: [PATCH 051/243] proc: proc_skip_spaces() shouldn't think it is working on C strings stable inclusion from stable-v5.10.157 commit 9ba389863ac63032d4b6ffad2c90a62cd78082ee category: bugfix ISSUE:I66BMO CVE: CVE-2022-4378 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9ba389863ac63032d4b6ffad2c90a62cd78082ee Signed-off-by: gaochao -------------------------------- commit bce9332220bd677d83b19d21502776ad555a0e73 upstream. proc_skip_spaces() seems to think it is working on C strings, and ends up being just a wrapper around skip_spaces() with a really odd calling convention. Instead of basing it on skip_spaces(), it should have looked more like proc_skip_char(), which really is the exact same function (except it skips a particular character, rather than whitespace). So use that as inspiration, odd coding and all. Now the calling convention actually makes sense and works for the intended purpose. Reported-and-tested-by: Kyle Zeng Acked-by: Eric Dumazet Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Long Li Reviewed-by: Zhang Yi Reviewed-by: Xiu Jianfeng Signed-off-by: Zheng Zengkai --- kernel/sysctl.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7acfa131d7eb..7c25df8e9e2e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -391,13 +391,14 @@ int proc_dostring(struct ctl_table *table, int write, ppos); } -static size_t proc_skip_spaces(char **buf) +static void proc_skip_spaces(char **buf, size_t *size) { - size_t ret; - char *tmp = skip_spaces(*buf); - ret = tmp - *buf; - *buf = tmp; - return ret; + while (*size) { + if (!isspace(**buf)) + break; + (*size)--; + (*buf)++; + } } static void proc_skip_char(char **buf, size_t *size, const char v) @@ -629,7 +630,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, bool neg; if (write) { - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -656,7 +657,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? 
: -EINVAL; *lenp -= left; @@ -698,7 +699,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) { err = -EINVAL; goto out_free; @@ -718,7 +719,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, } if (!err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); out_free: if (err) @@ -1176,7 +1177,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (write) { bool neg; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -1204,7 +1205,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? : -EINVAL; *lenp -= left; -- Gitee From fd5edccebafc3c1ed07e566502d654842c67b8ee Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Thu, 8 Dec 2022 21:45:19 +0800 Subject: [PATCH 052/243] Bluetooth: L2CAP: Fix u8 overflow maillist inclusion category: bugfix ISSUE:I63XMP CVE: CVE-2022-45934 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=ae4569813a6e931258db627cdfe50dfb4f917d5d Signed-off-by: gaochao -------------------------------- By keep sending L2CAP_CONF_REQ packets, chan->num_conf_rsp increases multiple times and eventually it will wrap around the maximum number (i.e., 255). This patch prevents this by adding a boundary check with L2CAP_MAX_CONF_RSP Btmon log: Bluetooth monitor ver 5.64 = Note: Linux version 6.1.0-rc2 (x86_64) 0.264594 = Note: Bluetooth subsystem version 2.22 0.264636 @ MGMT Open: btmon (privileged) version 1.22 {0x0001} 0.272191 = New Index: 00:00:00:00:00:00 (Primary,Virtual,hci0) [hci0] 13.877604 @ RAW Open: 9496 (privileged) version 2.22 {0x0002} 13.890741 = Open Index: 00:00:00:00:00:00 [hci0] 13.900426 (...) > ACL Data RX: Handle 200 flags 0x00 dlen 1033 #32 [hci0] 14.273106 invalid packet size (12 != 1033) 08 00 01 00 02 01 04 00 01 10 ff ff ............ > ACL Data RX: Handle 200 flags 0x00 dlen 1547 #33 [hci0] 14.273561 invalid packet size (14 != 1547) 0a 00 01 00 04 01 06 00 40 00 00 00 00 00 ........@..... > ACL Data RX: Handle 200 flags 0x00 dlen 2061 #34 [hci0] 14.274390 invalid packet size (16 != 2061) 0c 00 01 00 04 01 08 00 40 00 00 00 00 00 00 04 ........@....... > ACL Data RX: Handle 200 flags 0x00 dlen 2061 #35 [hci0] 14.274932 invalid packet size (16 != 2061) 0c 00 01 00 04 01 08 00 40 00 00 00 07 00 03 00 ........@....... = bluetoothd: Bluetooth daemon 5.43 14.401828 > ACL Data RX: Handle 200 flags 0x00 dlen 1033 #36 [hci0] 14.275753 invalid packet size (12 != 1033) 08 00 01 00 04 01 04 00 40 00 00 00 ........@... 
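
The counter problem is plain u8 arithmetic: after 255 increments the value wraps back to zero, so any later comparison against a small limit is defeated. A tiny standalone demonstration of the wrap and of the bounded increment the patch switches to (the constant below is a local stand-in for L2CAP_CONF_MAX_CONF_RSP):

#include <stdint.h>
#include <stdio.h>

#define MAX_CONF_RSP 2      /* local stand-in for the kernel's limit */

int main(void)
{
    uint8_t unbounded = 0, bounded = 0;

    for (int i = 0; i < 300; i++) {
        unbounded++;                    /* wraps 255 -> 0 */
        if (bounded < MAX_CONF_RSP)     /* the patched form cannot wrap */
            bounded++;
    }

    printf("unbounded counter after 300 increments: %u\n", unbounded);
    printf("bounded counter after 300 increments:   %u\n", bounded);
    return 0;
}
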
Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Baisong Zhong Reviewed-by: Liu Jian Reviewed-by: Yue Haibing Reviewed-by: Xiu Jianfeng Signed-off-by: Zheng Zengkai --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 46c00ad3f6e4..60da4a550c96 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4414,7 +4414,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, chan->ident = cmd->ident; l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); - chan->num_conf_rsp++; + if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP) + chan->num_conf_rsp++; /* Reset config buffer. */ chan->conf_len = 0; -- Gitee From 0e3037fae10a23adb80d0ef18f0759382ab4e3f7 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:40 -0500 Subject: [PATCH 053/243] wifi: wilc1000: validate pairwise and authentication suite offsets stable inclusion from stable-5.10.157 commit 7c6535fb4d67ea37c98a1d1d24ca33dd5ec42693 category: bugfix issue: I67191 CVE: CVE-2022-47520 Signed-off-by: gaochao --------------------------------------- commit cd21d99e595ec1d8721e1058dcdd4f1f7de1d793 upstream. There is no validation of 'offset' which can trigger an out-of-bounds read when extracting RSN capabilities. Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-2-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/microchip/wilc1000/hif.c | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index d025a3093015..b25847799138 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -467,14 +467,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss, rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); if (rsn_ie) { + int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8; - param->mode_802_11i = 2; - param->rsn_found = true; /* extract RSN capabilities */ - offset += (rsn_ie[offset] * 4) + 2; - offset += (rsn_ie[offset] * 4) + 2; - memcpy(param->rsn_cap, &rsn_ie[offset], 2); + if (offset < rsn_ie_len) { + /* skip over pairwise suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset < rsn_ie_len) { + /* skip over authentication suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset + 1 < rsn_ie_len) { + param->mode_802_11i = 2; + param->rsn_found = true; + memcpy(param->rsn_cap, &rsn_ie[offset], 2); + } + } + } } if (param->rsn_found) { -- Gitee From e82bfd6f18fde4a576eac3da4ec4bf3f35ba4ebc Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:41 -0500 Subject: [PATCH 054/243] wifi: wilc1000: validate length of IEEE80211_P2P_ATTR_OPER_CHANNEL attribute stable inclusion from stable-5.10.157 commit 905f886eae4b065656a575e8a02544045cbaadcf category: bugfix issue: I673ZZ CVE: CVE-2022-47519 Signed-off-by: gaochao --------------------------------------- commit 051ae669e4505abbe05165bebf6be7922de11f41 upstream. Validate that the IEEE80211_P2P_ATTR_OPER_CHANNEL attribute contains enough space for a 'struct struct wilc_attr_oper_ch'. If the attribute is too small then it triggers an out-of-bounds write later in the function. 
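
This and the preceding wilc1000 fix apply the same defensive rule for parsing length-prefixed attributes out of an untrusted buffer: check that the header itself fits in what remains before reading it, and check that header plus claimed payload still fits before using it. A self-contained sketch of such a walk, using a generic TLV layout rather than the exact wilc_attr_entry structure:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct tlv_hdr {
    uint8_t type;
    uint8_t len;        /* length of the payload that follows */
};

static void parse_tlvs(const uint8_t *buf, size_t buf_len)
{
    size_t off = 0;

    while (off + sizeof(struct tlv_hdr) <= buf_len) {
        struct tlv_hdr hdr;

        memcpy(&hdr, buf + off, sizeof(hdr));

        /* Reject an element whose claimed payload runs past the buffer. */
        if (off + sizeof(hdr) + hdr.len > buf_len) {
            puts("truncated element, stop parsing");
            return;
        }

        printf("type %u, payload %u bytes\n", hdr.type, hdr.len);
        off += sizeof(hdr) + hdr.len;
    }
}

int main(void)
{
    /* Second element claims 200 payload bytes but only 1 byte remains. */
    const uint8_t buf[] = { 0x01, 0x02, 0xaa, 0xbb, 0x02, 200, 0xcc };

    parse_tlvs(buf, sizeof(buf));
    return 0;
}
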
Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-3-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 6be5ac8ba518..b42e9eb2631c 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -939,14 +939,24 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) return; while (index + sizeof(*e) <= len) { + u16 attr_size; + e = (struct wilc_attr_entry *)&buf[index]; + attr_size = le16_to_cpu(e->attr_len); + + if (index + sizeof(*e) + attr_size > len) + return; + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) ch_list_idx = index; - else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL) + else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && + attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) op_ch_idx = index; + if (ch_list_idx && op_ch_idx) break; - index += le16_to_cpu(e->attr_len) + sizeof(*e); + + index += sizeof(*e) + attr_size; } if (ch_list_idx) { -- Gitee From fee054bfdc90cfcf2c7935ef7bcd0b8c93b0c0b1 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:42 -0500 Subject: [PATCH 055/243] wifi: wilc1000: validate length of IEEE80211_P2P_ATTR_CHANNEL_LIST attribute stable inclusion from stable-5.10.157 commit 5a068535c0073c8402aa0755e8ef259fb98a33c5 category: bugfix issue: I6718W CVE: CVE-2022-47521 Signed-off-by: gaochao --------------------------------------- commit f9b62f9843c7b0afdaecabbcebf1dbba18599408 upstream. Validate that the IEEE80211_P2P_ATTR_CHANNEL_LIST attribute contains enough space for a 'struct wilc_attr_oper_ch'. If the attribute is too small then it can trigger an out-of-bounds write later in the function. 'struct wilc_attr_oper_ch' is variable sized so also check 'attr_len' does not extend beyond the end of 'buf'. 
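
The channel-list fixes add one more layer of the same check: an element inside the attribute is itself variable sized, so the length or count it advertises must be bounded against the bytes actually left before it is used as a copy or memset size (the next patch in the series applies the same bound to the per-element channel count). A minimal sketch of that inner check, with a hypothetical element layout rather than the real wilc_ch_list_elem:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ch_elem {
    uint8_t op_class;
    uint8_t no_of_channels;
    /* no_of_channels channel numbers follow */
};

static void set_channels(uint8_t *elems, size_t remaining, uint8_t sta_ch)
{
    size_t i = 0;

    while (i + sizeof(struct ch_elem) <= remaining) {
        struct ch_elem e;

        memcpy(&e, elems + i, sizeof(e));
        i += sizeof(e);

        /* Bound the advertised count by what is really left. */
        if (e.no_of_channels > remaining - i) {
            puts("element claims more channels than the data holds");
            return;
        }

        memset(elems + i, sta_ch, e.no_of_channels);    /* now in bounds */
        i += e.no_of_channels;
    }
}

int main(void)
{
    /* Claims 9 channels but only 3 bytes of channel data follow. */
    uint8_t buf[] = { 0x51, 9, 1, 6, 11 };

    set_channels(buf, sizeof(buf), 14);
    return 0;
}
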
Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-4-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index b42e9eb2631c..df03d4d71a5c 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -947,7 +947,8 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) if (index + sizeof(*e) + attr_size > len) return; - if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST && + attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e))) ch_list_idx = index; else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) -- Gitee From 88a0ddf6618386a861b7c88291b44f4f35e9613c Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 23 Nov 2022 10:35:43 -0500 Subject: [PATCH 056/243] wifi: wilc1000: validate number of channels stable inclusion from stable-5.10.157 commit 3eb6b89a4e9f9e44c3170d70d8d16c3c8dc8c800 category: bugfix issue: I673ZW CVE: CVE-2022-47518 Signed-off-by: gaochao --------------------------------------- commit 0cdfa9e6f0915e3d243e2393bfa8a22e12d553b0 upstream. There is no validation of 'e->no_of_channels' which can trigger an out-of-bounds write in the following 'memset' call. Validate that the number of channels does not extends beyond the size of the channel list element. Signed-off-by: Phil Turnbull Tested-by: Ajay Kathat Acked-by: Ajay Kathat Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221123153543.8568-5-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman --- .../wireless/microchip/wilc1000/cfg80211.c | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index df03d4d71a5c..dd26f2086180 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -961,19 +961,30 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) } if (ch_list_idx) { - u16 attr_size; - struct wilc_ch_list_elem *e; - int i; + unsigned int i; + u16 elem_size; ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx]; - attr_size = le16_to_cpu(ch_list->attr_len); - for (i = 0; i < attr_size;) { + /* the number of bytes following the final 'elem' member */ + elem_size = le16_to_cpu(ch_list->attr_len) - + (sizeof(*ch_list) - sizeof(struct wilc_attr_entry)); + for (i = 0; i < elem_size;) { + struct wilc_ch_list_elem *e; + e = (struct wilc_ch_list_elem *)(ch_list->elem + i); + + i += sizeof(*e); + if (i > elem_size) + break; + + i += e->no_of_channels; + if (i > elem_size) + break; + if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) { memset(e->ch_list, sta_ch, e->no_of_channels); break; } - i += e->no_of_channels; } } -- Gitee From 0294073103df75550cad7e1c18296f25cece5594 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 5 Jan 2022 17:09:43 +0800 Subject: [PATCH 057/243] drm/amdkfd: Check for null pointer after calling kmemdup mainline inclusion from mainline-5.17-rc1 commit abfaf0eee97925905e742aa3b0b72e04a918fa9e category: bugfix 
issue: I66OGA CVE: CVE-2022-3108 Signed-off-by: gaochao --------------------------------------- As the possible failure of the allocation, kmemdup() may return NULL pointer. Therefore, it should be better to check the 'props2' in order to prevent the dereference of NULL pointer. Fixes: 3a87177eb141 ("drm/amdkfd: Add topology support for dGPUs") Signed-off-by: Jiasheng Jiang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 31d793ee0836..6866fa94df97 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -408,6 +408,9 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, return -ENODEV; /* same everything but the other direction */ props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); + if (!props2) + return -ENOMEM; + props2->node_from = id_to; props2->node_to = id_from; props2->kobj = NULL; -- Gitee From 093c953e59c1f968558621463b119e526c6a3e19 Mon Sep 17 00:00:00 2001 From: gc1202 Date: Fri, 30 Dec 2022 11:45:26 +0800 Subject: [PATCH 058/243] access_tokenid : fix task cred leak in check_permission_for_set_tokenid ohos inclusion category: bugfix issue: I684DP CVE: NO Signed-off-by: gaochao --------------------------------------- in check_permission_for_set_tokenid() called get_task_cred() without put_cred(), change get_task_cred() to current_uid() to solve this. --- drivers/accesstokenid/access_tokenid.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/accesstokenid/access_tokenid.c b/drivers/accesstokenid/access_tokenid.c index c83fc114adbd..e7bae802c531 100644 --- a/drivers/accesstokenid/access_tokenid.c +++ b/drivers/accesstokenid/access_tokenid.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) "access_token_id: " fmt -#include #include #include #include @@ -24,7 +23,7 @@ int access_tokenid_get_tokenid(struct file *file, void __user *uarg) static bool check_permission_for_set_tokenid(struct file *file) { - const struct cred *cred = get_task_cred(current); + kuid_t uid = current_uid(); struct inode *inode = file->f_inode; if (inode == NULL) { @@ -32,8 +31,8 @@ static bool check_permission_for_set_tokenid(struct file *file) return false; } - if (uid_eq(cred->uid, GLOBAL_ROOT_UID) || - uid_eq(cred->uid, inode->i_uid)) { + if (uid_eq(uid, GLOBAL_ROOT_UID) || + uid_eq(uid, inode->i_uid)) { return true; } @@ -58,7 +57,7 @@ static bool check_permission_for_ftokenid(struct file *file) { int i; struct group_info *group_info; - const struct cred *cred = get_task_cred(current); + kuid_t uid = current_uid(); struct inode *inode = file->f_inode; if (inode == NULL) { @@ -66,7 +65,7 @@ static bool check_permission_for_ftokenid(struct file *file) return false; } - if (uid_eq(cred->uid, GLOBAL_ROOT_UID)) + if (uid_eq(uid, GLOBAL_ROOT_UID)) return true; group_info = get_current_groups(); -- Gitee From 5500eb6e048068ebfc3bf998be8b6ecb6cb139fe Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Nov 2022 09:43:39 +0100 Subject: [PATCH 059/243] io_uring: kill goto error handling in io_sqpoll_wait_sq() stable inclusion from stable-5.10.155 commit 0f544353fec8e717d37724d95b92538e1de79e86 category: bugfix issue: I693SL CVE: CVE-2022-47946 Signed-off-by: gaochao --------------------------------------- Hunk extracted from commit 70aacfe66136809d7f080f89c492c278298719f4 upstream. 
If the sqpoll thread has died, the out condition doesn't remove the waiting task from the waitqueue. The goto and check are not needed, just make it a break condition after setting the error value. That ensures that we always remove ourselves from sqo_sq_wait waitqueue. Reported-by: Xingyuan Mo Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d3df30098241..8b0f67489fff 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9020,7 +9020,7 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) if (unlikely(ctx->sqo_dead)) { ret = -EOWNERDEAD; - goto out; + break; } if (!io_sqring_full(ctx)) @@ -9030,7 +9030,6 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); -out: return ret; } -- Gitee From 7e7d5a4880ff1862265d08e085dafa85a1a22213 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 7 Dec 2022 08:19:38 +0100 Subject: [PATCH 060/243] xen/netback: fix build warning stable inclusion from stable-5.10.159 commit a00444e25bbc3ff90314ebc72e9b4952b12211d9 category: bugfix issue: I66BND CVE: CVE-2022-3643 Signed-off-by: gaochao --------------------------------------- [ Upstream commit 7dfa764e0223a324366a2a1fc056d4d9d4e95491 ] Commit ad7f402ae4f4 ("xen/netback: Ensure protocol headers don't fall in the non-linear area") introduced a (valid) build warning. There have even been reports of this problem breaking networking of Xen guests. Fixes: ad7f402ae4f4 ("xen/netback: Ensure protocol headers don't fall in the non-linear area") Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Reviewed-by: Ross Lagerwall Tested-by: Jason Andryuk Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/net/xen-netback/netback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index b0cbc7fead74..4de0e62af4d1 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -461,7 +461,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, int nr_frags = shinfo->nr_frags; const bool sharedslot = nr_frags && frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; - int i, err; + int i, err = 0; /* Check status of header. */ err = (*gopp_copy)->status; -- Gitee From f8f49c44391e93e441307de36e94d9c665c6e55a Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 22 Nov 2022 09:16:59 +0000 Subject: [PATCH 061/243] xen/netback: Ensure protocol headers don't fall in the non-linear area stable inclusion from stable-5.10.159 commit 49e07c0768dbebff672ee1834eff9680fc6277bf category: bugfix issue: I66BND CVE: CVE-2022-3643 Signed-off-by: gaochao --------------------------------------- [ Upstream commit ad7f402ae4f466647c3a669b8a6f3e5d4271c84a ] In some cases, the frontend may send a packet where the protocol headers are spread across multiple slots. This would result in netback creating an skb where the protocol headers spill over into the non-linear area. Some drivers and NICs don't handle this properly resulting in an interface reset or worse. This issue was introduced by the removal of an unconditional skb pull in the tx path to improve performance. Fix this without reintroducing the pull by setting up grant copy ops for as many slots as needed to reach the XEN_NETBACK_TX_COPY_LEN size. Adjust the rest of the code to handle multiple copy operations per skb. 
This is XSA-423 / CVE-2022-3643. Fixes: 7e5d7753956b ("xen-netback: remove unconditional __pskb_pull_tail() in guest Tx path") Signed-off-by: Ross Lagerwall Reviewed-by: Paul Durrant Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/net/xen-netback/netback.c | 223 ++++++++++++++++-------------- 1 file changed, 123 insertions(+), 100 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 4de0e62af4d1..2898d74572b1 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -330,10 +330,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue, struct xenvif_tx_cb { - u16 pending_idx; + u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; + u8 copy_count; }; #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) +#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i]) +#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count) static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, u16 pending_idx, @@ -368,31 +371,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) return skb; } -static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, - struct sk_buff *skb, - struct xen_netif_tx_request *txp, - struct gnttab_map_grant_ref *gop, - unsigned int frag_overflow, - struct sk_buff *nskb) +static void xenvif_get_requests(struct xenvif_queue *queue, + struct sk_buff *skb, + struct xen_netif_tx_request *first, + struct xen_netif_tx_request *txfrags, + unsigned *copy_ops, + unsigned *map_ops, + unsigned int frag_overflow, + struct sk_buff *nskb, + unsigned int extra_count, + unsigned int data_len) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; - int start; + u16 pending_idx; pending_ring_idx_t index; unsigned int nr_slots; + struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops; + struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops; + struct xen_netif_tx_request *txp = first; + + nr_slots = shinfo->nr_frags + 1; + + copy_count(skb) = 0; + + /* Create copy ops for exactly data_len bytes into the skb head. */ + __skb_put(skb, data_len); + while (data_len > 0) { + int amount = data_len > txp->size ? txp->size : data_len; + + cop->source.u.ref = txp->gref; + cop->source.domid = queue->vif->domid; + cop->source.offset = txp->offset; + + cop->dest.domid = DOMID_SELF; + cop->dest.offset = (offset_in_page(skb->data + + skb_headlen(skb) - + data_len)) & ~XEN_PAGE_MASK; + cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) + - data_len); + + cop->len = amount; + cop->flags = GNTCOPY_source_gref; - nr_slots = shinfo->nr_frags; + index = pending_index(queue->pending_cons); + pending_idx = queue->pending_ring[index]; + callback_param(queue, pending_idx).ctx = NULL; + copy_pending_idx(skb, copy_count(skb)) = pending_idx; + copy_count(skb)++; + + cop++; + data_len -= amount; - /* Skip first skb fragment if it is on same page as header fragment. */ - start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); + if (amount == txp->size) { + /* The copy op covered the full tx_request */ + + memcpy(&queue->pending_tx_info[pending_idx].req, + txp, sizeof(*txp)); + queue->pending_tx_info[pending_idx].extra_count = + (txp == first) ? extra_count : 0; + + if (txp == first) + txp = txfrags; + else + txp++; + queue->pending_cons++; + nr_slots--; + } else { + /* The copy op partially covered the tx_request. 
+ * The remainder will be mapped. + */ + txp->offset += amount; + txp->size -= amount; + } + } - for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; - shinfo->nr_frags++, txp++, gop++) { + for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; + shinfo->nr_frags++, gop++) { index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; - xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); + xenvif_tx_create_map_op(queue, pending_idx, txp, + txp == first ? extra_count : 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + + if (txp == first) + txp = txfrags; + else + txp++; } if (frag_overflow) { @@ -413,7 +478,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que skb_shinfo(skb)->frag_list = nskb; } - return gop; + (*copy_ops) = cop - queue->tx_copy_ops; + (*map_ops) = gop - queue->tx_map_ops; } static inline void xenvif_grant_handle_set(struct xenvif_queue *queue, @@ -449,7 +515,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct gnttab_copy **gopp_copy) { struct gnttab_map_grant_ref *gop_map = *gopp_map; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; + u16 pending_idx; /* This always points to the shinfo of the skb being checked, which * could be either the first or the one on the frag_list */ @@ -460,24 +526,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; const bool sharedslot = nr_frags && - frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; + frag_get_pending_idx(&shinfo->frags[0]) == + copy_pending_idx(skb, copy_count(skb) - 1); int i, err = 0; - /* Check status of header. */ - err = (*gopp_copy)->status; - if (unlikely(err)) { - if (net_ratelimit()) - netdev_dbg(queue->vif->dev, - "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", - (*gopp_copy)->status, - pending_idx, - (*gopp_copy)->source.u.ref); - /* The first frag might still have this slot mapped */ - if (!sharedslot) - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_ERROR); + for (i = 0; i < copy_count(skb); i++) { + int newerr; + + /* Check status of header. */ + pending_idx = copy_pending_idx(skb, i); + + newerr = (*gopp_copy)->status; + if (likely(!newerr)) { + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); + } else { + err = newerr; + if (net_ratelimit()) + netdev_dbg(queue->vif->dev, + "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", + (*gopp_copy)->status, + pending_idx, + (*gopp_copy)->source.u.ref); + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); + } + (*gopp_copy)++; } - (*gopp_copy)++; check_frags: for (i = 0; i < nr_frags; i++, gop_map++) { @@ -524,14 +603,6 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, if (err) continue; - /* First error: if the header haven't shared a slot with the - * first frag, release it as well. - */ - if (!sharedslot) - xenvif_idx_release(queue, - XENVIF_TX_CB(skb)->pending_idx, - XEN_NETIF_RSP_OKAY); - /* Invalidate preceding fragments of this skb. 
*/ for (j = 0; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); @@ -801,7 +872,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, unsigned *copy_ops, unsigned *map_ops) { - struct gnttab_map_grant_ref *gop = queue->tx_map_ops; struct sk_buff *skb, *nskb; int ret; unsigned int frag_overflow; @@ -883,8 +953,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, continue; } + data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ? + XEN_NETBACK_TX_COPY_LEN : txreq.size; + ret = xenvif_count_requests(queue, &txreq, extra_count, txfrags, work_to_do); + if (unlikely(ret < 0)) break; @@ -910,9 +984,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, index = pending_index(queue->pending_cons); pending_idx = queue->pending_ring[index]; - data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN && - ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? - XEN_NETBACK_TX_COPY_LEN : txreq.size; + if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size) + data_len = txreq.size; skb = xenvif_alloc_skb(data_len); if (unlikely(skb == NULL)) { @@ -923,8 +996,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, } skb_shinfo(skb)->nr_frags = ret; - if (data_len < txreq.size) - skb_shinfo(skb)->nr_frags++; /* At this point shinfo->nr_frags is in fact the number of * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. */ @@ -986,54 +1057,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, type); } - XENVIF_TX_CB(skb)->pending_idx = pending_idx; - - __skb_put(skb, data_len); - queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; - queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid; - queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; - - queue->tx_copy_ops[*copy_ops].dest.u.gmfn = - virt_to_gfn(skb->data); - queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; - queue->tx_copy_ops[*copy_ops].dest.offset = - offset_in_page(skb->data) & ~XEN_PAGE_MASK; - - queue->tx_copy_ops[*copy_ops].len = data_len; - queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; - - (*copy_ops)++; - - if (data_len < txreq.size) { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - pending_idx); - xenvif_tx_create_map_op(queue, pending_idx, &txreq, - extra_count, gop); - gop++; - } else { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - INVALID_PENDING_IDX); - memcpy(&queue->pending_tx_info[pending_idx].req, - &txreq, sizeof(txreq)); - queue->pending_tx_info[pending_idx].extra_count = - extra_count; - } - - queue->pending_cons++; - - gop = xenvif_get_requests(queue, skb, txfrags, gop, - frag_overflow, nskb); + xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops, + map_ops, frag_overflow, nskb, extra_count, + data_len); __skb_queue_tail(&queue->tx_queue, skb); queue->tx.req_cons = idx; - if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) || + if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) || (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) break; } - (*map_ops) = gop - queue->tx_map_ops; return; } @@ -1112,9 +1148,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) { struct xen_netif_tx_request *txp; u16 pending_idx; - unsigned data_len; - pending_idx = XENVIF_TX_CB(skb)->pending_idx; + pending_idx = copy_pending_idx(skb, 0); txp = &queue->pending_tx_info[pending_idx].req; /* Check the remap error code. 
*/ @@ -1133,18 +1168,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) continue; } - data_len = skb->len; - callback_param(queue, pending_idx).ctx = NULL; - if (data_len < txp->size) { - /* Append the packet payload as a fragment. */ - txp->offset += data_len; - txp->size -= data_len; - } else { - /* Schedule a response immediately. */ - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_OKAY); - } - if (txp->flags & XEN_NETTXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; else if (txp->flags & XEN_NETTXF_data_validated) @@ -1330,7 +1353,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) /* Called after netfront has transmitted */ int xenvif_tx_action(struct xenvif_queue *queue, int budget) { - unsigned nr_mops, nr_cops = 0; + unsigned nr_mops = 0, nr_cops = 0; int work_done, ret; if (unlikely(!tx_work_todo(queue))) -- Gitee From 87da490c926e769222f8400b3a621bfa9cdce28c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 6 Dec 2022 08:54:24 +0100 Subject: [PATCH 062/243] xen/netback: don't call kfree_skb() with interrupts disabled stable inclusion from stable-5.10.159 commit 83632fc41449c480f2d0193683ec202caaa186c9 category: bugfix issue: I66BN5 CVE: CVE-2022-42328 Signed-off-by: gaochao --------------------------------------- [ Upstream commit 74e7e1efdad45580cc3839f2a155174cf158f9b5 ] It is not allowed to call kfree_skb() from hardware interrupt context or with interrupts being disabled. So remove kfree_skb() from the spin_lock_irqsave() section and use the already existing "drop" label in xenvif_start_xmit() for dropping the SKB. At the same time replace the dev_kfree_skb() call there with a call of dev_kfree_skb_any(), as xenvif_start_xmit() can be called with disabled interrupts. This is XSA-424 / CVE-2022-42328 / CVE-2022-42329. 
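The essence of the change, sketched with a placeholder queue type rather than the netback structures: take the decision under the irq-disabled lock, but perform the free only after the lock is dropped.

#include <linux/skbuff.h>
#include <linux/spinlock.h>

struct demo_queue {                     /* placeholder, not xenvif_queue */
    spinlock_t lock;
    struct sk_buff_head skbs;
    bool full;
};

/* Under the irq-disabled lock we only record whether the skb was taken. */
static bool demo_queue_tail(struct demo_queue *q, struct sk_buff *skb)
{
    unsigned long flags;
    bool queued = true;

    spin_lock_irqsave(&q->lock, flags);
    if (q->full)
        queued = false;
    else
        __skb_queue_tail(&q->skbs, skb);
    spin_unlock_irqrestore(&q->lock, flags);

    return queued;
}

/* The drop happens in the caller, after the irqsave section has ended. */
static void demo_xmit(struct demo_queue *q, struct sk_buff *skb)
{
    if (!demo_queue_tail(q, skb))
        dev_kfree_skb_any(skb);
}

dev_kfree_skb_any() is the right call in the transmit path because it works whether or not interrupts are disabled.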
Fixes: be81992f9086 ("xen/netback: don't queue unlimited number of packages") Reported-by: Yang Yingliang Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/net/xen-netback/common.h | 2 +- drivers/net/xen-netback/interface.c | 6 ++++-- drivers/net/xen-netback/rx.c | 8 +++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 6a9178896c90..962e654d4a8d 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -395,7 +395,7 @@ irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); void xenvif_rx_action(struct xenvif_queue *queue); -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 7ce9807fc24c..c98890674e5d 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -269,14 +269,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) skb_clear_hash(skb); - xenvif_rx_queue_tail(queue, skb); + if (!xenvif_rx_queue_tail(queue, skb)) + goto drop; + xenvif_kick_thread(queue); return NETDEV_TX_OK; drop: vif->dev->stats.tx_dropped++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index dbac4c03d21a..1b581d340ab4 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) return false; } -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) { unsigned long flags; + bool ret = true; spin_lock_irqsave(&queue->rx_queue.lock, flags); @@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); - kfree_skb(skb); - queue->vif->dev->stats.rx_dropped++; + ret = false; } else { if (skb_queue_empty(&queue->rx_queue)) xenvif_update_needed_slots(queue, skb); @@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + + return ret; } static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) -- Gitee From 1b77e96029a9c0723812ea2ceec19cfc669116e4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 31 Oct 2022 11:02:45 +0100 Subject: [PATCH 063/243] media: dvb-core: Fix UAF due to refcount races at releasing mainline inclusion from mainline-6.2-rc1 commit fd3d91ab1c6ab0628fe642dd570b56302c30a792 category: bugfix issue: I5UDY5 CVE: CVE-2022-41218 Signed-off-by: gaochao --------------------------------------- The dvb-core tries to sync the releases of opened files at dvb_dmxdev_release() with two refcounts: dvbdev->users and dvr_dvbdev->users. A problem is present in those two syncs: when yet another dvb_demux_open() is called during those sync waits, dvb_demux_open() continues to process even if the device is being closed. 
This includes the increment of the former refcount, resulting in the leftover refcount after the sync of the latter refcount at dvb_dmxdev_release(). It ends up with use-after-free, since the function believes that all usages were gone and releases the resources. This patch addresses the problem by adding the check of dmxdev->exit flag at dvb_demux_open(), just like dvb_dvr_open() already does. With the exit flag check, the second call of dvb_demux_open() fails, hence the further corruption can be avoided. Also for avoiding the races of the dmxdev->exit flag reference, this patch serializes the dmxdev->exit set up and the sync waits with the dmxdev->mutex lock at dvb_dmxdev_release(). Without the mutex lock, dvb_demux_open() (or dvb_dvr_open()) may run concurrently with dvb_dmxdev_release(), which allows to skip the exit flag check and continue the open process that is being closed. CVE-2022-41218 is assigned to those bugs above. Reported-by: Hyunwoo Kim Cc: Link: https://lore.kernel.org/20220908132754.30532-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Hans Verkuil --- drivers/media/dvb-core/dmxdev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index e58cb8434daf..12b7f698f562 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -800,6 +800,11 @@ static int dvb_demux_open(struct inode *inode, struct file *file) if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS; + if (dmxdev->exit) { + mutex_unlock(&dmxdev->mutex); + return -ENODEV; + } + for (i = 0; i < dmxdev->filternum; i++) if (dmxdev->filter[i].state == DMXDEV_STATE_FREE) break; @@ -1458,7 +1463,10 @@ EXPORT_SYMBOL(dvb_dmxdev_init); void dvb_dmxdev_release(struct dmxdev *dmxdev) { + mutex_lock(&dmxdev->mutex); dmxdev->exit = 1; + mutex_unlock(&dmxdev->mutex); + if (dmxdev->dvbdev->users > 1) { wait_event(dmxdev->dvbdev->wait_queue, dmxdev->dvbdev->users == 1); -- Gitee From 8e5d9b2596600a5f4e94e661a27ab8a109399a0c Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Thu, 10 Nov 2022 11:50:33 +0800 Subject: [PATCH 064/243] misc: sgi-gru: fix use-after-free error in gru_set_context_option, gru_fault and gru_handle_user_call_os mainline inclusion from mainline-6.2-rc1 commit 643a16a0eb1d6ac23744bb6e90a00fc21148a9dc category: bugfix issue: I5VMGF CVE: CVE-2022-3424 Signed-off-by: gaochao --------------------------------------- In some bad situation, the gts may be freed gru_check_chiplet_assignment. The call chain can be gru_unload_context->gru_free_gru_context->gts_drop and kfree finally. However, the caller didn't know if the gts is freed or not and use it afterwards. This will trigger a Use after Free bug. Fix it by introducing a return value to see if it's in error path or not. Free the gts in caller if gru_check_chiplet_assignment check failed. 
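The ownership rule behind the fix, as a small illustrative sketch in plain C (names and types are made up, not the sgi-gru symbols): the checking helper reports the problem, and only the caller releases the context.

#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>

struct ctx { bool well_placed; };       /* stand-in for the gru thread state */

/* After the fix: the helper only reports a bad placement... */
static int check_placement(const struct ctx *c)
{
    return c->well_placed ? 0 : -EINVAL;
}

/* ...and the caller is the single place where the context is torn down. */
static void handle(struct ctx *c)
{
    if (check_placement(c)) {
        free(c);                        /* one teardown point, no later use */
        return;                         /* the driver retries with a fresh context */
    }
    /* c can still be dereferenced safely here */
}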
Fixes: 55484c45dbec ("gru: allow users to specify gru chiplet 2") Signed-off-by: Zheng Wang Acked-by: Dimitri Sivanich Link: https://lore.kernel.org/r/20221110035033.19498-1-zyytlz.wz@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/sgi-gru/grufault.c | 13 +++++++++++-- drivers/misc/sgi-gru/grumain.c | 22 ++++++++++++++++++---- drivers/misc/sgi-gru/grutables.h | 2 +- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 723825524ea0..9c7d475d1890 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -648,6 +648,7 @@ int gru_handle_user_call_os(unsigned long cb) if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) return -EINVAL; +again: gts = gru_find_lock_gts(cb); if (!gts) return -EINVAL; @@ -656,7 +657,11 @@ int gru_handle_user_call_os(unsigned long cb) if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) goto exit; - gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + gru_unlock_gts(gts); + gru_unload_context(gts, 1); + goto again; + } /* * CCH may contain stale data if ts_force_cch_reload is set. @@ -874,7 +879,11 @@ int gru_set_context_option(unsigned long arg) } else { gts->ts_user_blade_id = req.val1; gts->ts_user_chiplet_id = req.val0; - gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + gru_unlock_gts(gts); + gru_unload_context(gts, 1); + return ret; + } } break; case sco_gseg_owner: diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 40ac59dd018c..e2325e3d077e 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -716,9 +716,10 @@ static int gru_check_chiplet_assignment(struct gru_state *gru, * chiplet. Misassignment can occur if the process migrates to a different * blade or if the user changes the selected blade/chiplet. */ -void gru_check_context_placement(struct gru_thread_state *gts) +int gru_check_context_placement(struct gru_thread_state *gts) { struct gru_state *gru; + int ret = 0; /* * If the current task is the context owner, verify that the @@ -726,15 +727,23 @@ void gru_check_context_placement(struct gru_thread_state *gts) * references. Pthread apps use non-owner references to the CBRs. */ gru = gts->ts_gru; + /* + * If gru or gts->ts_tgid_owner isn't initialized properly, return + * success to indicate that the caller does not need to unload the + * gru context.The caller is responsible for their inspection and + * reinitialization if needed. 
+ */ if (!gru || gts->ts_tgid_owner != current->tgid) - return; + return ret; if (!gru_check_chiplet_assignment(gru, gts)) { STAT(check_context_unload); - gru_unload_context(gts, 1); + ret = -EINVAL; } else if (gru_retarget_intr(gts)) { STAT(check_context_retarget_intr); } + + return ret; } @@ -934,7 +943,12 @@ vm_fault_t gru_fault(struct vm_fault *vmf) mutex_lock(>s->ts_ctxlock); preempt_disable(); - gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + preempt_enable(); + mutex_unlock(>s->ts_ctxlock); + gru_unload_context(gts, 1); + return VM_FAULT_NOPAGE; + } if (!gts->ts_gru) { STAT(load_user_context); diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 5ce8f3081e96..10f0a083b1fa 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -637,7 +637,7 @@ extern int gru_user_flush_tlb(unsigned long arg); extern int gru_user_unload_context(unsigned long arg); extern int gru_get_exception_detail(unsigned long arg); extern int gru_set_context_option(unsigned long address); -extern void gru_check_context_placement(struct gru_thread_state *gts); +extern int gru_check_context_placement(struct gru_thread_state *gts); extern int gru_cpu_fault_map_id(void); extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); extern void gru_flush_all_tlb(struct gru_state *gru); -- Gitee From 974d9b118f3e7512e81f719a86f31059d01fce39 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 19 Oct 2022 14:36:20 -0700 Subject: [PATCH 065/243] KVM: VMX: Execute IBPB on emulated VM-exit when guest has IBRS mainline inclusion from mainline-v6.2-rc1 commit 2e7eab81425ad6c875f2ed47c0ce01e78afc38a5 category: bugfix issue: I69PJS CVE: CVE-2022-2196 Signed-off-by: gaochao --------------------------------------- According to Intel's document on Indirect Branch Restricted Speculation, "Enabling IBRS does not prevent software from controlling the predicted targets of indirect branches of unrelated software executed later at the same predictor mode (for example, between two different user applications, or two different virtual machines). Such isolation can be ensured through use of the Indirect Branch Predictor Barrier (IBPB) command." This applies to both basic and enhanced IBRS. Since L1 and L2 VMs share hardware predictor modes (guest-user and guest-kernel), hardware IBRS is not sufficient to virtualize IBRS. (The way that basic IBRS is implemented on pre-eIBRS parts, hardware IBRS is actually sufficient in practice, even though it isn't sufficient architecturally.) For virtual CPUs that support IBRS, add an indirect branch prediction barrier on emulated VM-exit, to ensure that the predicted targets of indirect branches executed in L1 cannot be controlled by software that was executed in L2. Since we typically don't intercept guest writes to IA32_SPEC_CTRL, perform the IBPB at emulated VM-exit regardless of the current IA32_SPEC_CTRL.IBRS value, even though the IBPB could technically be deferred until L1 sets IA32_SPEC_CTRL.IBRS, if IA32_SPEC_CTRL.IBRS is clear at emulated VM-exit. This is CVE-2022-2196. 
Fixes: 5c911beff20a ("KVM: nVMX: Skip IBPB when switching between vmcs01 and vmcs02") Cc: Sean Christopherson Signed-off-by: Jim Mattson Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20221019213620.1953281-3-jmattson@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/nested.c | 11 +++++++++++ arch/x86/kvm/vmx/vmx.c | 6 ++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 07e2ca5ccb80..6aaf1bf68028 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4497,6 +4497,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, vmx_switch_vmcs(vcpu, &vmx->vmcs01); + /* + * If IBRS is advertised to the vCPU, KVM must flush the indirect + * branch predictors when transitioning from L2 to L1, as L1 expects + * hardware (KVM in this case) to provide separate predictor modes. + * Bare metal isolates VMX root (host) from VMX non-root (guest), but + * doesn't isolate different VMCSs, i.e. in this case, doesn't provide + * separate modes for L2 vs L1. + */ + if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + indirect_branch_prediction_barrier(); + /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cf95e490b8e7..58d453b66677 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1431,8 +1431,10 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, /* * No indirect branch prediction barrier needed when switching - * the active VMCS within a guest, e.g. on nested VM-Enter. - * The L1 VMM can protect itself with retpolines, IBPB or IBRS. + * the active VMCS within a vCPU, unless IBRS is advertised to + * the vCPU. To minimize the number of IBPBs executed, KVM + * performs IBPB on nested VM-Exit (a single nested transition + * may switch the active VMCS multiple times). */ if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) indirect_branch_prediction_barrier(); -- Gitee From fd2768bd11c91d8cca8a270f834bce893b508c83 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 4 Nov 2022 17:54:49 +0000 Subject: [PATCH 066/243] binder: fix UAF of alloc->vma in race with munmap() stable inclusion from stable-v5.10.154 commit 015ac18be7de25d17d6e5f1643cb3b60bfbe859e category: bugfix issue: I6A5HR CVE: CVE-2023-20928 Signed-off-by: gaochao --------------------------------------- In commit 720c24192404 ("ANDROID: binder: change down_write to down_read") binder assumed the mmap read lock is sufficient to protect alloc->vma inside binder_update_page_range(). This used to be accurate until commit dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap"), which now downgrades the mmap_lock after detaching the vma from the rbtree in munmap(). Then it proceeds to teardown and free the vma with only the read lock held. 
This means that accesses to alloc->vma in binder_update_page_range() now will race with vm_area_free() in munmap() and can cause a UAF as shown in the following KASAN trace: ================================================================== BUG: KASAN: use-after-free in vm_insert_page+0x7c/0x1f0 Read of size 8 at addr ffff16204ad00600 by task server/558 CPU: 3 PID: 558 Comm: server Not tainted 5.10.150-00001-gdc8dcf942daa #1 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x2a0 show_stack+0x18/0x2c dump_stack+0xf8/0x164 print_address_description.constprop.0+0x9c/0x538 kasan_report+0x120/0x200 __asan_load8+0xa0/0xc4 vm_insert_page+0x7c/0x1f0 binder_update_page_range+0x278/0x50c binder_alloc_new_buf+0x3f0/0xba0 binder_transaction+0x64c/0x3040 binder_thread_write+0x924/0x2020 binder_ioctl+0x1610/0x2e5c __arm64_sys_ioctl+0xd4/0x120 el0_svc_common.constprop.0+0xac/0x270 do_el0_svc+0x38/0xa0 el0_svc+0x1c/0x2c el0_sync_handler+0xe8/0x114 el0_sync+0x180/0x1c0 Allocated by task 559: kasan_save_stack+0x38/0x6c __kasan_kmalloc.constprop.0+0xe4/0xf0 kasan_slab_alloc+0x18/0x2c kmem_cache_alloc+0x1b0/0x2d0 vm_area_alloc+0x28/0x94 mmap_region+0x378/0x920 do_mmap+0x3f0/0x600 vm_mmap_pgoff+0x150/0x17c ksys_mmap_pgoff+0x284/0x2dc __arm64_sys_mmap+0x84/0xa4 el0_svc_common.constprop.0+0xac/0x270 do_el0_svc+0x38/0xa0 el0_svc+0x1c/0x2c el0_sync_handler+0xe8/0x114 el0_sync+0x180/0x1c0 Freed by task 560: kasan_save_stack+0x38/0x6c kasan_set_track+0x28/0x40 kasan_set_free_info+0x24/0x4c __kasan_slab_free+0x100/0x164 kasan_slab_free+0x14/0x20 kmem_cache_free+0xc4/0x34c vm_area_free+0x1c/0x2c remove_vma+0x7c/0x94 __do_munmap+0x358/0x710 __vm_munmap+0xbc/0x130 __arm64_sys_munmap+0x4c/0x64 el0_svc_common.constprop.0+0xac/0x270 do_el0_svc+0x38/0xa0 el0_svc+0x1c/0x2c el0_sync_handler+0xe8/0x114 el0_sync+0x180/0x1c0 [...] ================================================================== To prevent the race above, revert back to taking the mmap write lock inside binder_update_page_range(). One might expect an increase of mmap lock contention. However, binder already serializes these calls via top level alloc->mutex. Also, there was no performance impact shown when running the binder benchmark tests. Note this patch is specific to stable branches 5.4 and 5.10. Since in newer kernel releases binder no longer caches a pointer to the vma. Instead, it has been refactored to use vma_lookup() which avoids the issue described here. This switch was introduced in commit a43cfc87caaf ("android: binder: stop saving a pointer to the VMA"). 
Fixes: dd2283f2605e ("mm: mmap: zap pages with read mmap_sem in munmap") Reported-by: Jann Horn Cc: # 5.10.x Cc: Minchan Kim Cc: Yang Shi Cc: Liam Howlett Signed-off-by: Carlos Llamas Acked-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 95ca4f934d28..a77ed66425f2 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -212,7 +212,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, mm = alloc->vma_vm_mm; if (mm) { - mmap_read_lock(mm); + mmap_write_lock(mm); vma = alloc->vma; } @@ -270,7 +270,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, trace_binder_alloc_page_end(alloc, index); } if (mm) { - mmap_read_unlock(mm); + mmap_write_unlock(mm); mmput(mm); } return 0; @@ -303,7 +303,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, } err_no_vma: if (mm) { - mmap_read_unlock(mm); + mmap_write_unlock(mm); mmput(mm); } return vma ? -ENOMEM : -ESRCH; -- Gitee From 3bf532a81ed92a12f49278a00dce64ee2a40094a Mon Sep 17 00:00:00 2001 From: linqiheng Date: Sat, 14 Jan 2023 11:29:24 +0800 Subject: [PATCH 067/243] hmdfs: fix nullptr dereference hmdfs_init_writeback Signed-off-by: linqiheng --- fs/hmdfs/client_writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hmdfs/client_writeback.c b/fs/hmdfs/client_writeback.c index d4da7ec482a5..d62c286affc1 100644 --- a/fs/hmdfs/client_writeback.c +++ b/fs/hmdfs/client_writeback.c @@ -512,7 +512,7 @@ int hmdfs_init_writeback(struct hmdfs_sb_info *sbi) free_i_wq: destroy_workqueue(hwb->dirty_inode_writeback_wq); free_bdp: - free_percpu(sbi->h_wb->bdp_ratelimits); + free_percpu(hwb->bdp_ratelimits); free_hwb: kfree(hwb); return ret; -- Gitee From e997e851fffb3bbc6e5f7b9924e84c4e9986177f Mon Sep 17 00:00:00 2001 From: linqiheng Date: Sat, 14 Jan 2023 11:29:24 +0800 Subject: [PATCH 068/243] hmdfs: fix memleak in hmdfs_match_strdup The old *dst would not be freed when parsing duplicated tokens. 
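The leak-free shape for a repeatable string option, shown as a stand-alone sketch using plain libc rather than the kernel helpers:

#include <stdlib.h>
#include <string.h>

/*
 * Store a fresh copy of 'val' in *dst, releasing any earlier copy so a
 * repeated "option=value" token cannot leak the previous duplicate.
 */
static int set_string_option(char **dst, const char *val)
{
    char *dup = strdup(val);

    if (!dup)
        return -1;

    free(*dst);                 /* free(NULL) is a no-op, so first use is fine */
    *dst = dup;
    return 0;
}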
Signed-off-by: linqiheng --- fs/hmdfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/hmdfs/super.c b/fs/hmdfs/super.c index 52cc857f5e45..a4b1170153b3 100644 --- a/fs/hmdfs/super.c +++ b/fs/hmdfs/super.c @@ -63,6 +63,8 @@ static int hmdfs_match_strdup(const substring_t *s, char **dst) if (!dup) return -ENOMEM; + if (*dst) + kfree(*dst); *dst = dup; return 0; -- Gitee From fe448e1df5904e999f3dd2cbc6db335c983e79ee Mon Sep 17 00:00:00 2001 From: linqiheng Date: Sat, 14 Jan 2023 11:29:24 +0800 Subject: [PATCH 069/243] hmdfs: remove log of cur_file_ver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit file_version is the unique value initialized at mount time, and its log leakage will make the attack less difficult。 Signed-off-by: linqiheng --- fs/hmdfs/hmdfs_server.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hmdfs/hmdfs_server.c b/fs/hmdfs/hmdfs_server.c index dcb15c8b6446..fc0274233970 100644 --- a/fs/hmdfs/hmdfs_server.c +++ b/fs/hmdfs/hmdfs_server.c @@ -315,8 +315,8 @@ static struct file *get_file_by_fid_and_ver(struct hmdfs_peer *con, __u64 cur_file_ver = hmdfs_server_pack_fid_ver(con, cmd); if (file_ver != cur_file_ver) { - hmdfs_warning("Stale file version %llu for fid %u (ver %llu)", - file_ver, file_id, cur_file_ver); + hmdfs_warning("Stale file version %llu for fid %u", + file_ver, file_id); return ERR_PTR(-EBADF); } -- Gitee From 83644f5bc675b2d5d0eec7f533f3b62dbf735a23 Mon Sep 17 00:00:00 2001 From: linqiheng Date: Sat, 14 Jan 2023 11:29:25 +0800 Subject: [PATCH 070/243] hmdfs: fix nullptr dereference of share_item_timeout_wq Add check of `share_item_timeout_wq` after create_singlethread_workqueue(), to avoid nullptr dereference when destroy workqueue in hmdfs_clear_share_table(). 
Signed-off-by: linqiheng --- fs/hmdfs/hmdfs_share.c | 6 +++++- fs/hmdfs/hmdfs_share.h | 2 +- fs/hmdfs/main.c | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/hmdfs/hmdfs_share.c b/fs/hmdfs/hmdfs_share.c index c8314a5d7fca..6b9557d02263 100644 --- a/fs/hmdfs/hmdfs_share.c +++ b/fs/hmdfs/hmdfs_share.c @@ -302,7 +302,7 @@ int hmdfs_check_share_access_permission(struct hmdfs_sb_info *sbi, } -void hmdfs_init_share_table(struct hmdfs_sb_info *sbi) +int hmdfs_init_share_table(struct hmdfs_sb_info *sbi) { spin_lock_init(&sbi->share_table.item_list_lock); INIT_LIST_HEAD(&sbi->share_table.item_list_head); @@ -310,6 +310,10 @@ void hmdfs_init_share_table(struct hmdfs_sb_info *sbi) sbi->share_table.max_cnt = HMDFS_SHARE_ITEMS_MAX; sbi->share_table.share_item_timeout_wq = create_singlethread_workqueue("share_item_timeout_wq"); + + if (!sbi->share_table.share_item_timeout_wq) + return -ENOMEM; + return 0; } void hmdfs_clear_share_table(struct hmdfs_sb_info *sbi) diff --git a/fs/hmdfs/hmdfs_share.h b/fs/hmdfs/hmdfs_share.h index eef53bf19bab..3c055805bd6d 100644 --- a/fs/hmdfs/hmdfs_share.h +++ b/fs/hmdfs/hmdfs_share.h @@ -60,7 +60,7 @@ void hmdfs_close_share_item(struct hmdfs_sb_info *sbi, struct file *file, int hmdfs_check_share_access_permission(struct hmdfs_sb_info *sbi, const char *filename, char *cid); -void hmdfs_init_share_table(struct hmdfs_sb_info *sbi); +int hmdfs_init_share_table(struct hmdfs_sb_info *sbi); void hmdfs_clear_share_table(struct hmdfs_sb_info *sbi); int hmdfs_clear_first_item(struct hmdfs_share_table *st); diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index d0a41061bb2f..9443abde8057 100644 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -712,7 +712,9 @@ static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi) mutex_init(&sbi->connections.node_lock); INIT_LIST_HEAD(&sbi->connections.node_list); - hmdfs_init_share_table(sbi); + ret = hmdfs_init_share_table(sbi); + if (ret) + goto out; init_waitqueue_head(&sbi->async_readdir_wq); INIT_LIST_HEAD(&sbi->async_readdir_msg_list); INIT_LIST_HEAD(&sbi->async_readdir_work_list); -- Gitee From b1967e9f7b8648c3f3039dc7e49234a48c125045 Mon Sep 17 00:00:00 2001 From: linqiheng Date: Sat, 14 Jan 2023 11:29:25 +0800 Subject: [PATCH 071/243] hmdfs: fix nullptr dereference of kern_path(dev_name) in hmdfs_fill_super Signed-off-by: linqiheng --- fs/hmdfs/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index 9443abde8057..33484bc23be2 100644 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -956,6 +956,10 @@ static struct dentry *hmdfs_mount(struct file_system_type *fs_type, int flags, .dev_name = dev_name, .raw_data = raw_data, }; + + /* hmdfs needs a valid dev_name to get the lower_sb's metadata */ + if (!dev_name || !*dev_name) + return ERR_PTR(-EINVAL); return mount_nodev(fs_type, flags, &priv, hmdfs_fill_super); } -- Gitee From 995096ee3b81aa2dbcc2f59e546f4aeecbcf9d4e Mon Sep 17 00:00:00 2001 From: linqiheng Date: Sat, 14 Jan 2023 11:29:25 +0800 Subject: [PATCH 072/243] hmdfs: fix memcpy reads d_name out of bounds Signed-off-by: linqiheng --- fs/hmdfs/inode_root.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hmdfs/inode_root.c b/fs/hmdfs/inode_root.c index 30d0ca6a2264..b3b2b5c2b2fc 100644 --- a/fs/hmdfs/inode_root.c +++ b/fs/hmdfs/inode_root.c @@ -153,7 +153,7 @@ struct dentry *hmdfs_device_lookup(struct inode *parent_inode, ret_dentry = ERR_PTR(err); goto out; } - memcpy(cid, d_name, HMDFS_CID_SIZE); + strncpy(cid, d_name, HMDFS_CID_SIZE); 
cid[HMDFS_CID_SIZE] = '\0'; con = hmdfs_lookup_from_cid(sbi, cid); if (!con) { -- Gitee From f898c313c319f3af8ba9c405d2101cf027380627 Mon Sep 17 00:00:00 2001 From: linqiheng Date: Sat, 14 Jan 2023 11:29:25 +0800 Subject: [PATCH 073/243] hmdfs: fix client readdir dentry file leak Signed-off-by: linqiheng --- fs/hmdfs/comm/socket_adapter.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c index eff3d3e1c044..913d667f0f73 100644 --- a/fs/hmdfs/comm/socket_adapter.c +++ b/fs/hmdfs/comm/socket_adapter.c @@ -409,18 +409,23 @@ int hmdfs_sendmessage_request(struct hmdfs_peer *con, struct hmdfs_head_cmd *head = NULL; bool dec = false; - if (!hmdfs_is_node_online(con)) - return -EAGAIN; + if (!hmdfs_is_node_online(con)) { + ret = -EAGAIN; + goto free_filp; + } if (timeout == TIMEOUT_UNINIT) { hmdfs_err_ratelimited("send msg %d with uninitialized timeout", sm->operations.command); - return -EINVAL; + ret = -EINVAL; + goto free_filp; } head = kzalloc(sizeof(struct hmdfs_head_cmd), GFP_KERNEL); - if (!head) - return -ENOMEM; + if (!head) { + ret = -ENOMEM; + goto free_filp; + } sm->out_buf = NULL; head->magic = HMDFS_MSG_MAGIC; @@ -504,6 +509,11 @@ int hmdfs_sendmessage_request(struct hmdfs_peer *con, hmdfs_dec_msg_idr_process(con); kfree(head); return ret; + +free_filp: + if (sm->local_filp) + fput(sm->local_filp); + return ret; } static int hmdfs_send_slice(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, -- Gitee From 5eb8e2669d504535738e5d6c0f3e4da0599c602a Mon Sep 17 00:00:00 2001 From: linqiheng Date: Sat, 14 Jan 2023 11:29:25 +0800 Subject: [PATCH 074/243] hmdfs: fix memleak of outdata when recv message error case Signed-off-by: linqiheng --- fs/hmdfs/comm/connection.c | 2 +- fs/hmdfs/comm/socket_adapter.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/hmdfs/comm/connection.c b/fs/hmdfs/comm/connection.c index 5a2e5b0f5b02..7613da514c7c 100644 --- a/fs/hmdfs/comm/connection.c +++ b/fs/hmdfs/comm/connection.c @@ -706,7 +706,7 @@ void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, } fallthrough; default: - return; + break; } out: kfree(data); diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c index 913d667f0f73..7005303e3735 100644 --- a/fs/hmdfs/comm/socket_adapter.c +++ b/fs/hmdfs/comm/socket_adapter.c @@ -1092,7 +1092,7 @@ static void hmdfs_recv_mesg_callback(struct hmdfs_peer *con, void *head, default: hmdfs_err("Fatal! 
Unexpected msg cmd %d", hmdfs_head->operations.cmd_flag); - break; + goto out_err; } return; -- Gitee From cfe893efbfc3150d92877ca54d2013813f6bfd08 Mon Sep 17 00:00:00 2001 From: linqiheng Date: Sat, 14 Jan 2023 11:29:25 +0800 Subject: [PATCH 075/243] hmdfs: fix double free of sbi->local_dst Signed-off-by: linqiheng --- fs/hmdfs/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index 33484bc23be2..53d698917969 100644 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -764,6 +764,7 @@ static int hmdfs_update_dst(struct hmdfs_sb_info *sbi) goto out_err; } kfree(sbi->local_dst); + sbi->local_dst = NULL; len = strlen(sbi->real_dst) + strlen(path_local) + 1; if (len > PATH_MAX) { -- Gitee From 151b7a7df7a540caa9de53a6907d7523cc6f18ea Mon Sep 17 00:00:00 2001 From: linqiheng Date: Sat, 14 Jan 2023 11:29:25 +0800 Subject: [PATCH 076/243] hmdfs: fix the message version check Only the head version of 2.0 is supported, and the corresponding message length must be checked. Signed-off-by: linqiheng Change-Id: I2f55f9ef7822c701f4ca2ccee61e8ff2088c1bb1 --- fs/hmdfs/comm/message_verify.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/hmdfs/comm/message_verify.c b/fs/hmdfs/comm/message_verify.c index c9eb94d8b615..fd76658ef16d 100644 --- a/fs/hmdfs/comm/message_verify.c +++ b/fs/hmdfs/comm/message_verify.c @@ -934,7 +934,9 @@ int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, goto handle_bad_msg; } - if (head->version == DFS_2_0) { + if (head->version != DFS_2_0) { + err = -EINVAL; + } else { len = le32_to_cpu(head->data_len) - sizeof(struct hmdfs_head_cmd); min = message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX]; @@ -973,13 +975,6 @@ int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, } handle_bad_msg: - if (err) { - handle_bad_message(con, head, &err); - return err; - } - - if (head->version == DFS_1_0) - return err; // now, DFS_1_0 version do not verify - - return -EINVAL; + handle_bad_message(con, head, &err); + return err; } -- Gitee From a73def80b6d2c9c85c561ad50d1690706f5fef76 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 24 Aug 2020 14:27:46 +0200 Subject: [PATCH 077/243] media: dvbdev: Fix memleak in dvb_register_device mainline inclusion from v5.11-rc1 commit 167faadfcf9339088910e9e85a1b711fcbbef8e9 category: bugfix issue: I63U62 CVE: NA Signed-off-by: gaochao --------------------------------------- When device_create() fails, dvbdev and dvbdevfops should be freed just like when dvb_register_media_device() fails. 
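One common way to keep such error paths consistent is a goto-unwind ladder; a hedged sketch of that shape in plain C, where register_node() merely stands in for the late failing step such as device_create(), and none of the names are the dvbdev ones:

#include <stdlib.h>

struct ops   { int flags; };
struct thing { struct ops *fops; };

/* Stand-in for the late step that can fail. */
static int register_node(struct thing *t) { (void)t; return 0; }

/* Every failure exit unwinds exactly what was set up before it. */
static int register_thing(struct thing **out)
{
    struct thing *t;
    struct ops *o;

    t = calloc(1, sizeof(*t));
    if (!t)
        return -1;

    o = calloc(1, sizeof(*o));
    if (!o)
        goto err_free_thing;
    t->fops = o;

    if (register_node(t) != 0)
        goto err_free_ops;              /* late failures must free both */

    *out = t;
    return 0;

err_free_ops:
    free(o);
err_free_thing:
    free(t);
    return -1;
}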
Signed-off-by: Dinghao Liu Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dvbdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index ec9ebff28552..3862ddc86ec4 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -540,6 +540,9 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, if (IS_ERR(clsdev)) { pr_err("%s: failed to create device dvb%d.%s%d (%ld)\n", __func__, adap->num, dnames[type], id, PTR_ERR(clsdev)); + dvb_media_device_free(dvbdev); + kfree(dvbdevfops); + kfree(dvbdev); return PTR_ERR(clsdev); } dprintk("DVB: register adapter%d/%s%d @ minor: %i (0x%02x)\n", -- Gitee From efadd0f5c68f621236a532ba226ba2a1e4735151 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 9 Jun 2021 14:32:29 +0200 Subject: [PATCH 078/243] media: dvbdev: fix error logic at dvb_register_device() mainline inclusion from v5.14-rc1 commit 1fec2ecc252301110e4149e6183fa70460d29674 category: bugfix issue: I63U62 CVE: NA Signed-off-by: gaochao --------------------------------------- As reported by smatch: drivers/media/dvb-core/dvbdev.c: drivers/media/dvb-core/dvbdev.c:510 dvb_register_device() warn: '&dvbdev->list_head' not removed from list drivers/media/dvb-core/dvbdev.c: drivers/media/dvb-core/dvbdev.c:530 dvb_register_device() warn: '&dvbdev->list_head' not removed from list drivers/media/dvb-core/dvbdev.c: drivers/media/dvb-core/dvbdev.c:545 dvb_register_device() warn: '&dvbdev->list_head' not removed from list The error logic inside dvb_register_device() doesn't remove devices from the dvb_adapter_list in case of errors. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/dvb-core/dvbdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index 3862ddc86ec4..795d9bfaba5c 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -506,6 +506,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, break; if (minor == MAX_DVB_MINORS) { + list_del (&dvbdev->list_head); kfree(dvbdevfops); kfree(dvbdev); up_write(&minor_rwsem); @@ -526,6 +527,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, __func__); dvb_media_device_free(dvbdev); + list_del (&dvbdev->list_head); kfree(dvbdevfops); kfree(dvbdev); mutex_unlock(&dvbdev_register_lock); @@ -541,6 +543,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, pr_err("%s: failed to create device dvb%d.%s%d (%ld)\n", __func__, adap->num, dnames[type], id, PTR_ERR(clsdev)); dvb_media_device_free(dvbdev); + list_del (&dvbdev->list_head); kfree(dvbdevfops); kfree(dvbdev); return PTR_ERR(clsdev); -- Gitee From f3a1b189eceb8dea5ae6bd7ff82f482ca2aac6e4 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Mon, 16 Jan 2023 15:07:52 +0800 Subject: [PATCH 079/243] media: dvb-core: Fix use-after-free due to race condition occurring in dvb_ca_en50221 maillist inclusion category: bugfix issue: I63NCW CVE: CVE-2022-45919 Reference: https://lore.kernel.org/linux-media/20221121063308.GA33821@ubuntu/ Signed-off-by: gaochao --------------------------------------- If the device node of dvb_ca_en50221 is open() and the device is disconnected, a UAF may occur when calling close() on the device node. The root cause is that wake_up() and wait_event() for dvbdev->wait_queue are not implemented. 
So implement wait_event() function in dvb_ca_en50221_release() and add 'remove_mutex' which prevents race condition for 'ca->exit'. Signed-off-by: Hyunwoo Kim --- drivers/media/dvb-core/dvb_ca_en50221.c | 36 ++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c index cfc27629444f..7accdd93f829 100644 --- a/drivers/media/dvb-core/dvb_ca_en50221.c +++ b/drivers/media/dvb-core/dvb_ca_en50221.c @@ -151,6 +151,12 @@ struct dvb_ca_private { /* mutex serializing ioctls */ struct mutex ioctl_mutex; + + /* A mutex used when a device is disconnected */ + struct mutex remove_mutex; + + /* Whether the device is disconnected */ + int exit; }; static void dvb_ca_private_free(struct dvb_ca_private *ca) @@ -1706,12 +1712,22 @@ static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file) dprintk("%s\n", __func__); - if (!try_module_get(ca->pub->owner)) + mutex_lock(&ca->remove_mutex); + + if (ca->exit) { + mutex_unlock(&ca->remove_mutex); + return -ENODEV; + } + + if (!try_module_get(ca->pub->owner)) { + mutex_unlock(&ca->remove_mutex); return -EIO; + } err = dvb_generic_open(inode, file); if (err < 0) { module_put(ca->pub->owner); + mutex_unlock(&ca->remove_mutex); return err; } @@ -1736,6 +1752,7 @@ static int dvb_ca_en50221_io_open(struct inode *inode, struct file *file) dvb_ca_private_get(ca); + mutex_unlock(&ca->remove_mutex); return 0; } @@ -1755,6 +1772,8 @@ static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file) dprintk("%s\n", __func__); + mutex_lock(&ca->remove_mutex); + /* mark the CA device as closed */ ca->open = 0; dvb_ca_en50221_thread_update_delay(ca); @@ -1765,6 +1784,12 @@ static int dvb_ca_en50221_io_release(struct inode *inode, struct file *file) dvb_ca_private_put(ca); + if (dvbdev->users == 1 && ca->exit == 1) { + mutex_unlock(&ca->remove_mutex); + wake_up(&dvbdev->wait_queue); + } else + mutex_unlock(&ca->remove_mutex); + return err; } @@ -1888,6 +1913,7 @@ int dvb_ca_en50221_init(struct dvb_adapter *dvb_adapter, } mutex_init(&ca->ioctl_mutex); + mutex_init(&ca->remove_mutex); if (signal_pending(current)) { ret = -EINTR; @@ -1930,6 +1956,14 @@ void dvb_ca_en50221_release(struct dvb_ca_en50221 *pubca) dprintk("%s\n", __func__); + mutex_lock(&ca->remove_mutex); + ca->exit = 1; + mutex_unlock(&ca->remove_mutex); + + if (ca->dvbdev->users < 1) + wait_event(ca->dvbdev->wait_queue, + ca->dvbdev->users == 1); + /* shutdown the thread if there was one */ kthread_stop(ca->thread); -- Gitee From 62f5d1b69b28e35d1ff2a4f5336f01fe3712fd49 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Wed, 16 Nov 2022 20:59:22 -0800 Subject: [PATCH 080/243] media: dvb-core: Fix use-after-free due to race condition occurring in dvb_frontend maillist inclusion category: bugfix issue: I63U5S CVE: CVE-2022-45884 CVE-2022-45885 CVE-2022-45886 CVE-2022-45887 Reference: https://lore.kernel.org/all/20221117045925.14297-2-imv4bel@gmail.com/ Signed-off-by: gaochao --------------------------------------- If the device node of dvb_frontend is open() and the device is disconnected, many kinds of UAFs may occur when calling close() on the device node. The root cause of this is that wake_up() for dvbdev->wait_queue is implemented in the dvb_frontend_release() function, but wait_event() is not implemented in the dvb_frontend_stop() function. So, implement wait_event() function in dvb_frontend_stop() and add 'remove_mutex' which prevents race condition for 'fe->exit'. 
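Reduced to a skeleton, the teardown ordering these fixes establish looks roughly like this; the demo_dev type and both functions are illustrative, not the dvb_ca_en50221 or dvb_frontend code:

#include <linux/mutex.h>
#include <linux/wait.h>

struct demo_dev {                       /* illustrative only */
    struct mutex remove_mutex;
    wait_queue_head_t wait_queue;
    int users;
    int exit;
};

/* Teardown: flag the exit under the mutex, then wait for the last user. */
static void demo_stop(struct demo_dev *d)
{
    mutex_lock(&d->remove_mutex);
    d->exit = 1;                        /* open() now refuses new users */
    mutex_unlock(&d->remove_mutex);

    wait_event(d->wait_queue, d->users == 0);
    /* only here is it safe to free what open()/release() dereference */
}

/* Release: the last closer wakes the waiter. */
static void demo_release(struct demo_dev *d)
{
    mutex_lock(&d->remove_mutex);
    d->users--;
    if (d->users == 0 && d->exit)
        wake_up(&d->wait_queue);
    mutex_unlock(&d->remove_mutex);
}

The point is that the exit flag flips under the same mutex that open() takes, so a late open() cannot race past the teardown, and resources are released only after the user count has drained.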
Signed-off-by: Hyunwoo Kim --- drivers/media/dvb-core/dvb_frontend.c | 39 +++++++++++++++++++++++---- include/media/dvb_frontend.h | 6 ++++- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 06ea30a689d7..a5e4ebf9ef19 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -804,6 +804,8 @@ static void dvb_frontend_stop(struct dvb_frontend *fe) dev_dbg(fe->dvb->device, "%s:\n", __func__); + mutex_lock(&fe->remove_mutex); + if (fe->exit != DVB_FE_DEVICE_REMOVED) fe->exit = DVB_FE_NORMAL_EXIT; mb(); @@ -813,6 +815,13 @@ static void dvb_frontend_stop(struct dvb_frontend *fe) kthread_stop(fepriv->thread); + mutex_unlock(&fe->remove_mutex); + + if (fepriv->dvbdev->users < -1) { + wait_event(fepriv->dvbdev->wait_queue, + fepriv->dvbdev->users == -1); + } + sema_init(&fepriv->sem, 1); fepriv->state = FESTATE_IDLE; @@ -2729,9 +2738,13 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) struct dvb_adapter *adapter = fe->dvb; int ret; + mutex_lock(&fe->remove_mutex); + dev_dbg(fe->dvb->device, "%s:\n", __func__); - if (fe->exit == DVB_FE_DEVICE_REMOVED) + if (fe->exit == DVB_FE_DEVICE_REMOVED) { + mutex_unlock(&fe->remove_mutex); return -ENODEV; + } if (adapter->mfe_shared) { mutex_lock(&adapter->mfe_lock); @@ -2752,8 +2765,10 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) while (mferetry-- && (mfedev->users != -1 || mfepriv->thread)) { if (msleep_interruptible(500)) { - if (signal_pending(current)) + if (signal_pending(current)) { + mutex_unlock(&fe->remove_mutex); return -EINTR; + } } } @@ -2765,6 +2780,7 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) if (mfedev->users != -1 || mfepriv->thread) { mutex_unlock(&adapter->mfe_lock); + mutex_unlock(&fe->remove_mutex); return -EBUSY; } adapter->mfe_dvbdev = dvbdev; @@ -2824,6 +2840,8 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) if (adapter->mfe_shared) mutex_unlock(&adapter->mfe_lock); + + mutex_unlock(&fe->remove_mutex); return ret; err3: @@ -2845,6 +2863,8 @@ static int dvb_frontend_open(struct inode *inode, struct file *file) err0: if (adapter->mfe_shared) mutex_unlock(&adapter->mfe_lock); + + mutex_unlock(&fe->remove_mutex); return ret; } @@ -2855,6 +2875,8 @@ static int dvb_frontend_release(struct inode *inode, struct file *file) struct dvb_frontend_private *fepriv = fe->frontend_priv; int ret; + mutex_lock(&fe->remove_mutex); + dev_dbg(fe->dvb->device, "%s:\n", __func__); if ((file->f_flags & O_ACCMODE) != O_RDONLY) { @@ -2876,11 +2898,17 @@ static int dvb_frontend_release(struct inode *inode, struct file *file) } mutex_unlock(&fe->dvb->mdev_lock); #endif - if (fe->exit != DVB_FE_NO_EXIT) - wake_up(&dvbdev->wait_queue); if (fe->ops.ts_bus_ctrl) fe->ops.ts_bus_ctrl(fe, 0); - } + + if (fe->exit != DVB_FE_NO_EXIT) { + mutex_unlock(&fe->remove_mutex); + wake_up(&dvbdev->wait_queue); + } else + mutex_unlock(&fe->remove_mutex); + + } else + mutex_unlock(&fe->remove_mutex); dvb_frontend_put(fe); @@ -2975,6 +3003,7 @@ int dvb_register_frontend(struct dvb_adapter *dvb, fepriv = fe->frontend_priv; kref_init(&fe->refcount); + mutex_init(&fe->remove_mutex); /* * After initialization, there need to be two references: one diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h index 0d76fa4551b3..df9fbebe256d 100644 --- a/include/media/dvb_frontend.h +++ b/include/media/dvb_frontend.h @@ -680,7 +680,10 @@ struct 
dtv_frontend_properties { * @id: Frontend ID * @exit: Used to inform the DVB core that the frontend * thread should exit (usually, means that the hardware - * got disconnected. + * got disconnected.) + * @remove_mutex: mutex that avoids a race condition between a callback + * called when the hardware is disconnected and the + * file_operations of dvb_frontend */ struct dvb_frontend { @@ -698,6 +701,7 @@ struct dvb_frontend { int (*callback)(void *adapter_priv, int component, int cmd, int arg); int id; unsigned int exit; + struct mutex remove_mutex; }; /** -- Gitee From b42e66e769dbd916741e4cf020c91c19bc23c66b Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Wed, 16 Nov 2022 20:59:23 -0800 Subject: [PATCH 081/243] media: dvb-core: Fix use-after-free due to race condition occurring in dvb_net maillist inclusion category: bugfix issue: I63U5S CVE: CVE-2022-45884 CVE-2022-45885 CVE-2022-45886 CVE-2022-45887 Reference: https://lore.kernel.org/all/20221117045925.14297-2-imv4bel@gmail.com/ Signed-off-by: gaochao --------------------------------------- A race condition may occur between the .disconnect function, which is called when the device is disconnected, and the dvb_device_open() function, which is called when the device node is open()ed. This results in several types of UAFs. The root cause of this is that you use the dvb_device_open() function, which does not implement a conditional statement that checks 'dvbnet->exit'. So, add 'remove_mutex` to protect 'dvbnet->exit' and use locked_dvb_net_open() function to check 'dvbnet->exit'. Signed-off-by: Hyunwoo Kim --- drivers/media/dvb-core/dvb_net.c | 37 +++++++++++++++++++++++++++++--- include/media/dvb_net.h | 4 ++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c index dddebea644bb..3a8833b9f7d2 100644 --- a/drivers/media/dvb-core/dvb_net.c +++ b/drivers/media/dvb-core/dvb_net.c @@ -1564,15 +1564,42 @@ static long dvb_net_ioctl(struct file *file, return dvb_usercopy(file, cmd, arg, dvb_net_do_ioctl); } +static int locked_dvb_net_open(struct inode *inode, struct file *file) +{ + struct dvb_device *dvbdev = file->private_data; + struct dvb_net *dvbnet = dvbdev->priv; + int ret; + + if (mutex_lock_interruptible(&dvbnet->remove_mutex)) + return -ERESTARTSYS; + + if (dvbnet->exit) { + mutex_unlock(&dvbnet->remove_mutex); + return -ENODEV; + } + + ret = dvb_generic_open(inode, file); + + mutex_unlock(&dvbnet->remove_mutex); + + return ret; +} + static int dvb_net_close(struct inode *inode, struct file *file) { struct dvb_device *dvbdev = file->private_data; struct dvb_net *dvbnet = dvbdev->priv; + mutex_lock(&dvbnet->remove_mutex); + dvb_generic_release(inode, file); - if(dvbdev->users == 1 && dvbnet->exit == 1) + if (dvbdev->users == 1 && dvbnet->exit == 1) { + mutex_unlock(&dvbnet->remove_mutex); wake_up(&dvbdev->wait_queue); + } else + mutex_unlock(&dvbnet->remove_mutex); + return 0; } @@ -1580,7 +1607,7 @@ static int dvb_net_close(struct inode *inode, struct file *file) static const struct file_operations dvb_net_fops = { .owner = THIS_MODULE, .unlocked_ioctl = dvb_net_ioctl, - .open = dvb_generic_open, + .open = locked_dvb_net_open, .release = dvb_net_close, .llseek = noop_llseek, }; @@ -1599,10 +1626,13 @@ void dvb_net_release (struct dvb_net *dvbnet) { int i; + mutex_lock(&dvbnet->remove_mutex); dvbnet->exit = 1; + mutex_unlock(&dvbnet->remove_mutex); + if (dvbnet->dvbdev->users < 1) wait_event(dvbnet->dvbdev->wait_queue, - dvbnet->dvbdev->users==1); + 
dvbnet->dvbdev->users == 1); dvb_unregister_device(dvbnet->dvbdev); @@ -1621,6 +1651,7 @@ int dvb_net_init (struct dvb_adapter *adap, struct dvb_net *dvbnet, int i; mutex_init(&dvbnet->ioctl_mutex); + mutex_init(&dvbnet->remove_mutex); dvbnet->demux = dmx; for (i=0; i Date: Wed, 16 Nov 2022 20:59:24 -0800 Subject: [PATCH 082/243] media: dvb-core: Fix use-after-free due to race condition occurring in dvb_register_device() maillist inclusion category: bugfix issue: I63U5S CVE: CVE-2022-45884 CVE-2022-45885 CVE-2022-45886 CVE-2022-45887 Reference: https://lore.kernel.org/all/20221117045925.14297-2-imv4bel@gmail.com/ Signed-off-by: gaochao --------------------------------------- dvb_register_device() dynamically allocates fops with kmemdup() to set the fops->owner. And these fops are registered in 'file->f_ops' using replace_fops() in the dvb_device_open() process, and kfree()d in dvb_free_device(). However, it is not common to use dynamically allocated fops instead of 'static const' fops as an argument of replace_fops(), and UAF may occur. These UAFs can occur on any dvb type using dvb_register_device(), such as dvb_dvr, dvb_demux, dvb_frontend, dvb_net, etc. So, instead of kfree() the fops dynamically allocated in dvb_register_device() in dvb_free_device() called during the .disconnect() process, kfree() it collectively in exit_dvbdev() called when the dvbdev.c module is removed. Signed-off-by: Hyunwoo Kim Reported-by: kernel test robot Reported-by: Dan Carpenter --- drivers/media/dvb-core/dvbdev.c | 84 ++++++++++++++++++++++++--------- include/media/dvbdev.h | 15 ++++++ 2 files changed, 78 insertions(+), 21 deletions(-) diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c index 795d9bfaba5c..c1138da5d8fb 100644 --- a/drivers/media/dvb-core/dvbdev.c +++ b/drivers/media/dvb-core/dvbdev.c @@ -37,6 +37,7 @@ #include static DEFINE_MUTEX(dvbdev_mutex); +static LIST_HEAD(dvbdevfops_list); static int dvbdev_debug; module_param(dvbdev_debug, int, 0644); @@ -458,14 +459,15 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, enum dvb_device_type type, int demux_sink_pads) { struct dvb_device *dvbdev; - struct file_operations *dvbdevfops; + struct file_operations *dvbdevfops = NULL; + struct dvbdevfops_node *node = NULL, *new_node = NULL; struct device *clsdev; int minor; int id, ret; mutex_lock(&dvbdev_register_lock); - if ((id = dvbdev_get_free_id (adap, type)) < 0){ + if ((id = dvbdev_get_free_id (adap, type)) < 0) { mutex_unlock(&dvbdev_register_lock); *pdvbdev = NULL; pr_err("%s: couldn't find free device id\n", __func__); @@ -473,18 +475,45 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, } *pdvbdev = dvbdev = kzalloc(sizeof(*dvbdev), GFP_KERNEL); - if (!dvbdev){ mutex_unlock(&dvbdev_register_lock); return -ENOMEM; } - dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL); + /* + * When a device of the same type is probe()d more than once, + * the first allocated fops are used. This prevents memory leaks + * that can occur when the same device is probe()d repeatedly. 
+ */ + list_for_each_entry(node, &dvbdevfops_list, list_head) { + if (node->fops->owner == adap->module && + node->type == type && + node->template == template) { + dvbdevfops = node->fops; + break; + } + } - if (!dvbdevfops){ - kfree (dvbdev); - mutex_unlock(&dvbdev_register_lock); - return -ENOMEM; + if (dvbdevfops == NULL) { + dvbdevfops = kmemdup(template->fops, sizeof(*dvbdevfops), GFP_KERNEL); + if (!dvbdevfops) { + kfree(dvbdev); + mutex_unlock(&dvbdev_register_lock); + return -ENOMEM; + } + + new_node = kzalloc(sizeof(struct dvbdevfops_node), GFP_KERNEL); + if (!new_node) { + kfree(dvbdevfops); + kfree(dvbdev); + mutex_unlock(&dvbdev_register_lock); + return -ENOMEM; + } + + new_node->fops = dvbdevfops; + new_node->type = type; + new_node->template = template; + list_add_tail (&new_node->list_head, &dvbdevfops_list); } memcpy(dvbdev, template, sizeof(struct dvb_device)); @@ -494,20 +523,20 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, dvbdev->priv = priv; dvbdev->fops = dvbdevfops; init_waitqueue_head (&dvbdev->wait_queue); - dvbdevfops->owner = adap->module; - list_add_tail (&dvbdev->list_head, &adap->device_list); - down_write(&minor_rwsem); #ifdef CONFIG_DVB_DYNAMIC_MINORS for (minor = 0; minor < MAX_DVB_MINORS; minor++) if (dvb_minors[minor] == NULL) break; - if (minor == MAX_DVB_MINORS) { + if (new_node) { + list_del (&new_node->list_head); + kfree(dvbdevfops); + kfree(new_node); + } list_del (&dvbdev->list_head); - kfree(dvbdevfops); kfree(dvbdev); up_write(&minor_rwsem); mutex_unlock(&dvbdev_register_lock); @@ -516,41 +545,47 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev, #else minor = nums2minor(adap->num, type, id); #endif - dvbdev->minor = minor; dvb_minors[minor] = dvbdev; up_write(&minor_rwsem); - ret = dvb_register_media_device(dvbdev, type, minor, demux_sink_pads); if (ret) { pr_err("%s: dvb_register_media_device failed to create the mediagraph\n", __func__); - + if (new_node) { + list_del (&new_node->list_head); + kfree(dvbdevfops); + kfree(new_node); + } dvb_media_device_free(dvbdev); list_del (&dvbdev->list_head); - kfree(dvbdevfops); kfree(dvbdev); mutex_unlock(&dvbdev_register_lock); return ret; } - mutex_unlock(&dvbdev_register_lock); - clsdev = device_create(dvb_class, adap->device, MKDEV(DVB_MAJOR, minor), dvbdev, "dvb%d.%s%d", adap->num, dnames[type], id); if (IS_ERR(clsdev)) { pr_err("%s: failed to create device dvb%d.%s%d (%ld)\n", __func__, adap->num, dnames[type], id, PTR_ERR(clsdev)); + if (new_node) { + list_del (&new_node->list_head); + kfree(dvbdevfops); + kfree(new_node); + } dvb_media_device_free(dvbdev); list_del (&dvbdev->list_head); - kfree(dvbdevfops); kfree(dvbdev); + mutex_unlock(&dvbdev_register_lock); return PTR_ERR(clsdev); } + dprintk("DVB: register adapter%d/%s%d @ minor: %i (0x%02x)\n", adap->num, dnames[type], id, minor, minor); + mutex_unlock(&dvbdev_register_lock); return 0; } EXPORT_SYMBOL(dvb_register_device); @@ -579,7 +614,6 @@ void dvb_free_device(struct dvb_device *dvbdev) if (!dvbdev) return; - kfree (dvbdev->fops); kfree (dvbdev); } EXPORT_SYMBOL(dvb_free_device); @@ -1071,9 +1105,17 @@ static int __init init_dvbdev(void) static void __exit exit_dvbdev(void) { + struct dvbdevfops_node *node, *next; + class_destroy(dvb_class); cdev_del(&dvb_device_cdev); unregister_chrdev_region(MKDEV(DVB_MAJOR, 0), MAX_DVB_MINORS); + + list_for_each_entry_safe(node, next, &dvbdevfops_list, list_head) { + list_del (&node->list_head); + kfree(node->fops); + kfree(node); + } } 
subsys_initcall(init_dvbdev); diff --git a/include/media/dvbdev.h b/include/media/dvbdev.h index e547cbeee431..2506a635b67b 100644 --- a/include/media/dvbdev.h +++ b/include/media/dvbdev.h @@ -187,6 +187,21 @@ struct dvb_device { void *priv; }; +/** + * struct dvbdevfops_node - fops nodes registered in dvbdevfops_list + * + * @fops: Dynamically allocated fops for ->owner registration + * @type: type of dvb_device + * @template: dvb_device used for registration + * @list_head: list_head for dvbdevfops_list + */ +struct dvbdevfops_node { + struct file_operations *fops; + enum dvb_device_type type; + const struct dvb_device *template; + struct list_head list_head; +}; + /** * dvb_register_adapter - Registers a new DVB adapter * -- Gitee From 31dc825ad4bb216e02b570a04f7913dd4547c861 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Wed, 16 Nov 2022 20:59:25 -0800 Subject: [PATCH 083/243] media: ttusb-dec: Fix memory leak in ttusb_dec_exit_dvb() maillist inclusion category: bugfix issue: I63U5S CVE: CVE-2022-45884 CVE-2022-45885 CVE-2022-45886 CVE-2022-45887 Reference: https://lore.kernel.org/all/20221117045925.14297-2-imv4bel@gmail.com/ Signed-off-by: gaochao --------------------------------------- Since dvb_frontend_detach() is not called in ttusb_dec_exit_dvb(), which is called when the device is disconnected, dvb_frontend_free() is not finally called. This causes a memory leak just by repeatedly plugging and unplugging the device. Fix this issue by adding dvb_frontend_detach() to ttusb_dec_exit_dvb(). Signed-off-by: Hyunwoo Kim --- drivers/media/usb/ttusb-dec/ttusb_dec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/usb/ttusb-dec/ttusb_dec.c b/drivers/media/usb/ttusb-dec/ttusb_dec.c index df6c5e4a0f05..68f88143c8a6 100644 --- a/drivers/media/usb/ttusb-dec/ttusb_dec.c +++ b/drivers/media/usb/ttusb-dec/ttusb_dec.c @@ -1551,8 +1551,7 @@ static void ttusb_dec_exit_dvb(struct ttusb_dec *dec) dvb_dmx_release(&dec->demux); if (dec->fe) { dvb_unregister_frontend(dec->fe); - if (dec->fe->ops.release) - dec->fe->ops.release(dec->fe); + dvb_frontend_detach(dec->fe); } dvb_unregister_adapter(&dec->adapter); } -- Gitee From 8f2b74c50166b5ddd59ea7464bcd0d774681d5d2 Mon Sep 17 00:00:00 2001 From: shenchenkai Date: Mon, 16 Jan 2023 07:03:43 +0000 Subject: [PATCH 084/243] hilog: fix dos error ohos inclusion category: bugfix bugzilla: NA issue: #I6AQCC CVE: NA ----------------------------------- fix hilog driver dos error Signed-off-by: shenchenkai --- drivers/staging/hilog/hilog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/hilog/hilog.c b/drivers/staging/hilog/hilog.c index af490cdaa291..4e6168ce787e 100644 --- a/drivers/staging/hilog/hilog.c +++ b/drivers/staging/hilog/hilog.c @@ -273,7 +273,7 @@ static void hilog_cover_old_log(size_t buf_len) static bool is_last_time_full; bool is_this_time_full = false; - while (total_size + hilog_dev.size >= HILOG_BUFFER) { + while (total_size + hilog_dev.size > HILOG_BUFFER) { retval = hilog_read_ring_head_buffer((unsigned char *)&header, sizeof(header)); if (retval < 0) -- Gitee From cf1ed408e1bd3684624ba2152502e8560d0b17c8 Mon Sep 17 00:00:00 2001 From: xuyong Date: Mon, 16 Jan 2023 20:24:57 +0800 Subject: [PATCH 085/243] hievent: fix dos error ohos inclusion category:bugfix issue:#I6AAW3 CVE:NA ------------------------------------- fix hievent driver dos error Signed-off-by: xuyong --- drivers/staging/hievent/hievent_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/drivers/staging/hievent/hievent_driver.c b/drivers/staging/hievent/hievent_driver.c index b65dee9392a3..86363d11d5c2 100644 --- a/drivers/staging/hievent/hievent_driver.c +++ b/drivers/staging/hievent/hievent_driver.c @@ -194,7 +194,7 @@ static void hievent_cover_old_log(size_t buf_len) struct hievent_entry header; size_t total_size = buf_len + sizeof(struct hievent_entry); - while (total_size + hievent_dev.size >= HIEVENT_BUFFER) { + while (total_size + hievent_dev.size > HIEVENT_BUFFER) { retval = hievent_read_ring_head_buffer((unsigned char *)&header, sizeof(header)); if (retval < 0) -- Gitee From 83d9000051629b148647d49f9017183db5ffa6a3 Mon Sep 17 00:00:00 2001 From: linqiheng Date: Tue, 10 Jan 2023 15:13:53 +0000 Subject: [PATCH 086/243] hmdfs: avoid recv_task racing when concurrently handle cmds When CMD_UPDATE_SOCKET and CMD_OFF_LINE operate on the same fd concurrently, there is no lock protection after acquiring the same connect_handle, which is released and emptied by tcp_close_socket() before the wake_up_process() call. Signed-off-by: linqiheng --- fs/hmdfs/comm/device_node.c | 2 ++ fs/hmdfs/hmdfs.h | 1 + fs/hmdfs/main.c | 1 + 3 files changed, 4 insertions(+) diff --git a/fs/hmdfs/comm/device_node.c b/fs/hmdfs/comm/device_node.c index 7c2bac914bb4..0f2585de61fe 100644 --- a/fs/hmdfs/comm/device_node.c +++ b/fs/hmdfs/comm/device_node.c @@ -232,9 +232,11 @@ static ssize_t sbi_cmd_store(struct kobject *kobj, struct sbi_attribute *attr, hmdfs_err("Illegal cmd : cmd = %d", cmd); return len; } + mutex_lock(&sbi->cmd_handler_mutex); hmdfs_info("Recved cmd: %s", cmd2str(cmd)); if (cmd_handler[cmd]) cmd_handler[cmd](buf, len, sbi); + mutex_unlock(&sbi->cmd_handler_mutex); return len; } diff --git a/fs/hmdfs/hmdfs.h b/fs/hmdfs/hmdfs.h index 9157c55ba392..45fcf9f9b3f3 100644 --- a/fs/hmdfs/hmdfs.h +++ b/fs/hmdfs/hmdfs.h @@ -169,6 +169,7 @@ struct hmdfs_sb_info { /* To bridge the userspace utils */ struct kfifo notify_fifo; spinlock_t notify_fifo_lock; + struct mutex cmd_handler_mutex; /* For reboot detect */ uint64_t boot_cookie; diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index 53d698917969..8075fe4bc5c9 100644 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -692,6 +692,7 @@ static int hmdfs_init_sbi(struct hmdfs_sb_info *sbi) ret = 0; spin_lock_init(&sbi->notify_fifo_lock); + mutex_init(&sbi->cmd_handler_mutex); sbi->s_case_sensitive = false; sbi->s_features = HMDFS_FEATURE_READPAGES | HMDFS_FEATURE_READPAGES_OPEN | -- Gitee From 63b48ca603fd1e4dcf3d0352080652ab9886375c Mon Sep 17 00:00:00 2001 From: Qiheng Lin Date: Tue, 17 Jan 2023 11:46:07 +0800 Subject: [PATCH 087/243] hmdfs: fix UAF of mdi(or d_fsdata) merge_lookup_async() adds the lookup works, and merge_lookup_work_func() consumes the works. The mdi->work_lock should protect the mdi->work_count to avoid d_release_merge free the mdi(or d_fsdata). 
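As an illustration of the rule being enforced here, a minimal sketch follows; the names are hypothetical and not the hmdfs structures. The point is that the producer-side increment, the consumer-side decrement, and the "all work finished?" check all take the same lock, so the release path can never act on a stale count:

#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/wait.h>

/* Illustrative sketch only: hypothetical names, not the hmdfs code. */
struct merge_info {
        struct mutex work_lock;         /* protects work_count */
        unsigned int work_count;        /* async lookup works in flight */
        wait_queue_head_t wait_queue;   /* waiter sleeps here */
};

static void lookup_work_queued(struct merge_info *mi)
{
        mutex_lock(&mi->work_lock);
        mi->work_count++;               /* producer side, under the lock */
        mutex_unlock(&mi->work_lock);
}

static void lookup_work_done(struct merge_info *mi)
{
        mutex_lock(&mi->work_lock);
        mi->work_count--;               /* consumer side, same lock */
        mutex_unlock(&mi->work_lock);
        wake_up(&mi->wait_queue);
}

static bool lookups_pending(struct merge_info *mi)
{
        bool pending;

        mutex_lock(&mi->work_lock);     /* the check needs the lock as well */
        pending = mi->work_count != 0;
        mutex_unlock(&mi->work_lock);
        return pending;
}

The path that may release the structure then waits for lookups_pending() to become false before tearing it down.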
Signed-off-by: Qiheng Lin Change-Id: I31a688940ee710467a6350a51f986ad40bb615cb --- fs/hmdfs/inode_merge.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/hmdfs/inode_merge.c b/fs/hmdfs/inode_merge.c index c020c8c201fa..c5cd7bdc4905 100644 --- a/fs/hmdfs/inode_merge.c +++ b/fs/hmdfs/inode_merge.c @@ -408,6 +408,7 @@ static void merge_lookup_work_func(struct work_struct *work) comrade = merge_lookup_comrade(ml_work->sbi, ml_work->name, ml_work->devid, ml_work->flags); if (IS_ERR(comrade)) { + mutex_lock(&mdi->work_lock); goto out; } @@ -504,6 +505,7 @@ static int lookup_merge_normal(struct dentry *dentry, unsigned int flags) goto out_ppath; } + mutex_lock(&mdi->work_lock); mutex_lock(&sbi->connections.node_lock); if (mdi->type != DT_REG || devid == 0) { snprintf(cpath, PATH_MAX, "device_view/local%s/%s", ppath, @@ -524,6 +526,7 @@ static int lookup_merge_normal(struct dentry *dentry, unsigned int flags) hmdfs_err("failed to create remote lookup work"); } mutex_unlock(&sbi->connections.node_lock); + mutex_unlock(&mdi->work_lock); wait_event(mdi->wait_queue, is_merge_lookup_end(mdi)); -- Gitee From eb9c6f5946ec208e984be8c0e67968427af625ac Mon Sep 17 00:00:00 2001 From: Qiheng Lin Date: Tue, 17 Jan 2023 16:52:07 +0800 Subject: [PATCH 088/243] hmdfs: fix client readdir dentry file leak Signed-off-by: Qiheng Lin Change-Id: I34b7de075e891f7bcc8669f99a7fbc67c6a49a7d --- fs/hmdfs/comm/socket_adapter.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c index 7005303e3735..0404c2a79d3a 100644 --- a/fs/hmdfs/comm/socket_adapter.c +++ b/fs/hmdfs/comm/socket_adapter.c @@ -439,13 +439,13 @@ int hmdfs_sendmessage_request(struct hmdfs_peer *con, msg_wq = kzalloc(sizeof(*msg_wq), GFP_KERNEL); if (!msg_wq) { ret = -ENOMEM; - goto free; + goto free_filp; } ret = msg_init(con, msg_wq); if (ret) { kfree(msg_wq); msg_wq = NULL; - goto free; + goto free_filp; } dec = true; head->msg_id = cpu_to_le32(msg_wq->head.msg_id); @@ -513,6 +513,7 @@ int hmdfs_sendmessage_request(struct hmdfs_peer *con, free_filp: if (sm->local_filp) fput(sm->local_filp); + kfree(head); return ret; } -- Gitee From 15812bde9dde397866b958017a05f5433d8fc322 Mon Sep 17 00:00:00 2001 From: Bing-Jhong Billy Jheng Date: Thu, 15 Dec 2022 06:43:56 -0800 Subject: [PATCH 089/243] io_uring: add missing item types for splice request stable inclusion from stable-v5.10.160 commit 75454b4bbfc7e6a4dd8338556f36ea9107ddf61a category: bugfix issue: I6ANAQ CVE: CVE-2022-4696 Signed-off-by: gaochao --------------------------------------- Splice is like read/write and should grab current->nsproxy, denoted by IO_WQ_WORK_FILES as it refers to current->files as well Signed-off-by: Bing-Jhong Billy Jheng Reviewed-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8b0f67489fff..55166d9d7d13 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -935,7 +935,7 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, .hash_reg_file = 1, .unbound_nonreg_file = 1, - .work_flags = IO_WQ_WORK_BLKCG, + .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FILES, }, [IORING_OP_PROVIDE_BUFFERS] = {}, [IORING_OP_REMOVE_BUFFERS] = {}, -- Gitee From 9b261a7560f6299ffc6b2ca421e57d261312e829 Mon Sep 17 00:00:00 2001 From: zhangkaixiang Date: Tue, 17 Jan 2023 18:53:18 +0800 Subject: [PATCH 090/243] hmdfs: fix up the problem of readdir UAF ohos inclusion category: bugfix issue: 
#I6AX84 CVE: NA ---------------------------------------------- fix up the problem of readdir UAF Signed-off-by: zhangkaixiang --- fs/hmdfs/inode_remote.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/hmdfs/inode_remote.c b/fs/hmdfs/inode_remote.c index 73d459bf4533..c9bd5275c8fd 100644 --- a/fs/hmdfs/inode_remote.c +++ b/fs/hmdfs/inode_remote.c @@ -121,12 +121,13 @@ static void hmdfs_remote_readdir_work(struct work_struct *work) hmdfs_d(dentry)->async_readdir_in_progress = 0; hmdfs_revert_creds(old_cred); - dput(dentry); - peer_put(con); spin_lock(&con->sbi->async_readdir_work_lock); list_del(&rw->head); empty = list_empty(&con->sbi->async_readdir_work_list); spin_unlock(&con->sbi->async_readdir_work_lock); + + dput(dentry); + peer_put(con); kfree(rw); if (empty) -- Gitee From 41d01b6ed2b3db65585dd45d7968a45049def0ec Mon Sep 17 00:00:00 2001 From: zhangkaixiang Date: Tue, 17 Jan 2023 19:13:13 +0800 Subject: [PATCH 091/243] hmdfs: fix up the problem of param size limitation ohos inclusion category: bugfix issue: I6AX9W CVE: NA ---------------------------------------------- fix up the problem of param size limitation Signed-off-by: zhangkaixiang --- fs/hmdfs/super.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/hmdfs/super.c b/fs/hmdfs/super.c index a4b1170153b3..18f222c6d4cc 100644 --- a/fs/hmdfs/super.c +++ b/fs/hmdfs/super.c @@ -81,7 +81,13 @@ int hmdfs_parse_options(struct hmdfs_sb_info *sbi, const char *data) unsigned int user_id = 0; struct super_block *sb = sbi->sb; int err = 0; + size_t size = 0; + size = strlen(data); + if (size >= HMDFS_PAGE_SIZE) { + return -EINVAL; + } + options = kstrdup(data, GFP_KERNEL); if (data && !options) { err = -ENOMEM; -- Gitee From a40b5adddd23051da2fba3e94aa300f022488015 Mon Sep 17 00:00:00 2001 From: Qiheng Lin Date: Sun, 29 Jan 2023 16:33:26 +0800 Subject: [PATCH 092/243] hmdfs: remove d_rehash in hmdfs_fill_super d_rehash would add the root_dentry to the dcache hlist, and kill_anon_super--generic_shutdown_super--shrink_dcache_for_umount --do_one_tree() calls d_drop() to unhash the dentry and dput() to destroy the dentry. While multiple hmdfs instances umount concurrently, their root_dentry are removed from dcache hlist. ___d_drop() use IS_ROOT() to judge them and use the d_sb->s_roots rather than the d_hash lock, so cause UAF when __hlist_bl_del accessing the ->pprev dentery. 
Signed-off-by: Qiheng Lin Change-Id: I5f72bd076174a2e92ad04320cac5a412a4087cb2 --- fs/hmdfs/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index 8075fe4bc5c9..cb3034e95651 100644 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -904,7 +904,6 @@ static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_freeroot; hmdfs_set_lower_path(root_dentry, &lower_path); - d_rehash(sb->s_root); sbi->cred = get_cred(current_cred()); INIT_LIST_HEAD(&sbi->client_cache); INIT_LIST_HEAD(&sbi->server_cache); -- Gitee From e01d1f62c7e9b2f1f45875e2ce193cefabc8d1af Mon Sep 17 00:00:00 2001 From: futurezhou Date: Thu, 29 Dec 2022 07:03:37 +0000 Subject: [PATCH 093/243] Add Enhanced Proxy File System support Signed-off-by: futurezhou --- fs/Kconfig | 1 + fs/Makefile | 1 + fs/epfs/Kconfig | 12 ++ fs/epfs/Makefile | 3 + fs/epfs/dentry.c | 23 ++++ fs/epfs/dir.c | 18 +++ fs/epfs/epfs.h | 43 +++++++ fs/epfs/file.c | 296 +++++++++++++++++++++++++++++++++++++++++++++ fs/epfs/inode.c | 111 +++++++++++++++++ fs/epfs/internal.h | 39 ++++++ fs/epfs/main.c | 44 +++++++ fs/epfs/super.c | 127 +++++++++++++++++++ 12 files changed, 718 insertions(+) create mode 100644 fs/epfs/Kconfig create mode 100644 fs/epfs/Makefile create mode 100644 fs/epfs/dentry.c create mode 100644 fs/epfs/dir.c create mode 100644 fs/epfs/epfs.h create mode 100644 fs/epfs/file.c create mode 100644 fs/epfs/inode.c create mode 100644 fs/epfs/internal.h create mode 100644 fs/epfs/main.c create mode 100644 fs/epfs/super.c diff --git a/fs/Kconfig b/fs/Kconfig index b95f212be39e..25e1a90d6491 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -346,6 +346,7 @@ endif # NETWORK_FILESYSTEMS source "fs/nls/Kconfig" source "fs/dlm/Kconfig" source "fs/unicode/Kconfig" +source "fs/epfs/Kconfig" config IO_WQ bool diff --git a/fs/Makefile b/fs/Makefile index d71954aaba20..3bdbd67b67d5 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -137,3 +137,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ obj-$(CONFIG_EROFS_FS) += erofs/ obj-$(CONFIG_VBOXSF_FS) += vboxsf/ obj-$(CONFIG_ZONEFS_FS) += zonefs/ +obj-$(CONFIG_EPFS) += epfs/ diff --git a/fs/epfs/Kconfig b/fs/epfs/Kconfig new file mode 100644 index 000000000000..059c3a0cc10d --- /dev/null +++ b/fs/epfs/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +config EPFS + tristate "Enhanced Proxy File System support" + depends on TMPFS + help + Enhanced Proxy File System support. If unsure, say N. + +config EPFS_DEBUG + tristate "Debug message of Enhanced Proxy File System" + depends on EPFS + help + Enhanced Proxy File System debug support. diff --git a/fs/epfs/Makefile b/fs/epfs/Makefile new file mode 100644 index 000000000000..b7375e6f9ee0 --- /dev/null +++ b/fs/epfs/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_EPFS) += epfs.o +epfs-y := main.o super.o dentry.o inode.o file.o dir.o diff --git a/fs/epfs/dentry.c b/fs/epfs/dentry.c new file mode 100644 index 000000000000..62299eccd4ef --- /dev/null +++ b/fs/epfs/dentry.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/main.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. 
+ * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include "internal.h" + +static int epfs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 1; +} + +static void epfs_d_release(struct dentry *dentry) +{ +} + +const struct dentry_operations epfs_dops = { + .d_revalidate = epfs_d_revalidate, + .d_release = epfs_d_release, +}; diff --git a/fs/epfs/dir.c b/fs/epfs/dir.c new file mode 100644 index 000000000000..875057a86517 --- /dev/null +++ b/fs/epfs/dir.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/dir.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include + +#include "internal.h" + +static int epfs_iterate(struct file *file, struct dir_context *ctx) +{ + return 0; +} + +const struct file_operations epfs_dir_fops = { .iterate = epfs_iterate }; diff --git a/fs/epfs/epfs.h b/fs/epfs/epfs.h new file mode 100644 index 000000000000..19e66e145d1a --- /dev/null +++ b/fs/epfs/epfs.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/epfs/epfs.h + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#ifndef __FS_EPFS_H__ +#define __FS_EPFS_H__ + +#include +#include +#include + +#define EPFS_MAX_RANGES 127 + +struct __attribute__((__packed__)) epfs_range { + __u64 num; + __u64 reserved; + struct { + __u64 begin; + __u64 end; + } range[0]; +}; + +#define EPFS_IOCTL_MAGIC 0x71 +#define IOC_SET_ORIGIN_FD _IOW(EPFS_IOCTL_MAGIC, 1, __s32) +#define IOC_SET_EPFS_RANGE _IOW(EPFS_IOCTL_MAGIC, 2, struct epfs_range) +#define EPFS_IOCTL_MAXNR 3 + +#define EPFS_TAG "Epfs" + +#define epfs_err(fmt, ...) \ + pr_err("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) +#define epfs_info(fmt, ...) \ + pr_info("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) +#define epfs_warn(fmt, ...) \ + pr_warn("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) +#define epfs_debug(fmt, ...) \ + pr_debug("%s:%s:%d: " fmt, EPFS_TAG, __func__, __LINE__, ##__VA_ARGS__) + +#endif // __FS_EPFS_H__ diff --git a/fs/epfs/file.c b/fs/epfs/file.c new file mode 100644 index 000000000000..5b236150f8b4 --- /dev/null +++ b/fs/epfs/file.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/file.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include +#include +#include +#include +#include + +#include "internal.h" + +long epfs_set_origin_fd(struct file *file, unsigned long arg) +{ + int fd = -1; + struct file *origin_file; + struct inode *inode = file->f_inode; + struct epfs_inode_info *info = epfs_inode_to_private(inode); + int ret = 0; + + if (copy_from_user(&fd, (int *)arg, sizeof(fd))) + return -EFAULT; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("original fd: %d", fd); + origin_file = fget(fd); + if (!origin_file) { + epfs_err("Original file not exist!"); + return -EBADF; + } + + mutex_lock(&info->lock); + if (info->origin_file) { + // origin_file had been set. 
+ ret = -EEXIST; + fput(origin_file); + } else if (file_inode(origin_file) == inode) { + epfs_err("Could not set itself as origin_file!"); + fput(origin_file); + ret = -EINVAL; + } else { + info->origin_file = origin_file; + fsstack_copy_attr_all(inode, file_inode(origin_file)); + fsstack_copy_inode_size(inode, file_inode(origin_file)); + } + mutex_unlock(&info->lock); + return ret; +} + +int check_range(struct epfs_range *range) +{ + __u64 index; + + if (range->range[0].begin >= range->range[0].end) { + epfs_err("Invalid range: [%llu, %llu)", range->range[0].begin, + range->range[0].end); + return -EINVAL; + } + + for (index = 1; index < range->num; index++) { + if ((range->range[index].begin >= range->range[index].end) || + (range->range[index].begin < range->range[index - 1].end)) { + epfs_err("Invalid range: [%llu, %llu), [%llu, %llu)", + range->range[index - 1].begin, + range->range[index - 1].end, + range->range[index].begin, + range->range[index].end); + return -EINVAL; + } + } + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) { + epfs_debug("epfs_range recv %llu ranges:", range->num); + for (index = 0; index < range->num; index++) { + epfs_debug("range:[%llu %llu)", + range->range[index].begin, + range->range[index].end); + } + epfs_debug("\n"); + } + return 0; +} + +long epfs_set_range(struct file *file, unsigned long arg) +{ + struct inode *inode = file->f_inode; + struct inode *origin_inode; + struct epfs_inode_info *info = epfs_inode_to_private(inode); + int ret = 0; + struct epfs_range *range; + struct epfs_range header; + + mutex_lock(&info->lock); + if (!info->origin_file) { + epfs_err("origin file not exist!"); + ret = -EBADF; + goto out_set_range; + } + origin_inode = info->origin_file->f_inode; + if (!in_group_p(origin_inode->i_gid)) { + epfs_err("Only group member can set range: %u", + i_gid_read(origin_inode)); + ret = -EACCES; + goto out_set_range; + } + + if (copy_from_user(&header, (struct epfs_range *)arg, + sizeof(header))) { + ret = -EFAULT; + epfs_err("get header failed!"); + goto out_set_range; + } + + if (header.num > EPFS_MAX_RANGES || header.num == 0) { + ret = -EINVAL; + epfs_err("illegal num: %llu", header.num); + goto out_set_range; + } + + range = kzalloc(sizeof(header) + sizeof(header.range[0]) * header.num, + GFP_KERNEL); + if (!range) { + ret = -ENOMEM; + goto out_set_range; + } + + if (copy_from_user(range, (struct epfs_range *)arg, + sizeof(header) + sizeof(header.range[0]) * header.num)) { + ret = -EFAULT; + epfs_err("Failed to get range! 
num: %llu", header.num); + kfree(range); + goto out_set_range; + } + + ret = check_range(range); + if (ret) { + kfree(range); + goto out_set_range; + } + + info->range = range; +out_set_range: + mutex_unlock(&info->lock); + return ret; +} + +static long __epfs_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + long rc = -ENOTTY; + + if (unlikely(_IOC_TYPE(cmd) != EPFS_IOCTL_MAGIC)) { + epfs_err("Failed to check epfs magic: %u", _IOC_TYPE(cmd)); + return -ENOTTY; + } + if (unlikely(_IOC_NR(cmd) >= EPFS_IOCTL_MAXNR)) { + epfs_err("Failed to check ioctl number: %u", _IOC_NR(cmd)); + return -ENOTTY; + } + if (unlikely(!access_ok((void __user *)arg, _IOC_SIZE(cmd)))) { + epfs_err("Failed to check user address space range!"); + return -EFAULT; + } + + switch (cmd) { + case IOC_SET_ORIGIN_FD: + return epfs_set_origin_fd(file, arg); + case IOC_SET_EPFS_RANGE: + return epfs_set_range(file, arg); + default: + epfs_info("Exit epfs unsupported ioctl, ret: %ld", rc); + return rc; + } +} + +static long epfs_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return __epfs_ioctl(file, cmd, arg); +} + +static long epfs_unlocked_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return __epfs_ioctl(file, cmd, arg); +} + +static ssize_t epfs_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct inode *inode = file_inode(file); + struct epfs_inode_info *info = epfs_inode_to_private(inode); + struct file *origin_file; + struct epfs_range *range; + ssize_t ret = 0; + loff_t pos = *ppos; + loff_t file_size; + int current_range_index = 0; + + mutex_lock(&info->lock); + range = info->range; + if (!range) { + ret = -EINVAL; + epfs_err("Invalid inode range!"); + goto out_read; + } + + origin_file = info->origin_file; + + if (!origin_file) { + ret = -ENOENT; + epfs_err("origin file not exist!"); + goto out_read; + } + + // Reduce count when it will read over file size. + file_size = i_size_read(file_inode(origin_file)); + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + if (count > (file_size - pos)) + epfs_debug( + "count will be truncated to %llu, as file_size=%llu, pos=%llu", + file_size - pos, file_size, pos); + count = count <= (file_size - pos) ? count : (file_size - pos); + + // Skip ranges before pos. + while ((range->range[current_range_index].end <= pos) && + (current_range_index < range->num)) + current_range_index++; + + while (count > 0) { + __u64 current_begin, current_end; + + if (current_range_index >= range->num) { + // read directly when epfs range gone; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug( + "read from %llu with len %lu at the end.", + pos, count); + ret = vfs_read(origin_file, buf, count, &pos); + break; + } + current_begin = range->range[current_range_index].begin; + current_end = range->range[current_range_index].end; + if (current_begin <= pos) { + // Clear user memory + unsigned long clear_len = current_end - pos; + + clear_len = clear_len < count ? clear_len : count; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug( + "clear user memory from %llu with len %lu", + pos, clear_len); + if (clear_user(buf, clear_len)) { + ret = EFAULT; + break; + } + buf += clear_len; + pos += clear_len; + count -= clear_len; + current_range_index++; + } else { + // Read from pos to (next)current_begin + unsigned long read_len = current_begin - pos; + + read_len = read_len < count ? 
read_len : count; + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug( + "read from %llu with len %lu", + pos, read_len); + ret = vfs_read(origin_file, buf, read_len, &pos); + if (ret < 0 || ret < read_len) { + // Could not read enough bytes; + break; + } + buf += ret; + count -= ret; + } + } + + if (ret >= 0) { + ret = pos - *ppos; + *ppos = pos; + } +out_read: + mutex_unlock(&info->lock); + return ret; +} + +const struct file_operations epfs_file_fops = { + .unlocked_ioctl = epfs_unlocked_ioctl, + .compat_ioctl = epfs_compat_ioctl, + .read = epfs_read, + .llseek = generic_file_llseek, +}; diff --git a/fs/epfs/inode.c b/fs/epfs/inode.c new file mode 100644 index 000000000000..3510fdf7b467 --- /dev/null +++ b/fs/epfs/inode.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/inode.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include +#include +#include + +#include "internal.h" + +#define USER_DATA_RW 1008 +#define USER_DATA_RW_UID KUIDT_INIT(USER_DATA_RW) +#define USER_DATA_RW_GID KGIDT_INIT(USER_DATA_RW) + +struct dentry *epfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + return ERR_PTR(-ENOENT); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +static int epfs_tmpfile(struct user_namespace *, struct inode *dir, + struct dentry *dentry, umode_t mode) +#else +static int epfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +#endif +{ + struct inode *inode = epfs_iget(dir->i_sb, false); + + if (!inode) + return -ENOSPC; + d_tmpfile(dentry, inode); + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("epfs: tmpfile %p", inode); + return 0; +} + +const struct inode_operations epfs_dir_iops = { + .tmpfile = epfs_tmpfile, + .lookup = epfs_lookup, +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) +static int epfs_getattr(struct user_namespace *mnt_userns, + const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +#else +static int epfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +#endif +{ + struct dentry *dentry = path->dentry; + struct inode *inode = d_inode(dentry); + struct epfs_inode_info *info = epfs_inode_to_private(inode); + struct file *origin_file; + struct kstat origin_stat; + int ret; + + mutex_lock(&info->lock); + origin_file = info->origin_file; + if (!origin_file) { + ret = -ENOENT; + goto out_getattr; + } + ret = vfs_getattr(&(origin_file->f_path), &origin_stat, request_mask, + flags); + if (ret) + goto out_getattr; + fsstack_copy_attr_all(inode, file_inode(origin_file)); + fsstack_copy_inode_size(inode, file_inode(origin_file)); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) + generic_fillattr(mnt_userns, d_inode(dentry), stat); +#else + generic_fillattr(d_inode(dentry), stat); +#endif + stat->blocks = origin_stat.blocks; + +out_getattr: + mutex_unlock(&info->lock); + return ret; +} + +const struct inode_operations epfs_file_iops = { + .getattr = epfs_getattr, +}; + +struct inode *epfs_iget(struct super_block *sb, bool is_dir) +{ + struct inode *inode = new_inode(sb); + + if (!inode) { + epfs_err("Failed to allocate new inode"); + return NULL; + } + if (is_dir) { + inode->i_op = &epfs_dir_iops; + inode->i_fop = &epfs_dir_fops; + inode->i_mode = S_IFDIR | 0770; + } else { + inode->i_op = &epfs_file_iops; + inode->i_fop = &epfs_file_fops; + inode->i_mode = S_IFREG; + } + inode->i_uid = USER_DATA_RW_UID; + inode->i_gid = USER_DATA_RW_GID; + return 
inode; +} diff --git a/fs/epfs/internal.h b/fs/epfs/internal.h new file mode 100644 index 000000000000..9895ffbc015e --- /dev/null +++ b/fs/epfs/internal.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/epfs/internal.h + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#ifndef __FS_EPFS_INTERNAL_H__ +#define __FS_EPFS_INTERNAL_H__ + +#include +#include +#include + +#include "epfs.h" + +#define EPFS_SUPER_MAGIC 0x20220607 + +struct epfs_inode_info { + struct inode vfs_inode; + struct file *origin_file; + struct epfs_range *range; + struct mutex lock; +}; + +static inline struct epfs_inode_info *epfs_inode_to_private(struct inode *inode) +{ + return container_of(inode, struct epfs_inode_info, vfs_inode); +} + +struct inode *epfs_iget(struct super_block *sb, bool is_dir); +extern const struct dentry_operations epfs_dops; +extern const struct file_operations epfs_dir_fops; +extern const struct file_operations epfs_file_fops; +extern struct file_system_type epfs_fs_type; +extern struct kmem_cache *epfs_inode_cachep; + +#endif // __FS_EPFS_INTERNAL_H__ diff --git a/fs/epfs/main.c b/fs/epfs/main.c new file mode 100644 index 000000000000..c91e94f8f15e --- /dev/null +++ b/fs/epfs/main.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/main.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include +#include +#include + +#include "internal.h" + +struct kmem_cache *epfs_inode_cachep; + +static int __init epfs_init(void) +{ + int ret; + + epfs_inode_cachep = + kmem_cache_create("epfs_inode_cache", + sizeof(struct epfs_inode_info), 0, 0, + NULL); + if (!epfs_inode_cachep) + return -ENOMEM; + ret = register_filesystem(&epfs_fs_type); + if (ret) + kmem_cache_destroy(epfs_inode_cachep); + return ret; +} + +static void __exit epfs_exit(void) +{ + unregister_filesystem(&epfs_fs_type); + kmem_cache_destroy(epfs_inode_cachep); +} + +module_init(epfs_init) +module_exit(epfs_exit) +MODULE_DESCRIPTION("Enhanced Proxy File System for OpenHarmony"); +MODULE_AUTHOR("LongPing Wei weilongping@huawei.com"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_FS("epfs"); diff --git a/fs/epfs/super.c b/fs/epfs/super.c new file mode 100644 index 000000000000..4d708f855d1f --- /dev/null +++ b/fs/epfs/super.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs/epfs/super.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. 
+ * Author: weilongping@huawei.com + * Create: 2022-06-10 + */ +#include +#include +#include +#include +#include + +#include "internal.h" + +static struct inode *epfs_alloc_inode(struct super_block *sb) +{ + struct epfs_inode_info *info = + kmem_cache_zalloc(epfs_inode_cachep, GFP_KERNEL); + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("inode info: %p", info); + inode_init_once(&info->vfs_inode); + mutex_init(&info->lock); + return &info->vfs_inode; +} + +// Free epfs_inode_info +static void epfs_free_inode(struct inode *inode) +{ + if (IS_ENABLED(CONFIG_EPFS_DEBUG)) + epfs_debug("free_inode: %p", inode); + kmem_cache_free(epfs_inode_cachep, + epfs_inode_to_private(inode)); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) +static void i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + + epfs_free_inode(inode); +} +#endif + +// Destroy epfs_range +static void epfs_destroy_inode(struct inode *inode) +{ + struct epfs_inode_info *info = epfs_inode_to_private(inode); + + mutex_lock(&info->lock); + kfree(info->range); + info->range = NULL; + mutex_unlock(&info->lock); +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) + call_rcu(&inode->i_rcu, i_callback); +#endif +} + +// Clear vfs_inode +static void epfs_evict_inode(struct inode *inode) +{ + struct epfs_inode_info *info = epfs_inode_to_private(inode); + + clear_inode(inode); + mutex_lock(&info->lock); + if (info->origin_file) { + fput(info->origin_file); + info->origin_file = NULL; + } + mutex_unlock(&info->lock); +} + +static int epfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + buf->f_type = EPFS_SUPER_MAGIC; + return 0; +} +struct super_operations epfs_sops = { + .alloc_inode = epfs_alloc_inode, + .destroy_inode = epfs_destroy_inode, +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) + .free_inode = epfs_free_inode, +#endif + .evict_inode = epfs_evict_inode, + .statfs = epfs_statfs, +}; + +static int epfs_fill_super(struct super_block *s, void *data, int silent) +{ + struct inode *inode; + + s->s_op = &epfs_sops; + s->s_d_op = &epfs_dops; + s->s_magic = EPFS_SUPER_MAGIC; + inode = epfs_iget(s, true /* dir */); + if (!inode) { + epfs_err("Failed to get root inode!"); + return -ENOMEM; + } + + s->s_root = d_make_root(inode); + if (!s->s_root) { + epfs_err("Failed to make root inode"); + return -ENOMEM; + } + + return 0; +} + +struct dentry *epfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + return mount_nodev(fs_type, flags, raw_data, epfs_fill_super); +} + +void epfs_kill_sb(struct super_block *sb) +{ + kill_anon_super(sb); +} + +struct file_system_type epfs_fs_type = { + .owner = THIS_MODULE, + .name = "epfs", + .mount = epfs_mount, + .kill_sb = epfs_kill_sb, +}; -- Gitee From 90717ed67067c0e5cb0ed53b9882dc22ceaba671 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 25 Nov 2020 23:48:40 +0100 Subject: [PATCH 094/243] net: switch to storing KCOV handle directly in sk_buff mainline inclusion from v5.11-rc1 commit fa69ee5aa48b5b52e8028c2eb486906e9998d081 category: bugfix issue: I6AUB0 CVE: NA Reference: https://patchwork.kernel.org/project/netdevbpf/patch/20201125224840.2014773-1-elver@google.com/ Signed-off-by: gaochao --------------------------------------- It turns out that usage of skb extensions can cause memory leaks. Ido Schimmel reported: "[...] there are instances that blindly overwrite 'skb->extensions' by invoking skb_copy_header() after __alloc_skb()." 
Therefore, give up on using skb extensions for KCOV handle, and instead directly store kcov_handle in sk_buff. Fixes: 6370cc3bbd8a ("net: add kcov handle to skb extensions") Fixes: 85ce50d337d1 ("net: kcov: don't select SKB_EXTENSIONS when there is no NET") Fixes: 97f53a08cba1 ("net: linux/skbuff.h: combine SKB_EXTENSIONS + KCOV handling") Link: https://lore.kernel.org/linux-wireless/20201121160941.GA485907@shredder.lan/ Reported-by: Ido Schimmel Signed-off-by: Marco Elver Link: https://lore.kernel.org/r/20201125224840.2014773-1-elver@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 37 +++++++++++++------------------------ lib/Kconfig.debug | 1 - net/core/skbuff.c | 6 ------ 3 files changed, 13 insertions(+), 31 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a77b38e738e9..8724736f00a7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -702,6 +702,7 @@ typedef unsigned char *sk_buff_data_t; * @transport_header: Transport layer header * @network_header: Network layer header * @mac_header: Link layer header + * @kcov_handle: KCOV remote handle for remote coverage collection * @tail: Tail pointer * @end: End pointer * @head: Head of buffer @@ -906,6 +907,10 @@ struct sk_buff { __u16 network_header; __u16 mac_header; +#ifdef CONFIG_KCOV + u64 kcov_handle; +#endif + /* private: */ __u32 headers_end[0]; /* public: */ @@ -4152,9 +4157,6 @@ enum skb_ext_id { #endif #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, -#endif -#if IS_ENABLED(CONFIG_KCOV) - SKB_EXT_KCOV_HANDLE, #endif SKB_EXT_NUM, /* must be last */ }; @@ -4610,35 +4612,22 @@ static inline void skb_reset_redirect(struct sk_buff *skb) #endif } -#if IS_ENABLED(CONFIG_KCOV) && IS_ENABLED(CONFIG_SKB_EXTENSIONS) static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { - /* Do not allocate skb extensions only to set kcov_handle to zero - * (as it is zero by default). However, if the extensions are - * already allocated, update kcov_handle anyway since - * skb_set_kcov_handle can be called to zero a previously set - * value. - */ - if (skb_has_extensions(skb) || kcov_handle) { - u64 *kcov_handle_ptr = skb_ext_add(skb, SKB_EXT_KCOV_HANDLE); - - if (kcov_handle_ptr) - *kcov_handle_ptr = kcov_handle; - } +#ifdef CONFIG_KCOV + skb->kcov_handle = kcov_handle; +#endif } static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { - u64 *kcov_handle = skb_ext_find(skb, SKB_EXT_KCOV_HANDLE); - - return kcov_handle ? *kcov_handle : 0; -} +#ifdef CONFIG_KCOV + return skb->kcov_handle; #else -static inline void skb_set_kcov_handle(struct sk_buff *skb, - const u64 kcov_handle) { } -static inline u64 skb_get_kcov_handle(struct sk_buff *skb) { return 0; } -#endif /* CONFIG_KCOV && CONFIG_SKB_EXTENSIONS */ + return 0; +#endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 95f909540587..e163560a4155 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1868,7 +1868,6 @@ config KCOV depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS select DEBUG_FS select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC - select SKB_EXTENSIONS if NET help KCOV exposes kernel code coverage information in a form suitable for coverage-guided fuzzing (randomized testing). 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0215ae898e83..bb6242373261 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4257,9 +4257,6 @@ static const u8 skb_ext_type_len[] = { #if IS_ENABLED(CONFIG_MPTCP) [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext), #endif -#if IS_ENABLED(CONFIG_KCOV) - [SKB_EXT_KCOV_HANDLE] = SKB_EXT_CHUNKSIZEOF(u64), -#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4276,9 +4273,6 @@ static __always_inline unsigned int skb_ext_total_length(void) #endif #if IS_ENABLED(CONFIG_MPTCP) skb_ext_type_len[SKB_EXT_MPTCP] + -#endif -#if IS_ENABLED(CONFIG_KCOV) - skb_ext_type_len[SKB_EXT_KCOV_HANDLE] + #endif 0; } -- Gitee From 2aecc4958962dbba34f191bfc1d2005b5e07f11e Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 1 Jan 2023 16:57:43 -0500 Subject: [PATCH 095/243] net: sched: atm: dont intepret cls results when asked to drop stable inclusion from stable-5.10.163 commit 5f65f48516bfeebaab1ccc52c8fad698ddf21282 category: bugfix issue: I6B4G7 CVE: CVE-2023-23455 Signed-off-by: gaochao --------------------------------------- [ Upstream commit a2965c7be0522eaa18808684b7b82b248515511b ] If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume res.class contains a valid pointer Fixes: b0188d4dbe5f ("[NET_SCHED]: sch_atm: Lindent") Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_atm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 1c281cc81f57..5e0d55ac9c5d 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -396,10 +396,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, result = tcf_classify(skb, fl, &res, true); if (result < 0) continue; + if (result == TC_ACT_SHOT) + goto done; + flow = (struct atm_flow_data *)res.class; if (!flow) flow = lookup_flow(sch, res.classid); - goto done; + goto drop; } } flow = NULL; -- Gitee From eccbb97707eb41641399e78c5e7052f8f4cd6c81 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 1 Jan 2023 16:57:44 -0500 Subject: [PATCH 096/243] net: sched: cbq: dont intepret cls results when asked to drop stable inclusion from stable-5.10.163 commit b2c917e510e5ddbc7896329c87d20036c8b82952 category: bugfix issue: I6B4G5 CVE: CVE-2023-23454 Signed-off-by: gaochao --------------------------------------- [ Upstream commit caa4b35b4317d5147b3ab0fbdc9c075c7d2e9c12 ] If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume that res.class contains a valid pointer Sample splat reported by Kyle Zeng [ 5.405624] 0: reclassify loop, rule prio 0, protocol 800 [ 5.406326] ================================================================== [ 5.407240] BUG: KASAN: slab-out-of-bounds in cbq_enqueue+0x54b/0xea0 [ 5.407987] Read of size 1 at addr ffff88800e3122aa by task poc/299 [ 5.408731] [ 5.408897] CPU: 0 PID: 299 Comm: poc Not tainted 5.10.155+ #15 [ 5.409516] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 5.410439] Call Trace: [ 5.410764] dump_stack+0x87/0xcd [ 5.411153] print_address_description+0x7a/0x6b0 [ 5.411687] ? vprintk_func+0xb9/0xc0 [ 5.411905] ? printk+0x76/0x96 [ 5.412110] ? cbq_enqueue+0x54b/0xea0 [ 5.412323] kasan_report+0x17d/0x220 [ 5.412591] ? cbq_enqueue+0x54b/0xea0 [ 5.412803] __asan_report_load1_noabort+0x10/0x20 [ 5.413119] cbq_enqueue+0x54b/0xea0 [ 5.413400] ? 
__kasan_check_write+0x10/0x20 [ 5.413679] __dev_queue_xmit+0x9c0/0x1db0 [ 5.413922] dev_queue_xmit+0xc/0x10 [ 5.414136] ip_finish_output2+0x8bc/0xcd0 [ 5.414436] __ip_finish_output+0x472/0x7a0 [ 5.414692] ip_finish_output+0x5c/0x190 [ 5.414940] ip_output+0x2d8/0x3c0 [ 5.415150] ? ip_mc_finish_output+0x320/0x320 [ 5.415429] __ip_queue_xmit+0x753/0x1760 [ 5.415664] ip_queue_xmit+0x47/0x60 [ 5.415874] __tcp_transmit_skb+0x1ef9/0x34c0 [ 5.416129] tcp_connect+0x1f5e/0x4cb0 [ 5.416347] tcp_v4_connect+0xc8d/0x18c0 [ 5.416577] __inet_stream_connect+0x1ae/0xb40 [ 5.416836] ? local_bh_enable+0x11/0x20 [ 5.417066] ? lock_sock_nested+0x175/0x1d0 [ 5.417309] inet_stream_connect+0x5d/0x90 [ 5.417548] ? __inet_stream_connect+0xb40/0xb40 [ 5.417817] __sys_connect+0x260/0x2b0 [ 5.418037] __x64_sys_connect+0x76/0x80 [ 5.418267] do_syscall_64+0x31/0x50 [ 5.418477] entry_SYSCALL_64_after_hwframe+0x61/0xc6 [ 5.418770] RIP: 0033:0x473bb7 [ 5.418952] Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34 24 89 [ 5.420046] RSP: 002b:00007fffd20eb0f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a [ 5.420472] RAX: ffffffffffffffda RBX: 00007fffd20eb578 RCX: 0000000000473bb7 [ 5.420872] RDX: 0000000000000010 RSI: 00007fffd20eb110 RDI: 0000000000000007 [ 5.421271] RBP: 00007fffd20eb150 R08: 0000000000000001 R09: 0000000000000004 [ 5.421671] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 [ 5.422071] R13: 00007fffd20eb568 R14: 00000000004fc740 R15: 0000000000000002 [ 5.422471] [ 5.422562] Allocated by task 299: [ 5.422782] __kasan_kmalloc+0x12d/0x160 [ 5.423007] kasan_kmalloc+0x5/0x10 [ 5.423208] kmem_cache_alloc_trace+0x201/0x2e0 [ 5.423492] tcf_proto_create+0x65/0x290 [ 5.423721] tc_new_tfilter+0x137e/0x1830 [ 5.423957] rtnetlink_rcv_msg+0x730/0x9f0 [ 5.424197] netlink_rcv_skb+0x166/0x300 [ 5.424428] rtnetlink_rcv+0x11/0x20 [ 5.424639] netlink_unicast+0x673/0x860 [ 5.424870] netlink_sendmsg+0x6af/0x9f0 [ 5.425100] __sys_sendto+0x58d/0x5a0 [ 5.425315] __x64_sys_sendto+0xda/0xf0 [ 5.425539] do_syscall_64+0x31/0x50 [ 5.425764] entry_SYSCALL_64_after_hwframe+0x61/0xc6 [ 5.426065] [ 5.426157] The buggy address belongs to the object at ffff88800e312200 [ 5.426157] which belongs to the cache kmalloc-128 of size 128 [ 5.426955] The buggy address is located 42 bytes to the right of [ 5.426955] 128-byte region [ffff88800e312200, ffff88800e312280) [ 5.427688] The buggy address belongs to the page: [ 5.427992] page:000000009875fabc refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xe312 [ 5.428562] flags: 0x100000000000200(slab) [ 5.428812] raw: 0100000000000200 dead000000000100 dead000000000122 ffff888007843680 [ 5.429325] raw: 0000000000000000 0000000000100010 00000001ffffffff ffff88800e312401 [ 5.429875] page dumped because: kasan: bad access detected [ 5.430214] page->mem_cgroup:ffff88800e312401 [ 5.430471] [ 5.430564] Memory state around the buggy address: [ 5.430846] ffff88800e312180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 5.431267] ffff88800e312200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc [ 5.431705] >ffff88800e312280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 5.432123] ^ [ 5.432391] ffff88800e312300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc [ 5.432810] ffff88800e312380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 5.433229] ================================================================== [ 5.433648] Disabling lock 
debugging due to kernel taint Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Kyle Zeng Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_cbq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 4a78fcf5d4f9..01853de86bc0 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -231,6 +231,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) result = tcf_classify(skb, fl, &res, true); if (!fl || result < 0) goto fallback; + if (result == TC_ACT_SHOT) + return NULL; cl = (void *)res.class; if (!cl) { @@ -251,8 +253,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; fallthrough; - case TC_ACT_SHOT: - return NULL; case TC_ACT_RECLASSIFY: return cbq_reclassify(skb, cl); } -- Gitee From df552385e7150654eb3be75de38cc8a9ec63b2a8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 11 Jan 2023 17:07:33 +0100 Subject: [PATCH 097/243] netfilter: nft_payload: incorrect arithmetics when fetching VLAN header bits stable inclusion from stable-5.10.164 commit 550efeff989b041f3746118c0ddd863c39ddc1aa category: bugfix issue: I6B4FD CVE: CVE-2023-0179 Signed-off-by: gaochao --------------------------------------- commit 696e1a48b1a1b01edad542a1ef293665864a4dd0 upstream. If the offset + length goes over the ethernet + vlan header, then the length is adjusted to copy the bytes that are within the boundaries of the vlan_ethhdr scratchpad area. The remaining bytes beyond ethernet + vlan header are copied directly from the skbuff data area. Fix incorrect arithmetic operator: subtract, not add, the size of the vlan header in case of double-tagged packets to adjust the length accordingly to address CVE-2023-0179. Reported-by: Davide Ornaghi Fixes: f6ae9f120dad ("netfilter: nft_payload: add C-VLAN support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_payload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 6a8495bd08bb..e726c893ffbd 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -62,7 +62,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) return false; if (offset + len > VLAN_ETH_HLEN + vlan_hlen) - ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen; + ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen; memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen); -- Gitee From 86a10faeb0370248e03ca23f0974f0aab60ce747 Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Sun, 29 Jan 2023 06:47:43 +0000 Subject: [PATCH 098/243] [Backport] rndis_wlan: Prevent buffer overflow in rndis_query_oid maillist inclusion category: bugfix issue: I6AZ32 CVE: CVE-2023-23559 Reference: https://patchwork.kernel.org/project/linux-wireless/patch/20230111175031.7049-1-szymon.heidrich@gmail.com/ Signed-off-by: gaochao --------------------------------------- Since resplen and respoffs are signed integers sufficiently large values of unsigned int len and offset members of RNDIS response will result in negative values of prior variables. This may be utilized to bypass implemented security checks to either extract memory contents by manipulating offset or overflow the data buffer via memcpy by manipulating both offset and len. Additionally assure that sum of resplen and respoffs does not overflow so buffer boundaries are kept. 
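To make the failure mode concrete, the following is a minimal userspace sketch of the same class of bug, not the rndis_wlan code itself; the struct and function names (resp, safe_copy_len) are invented for illustration. A device-controlled 32-bit length read into a signed int can appear negative and slip past a "larger than buffer" comparison, while keeping the arithmetic in size_t and clamping with min() preserves the buffer boundary:

#include <stdio.h>
#include <stdint.h>

#define BUFLEN 256

/* hypothetical response header carrying device-controlled fields */
struct resp {
	uint32_t len;
	uint32_t offset;
};

/* unsigned (size_t) arithmetic plus clamping, as in the fixed driver */
static size_t safe_copy_len(const struct resp *r, size_t buflen)
{
	size_t resplen = r->len;
	size_t respoffs = r->offset;

	if (respoffs > buflen)		/* offset outside buffer: reject */
		return 0;
	/* min(resplen, buflen - respoffs): never walks past the buffer */
	return resplen < buflen - respoffs ? resplen : buflen - respoffs;
}

int main(void)
{
	struct resp evil = { .len = 0x80000000u, .offset = 16 };

	/*
	 * On the usual two's-complement targets this prints a negative
	 * value, which is how a signed resplen could pass a "> buflen"
	 * check in the unfixed code.
	 */
	printf("signed view of len: %d\n", (int)evil.len);
	printf("clamped copy length: %zu\n", safe_copy_len(&evil, BUFLEN));
	return 0;
}

The clamp mirrors the min(resplen, buflen - respoffs) expression introduced below and avoids both the negative-length case and any resplen + respoffs overflow, since that sum is never formed.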
Fixes: 80f8c5b434f9 ("rndis_wlan: copy only useful data from rndis_command respond") Signed-off-by: Szymon Heidrich --- drivers/net/wireless/rndis_wlan.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 75b5d545b49e..dc076d844868 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -694,8 +694,8 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len) struct rndis_query *get; struct rndis_query_c *get_c; } u; - int ret, buflen; - int resplen, respoffs, copylen; + int ret; + size_t buflen, resplen, respoffs, copylen; buflen = *len + sizeof(*u.get); if (buflen < CONTROL_BUFFER_SIZE) @@ -730,22 +730,15 @@ static int rndis_query_oid(struct usbnet *dev, u32 oid, void *data, int *len) if (respoffs > buflen) { /* Device returned data offset outside buffer, error. */ - netdev_dbg(dev->net, "%s(%s): received invalid " - "data offset: %d > %d\n", __func__, - oid_to_string(oid), respoffs, buflen); + netdev_dbg(dev->net, + "%s(%s): received invalid data offset: %zu > %zu\n", + __func__, oid_to_string(oid), respoffs, buflen); ret = -EINVAL; goto exit_unlock; } - if ((resplen + respoffs) > buflen) { - /* Device would have returned more data if buffer would - * have been big enough. Copy just the bits that we got. - */ - copylen = buflen - respoffs; - } else { - copylen = resplen; - } + copylen = min(resplen, buflen - respoffs); if (copylen > *len) copylen = *len; -- Gitee From c03daac9ff061efb375ae28f624dc239902e038e Mon Sep 17 00:00:00 2001 From: Frederick Lawler Date: Mon, 9 Jan 2023 10:39:06 -0600 Subject: [PATCH 099/243] net: sched: disallow noqueue for qdisc classes stable inclusion from stable-5.10.163 commit 9f7bc28a6b8afc2274e25650511555e93f45470f category: bugfix issue: I6BIHC CVE: CVE-2022-47929 Signed-off-by: gaochao --------------------------------------- commit 96398560f26aa07e8f2969d73c8197e6a6d10407 upstream. While experimenting with applying noqueue to a classful queue discipline, we discovered a NULL pointer dereference in the __dev_queue_xmit() path that generates a kernel OOPS: # dev=enp0s5 # tc qdisc replace dev $dev root handle 1: htb default 1 # tc class add dev $dev parent 1: classid 1:1 htb rate 10mbit # tc qdisc add dev $dev parent 1:1 handle 10: noqueue # ping -I $dev -w 1 -c 1 1.1.1.1 [ 2.172856] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 2.173217] #PF: supervisor instruction fetch in kernel mode ... [ 2.178451] Call Trace: [ 2.178577] [ 2.178686] htb_enqueue+0x1c8/0x370 [ 2.178880] dev_qdisc_enqueue+0x15/0x90 [ 2.179093] __dev_queue_xmit+0x798/0xd00 [ 2.179305] ? _raw_write_lock_bh+0xe/0x30 [ 2.179522] ? __local_bh_enable_ip+0x32/0x70 [ 2.179759] ? ___neigh_create+0x610/0x840 [ 2.179968] ? eth_header+0x21/0xc0 [ 2.180144] ip_finish_output2+0x15e/0x4f0 [ 2.180348] ? dst_output+0x30/0x30 [ 2.180525] ip_push_pending_frames+0x9d/0xb0 [ 2.180739] raw_sendmsg+0x601/0xcb0 [ 2.180916] ? _raw_spin_trylock+0xe/0x50 [ 2.181112] ? _raw_spin_unlock_irqrestore+0x16/0x30 [ 2.181354] ? get_page_from_freelist+0xcd6/0xdf0 [ 2.181594] ? sock_sendmsg+0x56/0x60 [ 2.181781] sock_sendmsg+0x56/0x60 [ 2.181958] __sys_sendto+0xf7/0x160 [ 2.182139] ? handle_mm_fault+0x6e/0x1d0 [ 2.182366] ? do_user_addr_fault+0x1e1/0x660 [ 2.182627] __x64_sys_sendto+0x1b/0x30 [ 2.182881] do_syscall_64+0x38/0x90 [ 2.183085] entry_SYSCALL_64_after_hwframe+0x63/0xcd ... 
[ 2.187402] Previously in commit d66d6c3152e8 ("net: sched: register noqueue qdisc"), NULL was set for the noqueue discipline on noqueue init so that __dev_queue_xmit() falls through for the noqueue case. This also sets a bypass of the enqueue NULL check in the register_qdisc() function for the struct noqueue_disc_ops. Classful queue disciplines make it past the NULL check in __dev_queue_xmit() because the discipline is set to htb (in this case), and then in the call to __dev_xmit_skb(), it calls into htb_enqueue() which grabs a leaf node for a class and then calls qdisc_enqueue() by passing in a queue discipline which assumes ->enqueue() is not set to NULL. Fix this by not allowing classes to be assigned to the noqueue discipline. Linux TC Notes states that classes cannot be set to the noqueue discipline. [1] Let's enforce that here. Links: 1. https://linux-tc-notes.sourceforge.net/tc/doc/sch_noqueue.txt Fixes: d66d6c3152e8 ("net: sched: register noqueue qdisc") Cc: stable@vger.kernel.org Signed-off-by: Frederick Lawler Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20230109163906.706000-1-fred@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 7b24582a8a16..faf8d0393da2 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1113,6 +1113,11 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, return -ENOENT; } + if (new && new->ops == &noqueue_qdisc_ops) { + NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); + return -EINVAL; + } + err = cops->graft(parent, cl, new, &old, extack); if (err) return err; -- Gitee From 1b81aa0e65b04452d448f2537d411468d4816538 Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Fri, 29 Jul 2022 19:02:16 +0800 Subject: [PATCH 100/243] i2c: ismt: Fix an out-of-bounds bug in ismt_access() mainline inclusion from mainline-6.2-rc1 commit 39244cc754829bf707dccd12e2ce37510f5b1f8d category: bugfix issue: I5O7R3 CVE: CVE-2022-2873 Signed-off-by: gaochao --------------------------------------- When the driver does not check the data from the user, the variable 'data->block[0]' may be very large to cause an out-of-bounds bug. The following log can reveal it: [ 33.995542] i2c i2c-1: ioctl, cmd=0x720, arg=0x7ffcb3dc3a20 [ 33.995978] ismt_smbus 0000:00:05.0: I2C_SMBUS_BLOCK_DATA: WRITE [ 33.996475] ================================================================== [ 33.996995] BUG: KASAN: out-of-bounds in ismt_access.cold+0x374/0x214b [ 33.997473] Read of size 18446744073709551615 at addr ffff88810efcfdb1 by task ismt_poc/485 [ 33.999450] Call Trace: [ 34.001849] memcpy+0x20/0x60 [ 34.002077] ismt_access.cold+0x374/0x214b [ 34.003382] __i2c_smbus_xfer+0x44f/0xfb0 [ 34.004007] i2c_smbus_xfer+0x10a/0x390 [ 34.004291] i2cdev_ioctl_smbus+0x2c8/0x710 [ 34.005196] i2cdev_ioctl+0x5ec/0x74c Fix this bug by checking the size of 'data->block[0]' first. 
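As a side note for reviewers, here is a minimal userspace sketch of the check this patch adds, with invented names (smbus_block, prepare_block_write); it is not the ismt driver code, only an illustration that the user-supplied count byte in block[0] has to be validated against the SMBus maximum before it is used to size a DMA transfer or memcpy:

#include <stdio.h>
#include <string.h>
#include <errno.h>

#define I2C_SMBUS_BLOCK_MAX 32	/* SMBus block transfer limit */

struct smbus_block {
	unsigned char block[I2C_SMBUS_BLOCK_MAX + 2];	/* block[0] = length */
};

static int prepare_block_write(const struct smbus_block *data,
			       unsigned char *dma_buf, size_t bufsize)
{
	size_t len = data->block[0];

	/* reject 0 and anything beyond the spec maximum before sizing DMA */
	if (len < 1 || len > I2C_SMBUS_BLOCK_MAX)
		return -EINVAL;
	if (len + 1 > bufsize)
		return -EINVAL;

	memcpy(dma_buf, data->block, len + 1);	/* count byte + payload */
	return (int)(len + 1);
}

int main(void)
{
	unsigned char dma_buf[64];
	struct smbus_block req = { .block = { 4, 0xde, 0xad, 0xbe, 0xef } };

	printf("dma size: %d\n",
	       prepare_block_write(&req, dma_buf, sizeof(dma_buf)));
	return 0;
}

With block[0] forced to an out-of-range value the helper returns -EINVAL instead of handing an oversized length to the copy, which is the same effect as the data->block[0] bounds check added to ismt_access() below.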
Fixes: 13f35ac14cd0 ("i2c: Adding support for Intel iSMT SMBus 2.0 host controller") Signed-off-by: Zheyu Ma Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-ismt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index a35a27c320e7..2d88627d1813 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -501,6 +501,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, if (read_write == I2C_SMBUS_WRITE) { /* Block Write */ dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA: WRITE\n"); + if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + dma_size = data->block[0] + 1; dma_direction = DMA_TO_DEVICE; desc->wr_len_cmd = dma_size; -- Gitee From 755efea68125214345f279b370b49117491b2c32 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jan 2023 08:59:06 +0800 Subject: [PATCH 101/243] ipv6: raw: Deduct extension header length in rawv6_push_pending_frames stable inclusion from stable-5.10.164 commit 6c9e2c11c33c35563d34d12b343d43b5c12200b5 category: bugfix issue: I6BQ51 CVE: CVE-2023-0394 Signed-off-by: gaochao --------------------------------------- commit cb3e9864cdbe35ff6378966660edbcbac955fe17 upstream. The total cork length created by ip6_append_data includes extension headers, so we must exclude them when comparing them against the IPV6_CHECKSUM offset which does not include extension headers. Reported-by: Kyle Zeng Fixes: 357b40a18b04 ("[IPV6]: IPV6_CHECKSUM socket option can corrupt kernel memory") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/raw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 38349054e361..c5da559dda17 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -539,6 +539,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { + struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; @@ -556,6 +557,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; + opt = inet6_sk(sk)->cork.opt; + total_len -= opt ? 
opt->opt_flen : 0; + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); -- Gitee From 2c8b78a0577ef7e3662c4feea24fcad8b5432a81 Mon Sep 17 00:00:00 2001 From: zhangkaixiang Date: Mon, 20 Feb 2023 14:40:22 +0800 Subject: [PATCH 102/243] add sharefs file system Signed-off-by: zhangkaixiang --- fs/Kconfig | 1 + fs/Makefile | 1 + fs/sharefs/Kconfig | 6 + fs/sharefs/Makefile | 12 ++ fs/sharefs/authentication.c | 78 ++++++++ fs/sharefs/authentication.h | 68 +++++++ fs/sharefs/config.c | 372 ++++++++++++++++++++++++++++++++++++ fs/sharefs/dentry.c | 41 ++++ fs/sharefs/file.c | 241 +++++++++++++++++++++++ fs/sharefs/inode.c | 134 +++++++++++++ fs/sharefs/lookup.c | 329 +++++++++++++++++++++++++++++++ fs/sharefs/main.c | 193 +++++++++++++++++++ fs/sharefs/sharefs.h | 242 +++++++++++++++++++++++ fs/sharefs/super.c | 202 ++++++++++++++++++++ 14 files changed, 1920 insertions(+) create mode 100644 fs/sharefs/Kconfig create mode 100644 fs/sharefs/Makefile create mode 100644 fs/sharefs/authentication.c create mode 100644 fs/sharefs/authentication.h create mode 100644 fs/sharefs/config.c create mode 100644 fs/sharefs/dentry.c create mode 100644 fs/sharefs/file.c create mode 100644 fs/sharefs/inode.c create mode 100644 fs/sharefs/lookup.c create mode 100644 fs/sharefs/main.c create mode 100644 fs/sharefs/sharefs.h create mode 100644 fs/sharefs/super.c diff --git a/fs/Kconfig b/fs/Kconfig index b95f212be39e..05c389c7a600 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -23,6 +23,7 @@ config FS_IOMAP source "fs/ext2/Kconfig" source "fs/ext4/Kconfig" source "fs/hmdfs/Kconfig" +source "fs/sharefs/Kconfig" source "fs/jbd2/Kconfig" config FS_MBCACHE diff --git a/fs/Makefile b/fs/Makefile index d71954aaba20..d00c7a2e854e 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT4_FS) += ext4/ obj-$(CONFIG_HMDFS_FS) += hmdfs/ +obj-$(CONFIG_SHARE_FS) += sharefs/ # We place ext4 before ext2 so that clean ext3 root fs's do NOT mount using the # ext2 driver, which doesn't know about journalling! Explicitly request ext2 # by giving the rootfstype= parameter. diff --git a/fs/sharefs/Kconfig b/fs/sharefs/Kconfig new file mode 100644 index 000000000000..bce24bfdc21e --- /dev/null +++ b/fs/sharefs/Kconfig @@ -0,0 +1,6 @@ +config SHARE_FS + tristate "SHAREFS filesystem support" + help + SHAREFS is an overlay file system.SHAREFS is used for file sharing + between applications. Sharefs manages permissions through different + permissions for reading and writing directories. diff --git a/fs/sharefs/Makefile b/fs/sharefs/Makefile new file mode 100644 index 000000000000..9b84e26d1cf6 --- /dev/null +++ b/fs/sharefs/Makefile @@ -0,0 +1,12 @@ +obj-$(CONFIG_SHARE_FS) += sharefs.o +ccflags-y += -I$(src) + +sharefs-y := dentry.o file.o inode.o main.o super.o lookup.o authentication.o config.o +ccflags-y += -I$(src) -Werror -Wall +export CONFIG_SHARE_FS := m +KDIR ::= /lib/modules/$(shell uname -r)/build +PWD := $(shell pwd) +all: + $(MAKE) -C $(KDIR) M=$(PWD) modules +clean: + $(MAKE) -C $(KDIR) M=$(PWD) clean \ No newline at end of file diff --git a/fs/sharefs/authentication.c b/fs/sharefs/authentication.c new file mode 100644 index 000000000000..71bd2d7275fd --- /dev/null +++ b/fs/sharefs/authentication.c @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/sharefs/authentication.c + * + * Copyright (c) 2023 Huawei Device Co., Ltd. 
+ */ +#include "authentication.h" + +static inline __u16 perm_get_next_level(__u16 perm) +{ + __u16 level = (perm & SHAREFS_PERM_MASK) + 1; + + if (level <= SHAREFS_PERM_OTHER) + return level; + else + return SHAREFS_PERM_OTHER; +} + +void fixup_perm_from_level(struct inode *dir, struct dentry *dentry) +{ + struct sharefs_inode_info *hii = SHAREFS_I(dir); + struct inode *dinode = d_inode(dentry); + struct sharefs_inode_info *dinfo = SHAREFS_I(dinode); + const unsigned char* cur_name = dentry->d_name.name; + __u16 level = perm_get_next_level(hii->perm); + __u16 perm = 0; + int bid = 0; + + if (IS_ERR_OR_NULL(dinode)) + return; + dinode->i_uid = dir->i_uid; + dinode->i_gid = dir->i_gid; + switch (level) + { + case SHAREFS_PERM_MNT: + bid = get_bundle_uid(SHAREFS_SB(dentry->d_sb), + dentry->d_name.name); + perm = level; + if (bid != 0) { + dinode->i_uid = KUIDT_INIT(bid); + dinode->i_gid = KGIDT_INIT(bid); + } else { + dinode->i_uid = ROOT_UID; + dinode->i_gid = ROOT_GID; + } + dinode->i_mode = (dinode->i_mode & S_IFMT) | SHAREFS_PERM_READONLY_DIR; + break; + case SHAREFS_PERM_DFS: + if (!strcmp(cur_name, SHAREFS_READ_DIR)) { + perm = SHAREFS_DIR_TYPE_READONLY | level; + sharefs_set_read_perm(dinode); + } else if (!strcmp(cur_name, SHAREFS_READWRITE_DIR)) { + perm = SHAREFS_DIR_TYPE_READWRITE | level; + sharefs_set_read_write_perm(dinode); + } + break; + case SHAREFS_PERM_OTHER: + if (is_read_only_auth(hii->perm)) { + perm = SHAREFS_DIR_TYPE_READONLY | SHAREFS_PERM_DFS; + sharefs_set_read_perm(dinode); + } else if (is_read_write_auth(hii->perm)) { + perm = SHAREFS_DIR_TYPE_READWRITE | SHAREFS_PERM_DFS; + sharefs_set_read_write_perm(dinode); + } + break; + default: + /* ! it should not get to here */ + sharefs_err("sharedfs perm incorrect got default case, level:%u", level); + break; + } + dinfo->perm = perm; +} + +void sharefs_root_inode_perm_init(struct inode *root_inode) +{ + struct sharefs_inode_info *hii = SHAREFS_I(root_inode); + hii->perm = SHAREFS_PERM_FIX; +} \ No newline at end of file diff --git a/fs/sharefs/authentication.h b/fs/sharefs/authentication.h new file mode 100644 index 000000000000..84a1a8dcd660 --- /dev/null +++ b/fs/sharefs/authentication.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/sharefs/authentication.h + * + * Copyright (c) 2023 Huawei Device Co., Ltd. 
+ */ + +#include "sharefs.h" + +#define OID_ROOT 0 + +#define SHAREFS_PERM_MASK 0x000F + +#define SHAREFS_PERM_FIX 0 +#define SHAREFS_PERM_MNT 1 +#define SHAREFS_PERM_DFS 2 +#define SHAREFS_PERM_OTHER 3 + +#define SHAREFS_READ_DIR "r" +#define SHAREFS_READWRITE_DIR "rw" + +#define BASE_USER_RANGE 200000 /* offset for uid ranges for each user */ + + +#define SHAREFS_DIR_TYPE_MASK 0x00F0 +#define SHAREFS_DIR_TYPE_READONLY 0x0010 +#define SHAREFS_DIR_TYPE_READWRITE 0x0020 + +#define SHAREFS_PERM_READONLY_DIR 00550 +#define SHAREFS_PERM_READONLY_FILE 00440 +#define SHAREFS_PERM_READWRITE_DIR 00550 +#define SHAREFS_PERM_READWRITE_FILE 00660 + +extern int get_bid_config(const char *bname); +extern int __init sharefs_init_configfs(void); +extern void sharefs_exit_configfs(void); + +void sharefs_root_inode_perm_init(struct inode *root_inode); +void fixup_perm_from_level(struct inode *dir, struct dentry *dentry); + +static inline bool is_read_only_auth(__u16 perm) +{ + return (perm & SHAREFS_DIR_TYPE_MASK) == SHAREFS_DIR_TYPE_READONLY; +} + +static inline bool is_read_write_auth(__u16 perm) +{ + return (perm & SHAREFS_DIR_TYPE_MASK) == SHAREFS_DIR_TYPE_READWRITE; +} + +static inline void sharefs_set_read_perm(struct inode *inode) { + if (S_ISDIR(inode->i_mode)) + inode->i_mode = (inode->i_mode & S_IFMT) | SHAREFS_PERM_READONLY_DIR; + else + inode->i_mode = (inode->i_mode & S_IFMT) | SHAREFS_PERM_READONLY_FILE; +} + +static inline void sharefs_set_read_write_perm(struct inode *inode) { + if (S_ISDIR(inode->i_mode)) + inode->i_mode = (inode->i_mode & S_IFMT) | SHAREFS_PERM_READWRITE_DIR; + else + inode->i_mode = (inode->i_mode & S_IFMT) | SHAREFS_PERM_READWRITE_FILE; +} + +static inline int get_bundle_uid(struct sharefs_sb_info *sbi, const char *bname) +{ + return sbi->user_id * BASE_USER_RANGE + get_bid_config(bname); +} diff --git a/fs/sharefs/config.c b/fs/sharefs/config.c new file mode 100644 index 000000000000..9bea53e555ad --- /dev/null +++ b/fs/sharefs/config.c @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs/sharefs/config.c + * + * Copyright (c) 2023 Huawei Device Co., Ltd. 
+ */ + +#include +#include +#include +#include +#include +#include "sharefs.h" + +static struct kmem_cache *sharefs_bid_entry_cachep; + +struct sharefs_bid_entry { + struct hlist_node node; + struct qstr str; + int id; +}; + +struct sharefs_config_bitem { + struct config_item item; + struct qstr str; +}; + +static unsigned int make_hash(const char *name, unsigned int len) +{ + unsigned long hash; + + hash = init_name_hash(0); + while (len--) + hash = partial_name_hash(tolower(*name++), hash); + + return end_name_hash(hash); +} + +static struct qstr make_qstr(const char *name) +{ + struct qstr str; + str.name = name; + str.len = strlen(name); + str.hash = make_hash(str.name, str.len); + + return str; +} + +static struct sharefs_bid_entry *alloc_bid_entry(const char *name, int id) +{ + struct sharefs_bid_entry *bid_entry; + char *bid_entry_name; + + bid_entry = kmem_cache_alloc(sharefs_bid_entry_cachep, GFP_KERNEL); + if (!bid_entry) { + bid_entry = ERR_PTR(-ENOMEM); + goto out; + } + + bid_entry_name = kstrdup(name, GFP_KERNEL); + if (!bid_entry_name) { + kmem_cache_free(sharefs_bid_entry_cachep, bid_entry); + bid_entry = ERR_PTR(-ENOMEM); + goto out; + } + + INIT_HLIST_NODE(&bid_entry->node); + bid_entry->str = make_qstr(bid_entry_name); + bid_entry->id = id; +out: + return bid_entry; +} + +static void free_bid_entry(struct sharefs_bid_entry *bid_entry) +{ + if (bid_entry == NULL) + return; + + kfree(bid_entry->str.name); + kmem_cache_free(sharefs_bid_entry_cachep, bid_entry); +} + +static struct sharefs_config_bitem *alloc_bitem(const char *name) +{ + struct sharefs_config_bitem *bitem; + char *bitem_name; + + bitem = kzalloc(sizeof(*bitem), GFP_KERNEL); + if (!bitem) { + bitem = ERR_PTR(-ENOMEM); + goto out; + } + + bitem_name = kstrdup(name, GFP_KERNEL); + if (!bitem_name) { + kfree(bitem); + bitem = ERR_PTR(-ENOMEM); + goto out; + } + + bitem->str = make_qstr(bitem_name); +out: + return bitem; +} + +static void free_bitem(struct sharefs_config_bitem *bitem) +{ + if (bitem == NULL) + return; + + kfree(bitem->str.name); + kfree(bitem); +} + +#define SHAREFS_BUNDLE_ATTRIBUTE(_attr_) \ + \ +static DEFINE_HASHTABLE(sharefs_##_attr_##_hash_table, 4); \ + \ +static DEFINE_MUTEX(sharefs_##_attr_##_hash_mutex); \ + \ +static int query_##_attr_##_hash_entry(struct qstr *str) \ +{ \ + int id = 0; \ + struct sharefs_bid_entry *bid_entry; \ + struct hlist_node *hash_node; \ + \ + mutex_lock(&sharefs_##_attr_##_hash_mutex); \ + hash_for_each_possible_safe(sharefs_##_attr_##_hash_table, \ + bid_entry, hash_node, node, str->hash) { \ + if (qstr_case_eq(str, &bid_entry->str)) { \ + id = bid_entry->id; \ + break; \ + } \ + } \ + mutex_unlock(&sharefs_##_attr_##_hash_mutex); \ + \ + return id; \ +} \ + \ +static int insert_##_attr_##_hash_entry(struct qstr *str, int id) \ +{ \ + int err = 0; \ + struct sharefs_bid_entry *bid_entry; \ + struct hlist_node *hash_node; \ + \ + sharefs_info("insert name = %s", str->name); \ + \ + mutex_lock(&sharefs_##_attr_##_hash_mutex); \ + hash_for_each_possible_safe(sharefs_##_attr_##_hash_table, \ + bid_entry, hash_node, node, str->hash) { \ + if (qstr_case_eq(str, &bid_entry->str)) { \ + bid_entry->id = id; \ + mutex_unlock(&sharefs_##_attr_##_hash_mutex); \ + goto out; \ + } \ + } \ + mutex_unlock(&sharefs_##_attr_##_hash_mutex); \ + \ + bid_entry = alloc_bid_entry(str->name, id); \ + if (IS_ERR(bid_entry)) { \ + err = PTR_ERR(bid_entry); \ + goto out; \ + } \ + \ + hash_add_rcu(sharefs_##_attr_##_hash_table, &bid_entry->node, \ + bid_entry->str.hash); \ +out: \ + 
return err; \ +} \ + \ +static void remove_##_attr_##_hash_entry(struct qstr *str) \ +{ \ + struct sharefs_bid_entry *bid_entry; \ + struct hlist_node *hash_node; \ + \ + sharefs_info("remove name = %s", str->name); \ + \ + mutex_lock(&sharefs_##_attr_##_hash_mutex); \ + hash_for_each_possible_safe(sharefs_##_attr_##_hash_table, \ + bid_entry, hash_node, node, str->hash) { \ + if (qstr_case_eq(str, &bid_entry->str)) { \ + hash_del_rcu(&bid_entry->node); \ + free_bid_entry(bid_entry); \ + break; \ + } \ + } \ + mutex_unlock(&sharefs_##_attr_##_hash_mutex); \ +} \ + \ +static void clear_##_attr_##_hash_entry(void) \ +{ \ + int index; \ + struct sharefs_bid_entry *bid_entry; \ + struct hlist_node *hash_node; \ + \ + sharefs_info("clear bid entry"); \ + \ + mutex_lock(&sharefs_##_attr_##_hash_mutex); \ + hash_for_each_safe(sharefs_##_attr_##_hash_table, index, \ + hash_node, bid_entry, node) { \ + hash_del_rcu(&bid_entry->node); \ + kfree(bid_entry->str.name); \ + kmem_cache_free(sharefs_bid_entry_cachep, bid_entry); \ + } \ + mutex_unlock(&sharefs_##_attr_##_hash_mutex); \ +} \ + \ +static int sharefs_##_attr_##_get(const char *bname) \ +{ \ + struct qstr str; \ + \ + str = make_qstr(bname); \ + return query_##_attr_##_hash_entry(&str); \ +} \ + \ +static ssize_t sharefs_##_attr_##_show(struct config_item *item, \ + char *page) \ +{ \ + int id; \ + struct sharefs_config_bitem *bitem; \ + \ + sharefs_info("show bundle id"); \ + \ + bitem = container_of(item, struct sharefs_config_bitem, item); \ + id = query_##_attr_##_hash_entry(&bitem->str); \ + \ + return scnprintf(page, PAGE_SIZE, "%u\n", id); \ +} \ + \ +static ssize_t sharefs_##_attr_##_store(struct config_item *item, \ + const char *page, size_t count) \ +{ \ + int id; \ + int err; \ + size_t size; \ + struct sharefs_config_bitem *bitem; \ + \ + sharefs_info("store bundle id"); \ + \ + bitem = container_of(item, struct sharefs_config_bitem, item); \ + \ + if (kstrtouint(page, 10, &id)) { \ + size = -EINVAL; \ + goto out; \ + } \ + \ + err = insert_##_attr_##_hash_entry(&bitem->str, id); \ + if (err) { \ + size = err; \ + goto out; \ + } \ + \ + size = count; \ +out: \ + return size; \ +} \ + \ +static struct configfs_attribute sharefs_##_attr_##_attr = { \ + .ca_name = __stringify(_attr_), \ + .ca_mode = S_IRUGO | S_IWUGO, \ + .ca_owner = THIS_MODULE, \ + .show = sharefs_##_attr_##_show, \ + .store = sharefs_##_attr_##_store, \ +}; + +SHAREFS_BUNDLE_ATTRIBUTE(bid) + +static struct configfs_attribute *sharefs_battrs[] = { + &sharefs_bid_attr, + NULL, +}; + +static void sharefs_config_bitem_release(struct config_item *item) +{ + struct sharefs_config_bitem *bitem; + + sharefs_info("release bundle item"); + + bitem = container_of(item, struct sharefs_config_bitem, item); + remove_bid_hash_entry(&bitem->str); + remove_bid_hash_entry(&bitem->str); + free_bitem(bitem); +} + +static struct configfs_item_operations sharefs_config_bitem_ops = { + .release = sharefs_config_bitem_release, +}; + +static struct config_item_type sharefs_config_bitem_type = { + .ct_item_ops = &sharefs_config_bitem_ops, + .ct_attrs = sharefs_battrs, + .ct_owner = THIS_MODULE, +}; + +static struct config_item *sharefs_make_bitem(struct config_group *group, + const char *name) +{ + struct config_item *item; + struct sharefs_config_bitem *bitem; + + bitem = alloc_bitem(name); + if (IS_ERR(bitem)) { + item = ERR_PTR(-ENOMEM); + goto out; + } + + config_item_init_type_name(&bitem->item, name, + &sharefs_config_bitem_type); + item = &bitem->item; +out: + return item; +} + 
+static struct configfs_group_operations sharefs_group_ops = { + .make_item = sharefs_make_bitem, +}; + +static struct config_item_type sharefs_group_type = { + .ct_group_ops = &sharefs_group_ops, + .ct_owner = THIS_MODULE, +}; + +static struct configfs_subsystem sharefs_subsystem = { + .su_group = { + .cg_item = { + .ci_namebuf = "sharefs", + .ci_type = &sharefs_group_type, + }, + }, +}; + +int get_bid_config(const char *bname) +{ + return sharefs_bid_get(bname); +} + +int __init sharefs_init_configfs(void) +{ + int err; + struct configfs_subsystem *subsys; + + sharefs_info("init configfs"); + + sharefs_bid_entry_cachep = kmem_cache_create("sharefs_bid_entry_cachep", + sizeof(struct sharefs_bid_entry), 0, 0, NULL); + if (!sharefs_bid_entry_cachep) { + sharefs_err("failed to create bid entry cachep"); + err = -ENOMEM; + goto out; + } + + subsys = &sharefs_subsystem; + config_group_init(&subsys->su_group); + mutex_init(&subsys->su_mutex); + + err = configfs_register_subsystem(subsys); + if (err) + sharefs_err("failed to register subsystem"); + +out: + return err; +} + +void sharefs_exit_configfs(void) +{ + sharefs_info("sharefs exit configfs"); + + configfs_unregister_subsystem(&sharefs_subsystem); + clear_bid_hash_entry(); + + kmem_cache_destroy(sharefs_bid_entry_cachep); +} \ No newline at end of file diff --git a/fs/sharefs/dentry.c b/fs/sharefs/dentry.c new file mode 100644 index 000000000000..dee29cace6b7 --- /dev/null +++ b/fs/sharefs/dentry.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fs/sharefs/dentry.c + * + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + * Copyright (c) 2023 Huawei Device Co., Ltd. + */ + +#include "sharefs.h" + +/* + * returns: 0: tell VFS to invalidate dentry in share directory + */ +static int sharefs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + return 0; +} + +static void sharefs_d_release(struct dentry *dentry) +{ + /* + * It is possible that the dentry private data is NULL in case we + * ran out of memory while initializing it in + * new_dentry_private_data. So check for NULL before attempting to + * release resources. + */ + if (SHAREFS_D(dentry)) { + /* release and reset the lower paths */ + sharefs_put_reset_lower_path(dentry); + free_dentry_private_data(dentry); + } + return; +} + +const struct dentry_operations sharefs_dops = { + .d_revalidate = sharefs_d_revalidate, + .d_release = sharefs_d_release, +}; diff --git a/fs/sharefs/file.c b/fs/sharefs/file.c new file mode 100644 index 000000000000..07458f47b885 --- /dev/null +++ b/fs/sharefs/file.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fs/sharefs/file.c + * + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + * Copyright (c) 2023 Huawei Device Co., Ltd. 
+ */ + +#include "sharefs.h" + +static int sharefs_readdir(struct file *file, struct dir_context *ctx) +{ + int err; + struct file *lower_file = NULL; + struct dentry *dentry = file->f_path.dentry; + + lower_file = sharefs_lower_file(file); + err = iterate_dir(lower_file, ctx); + file->f_pos = lower_file->f_pos; + if (err >= 0) + fsstack_copy_attr_atime(d_inode(dentry), + file_inode(lower_file)); + return err; +} + +static int sharefs_open(struct inode *inode, struct file *file) +{ + int err = 0; + struct file *lower_file = NULL; + struct path lower_path; + + /* don't open unhashed/deleted files */ + if (d_unhashed(file->f_path.dentry)) { + err = -ENOENT; + goto out_err; + } + + file->private_data = + kzalloc(sizeof(struct sharefs_file_info), GFP_KERNEL); + if (!SHAREFS_F(file)) { + err = -ENOMEM; + goto out_err; + } + + /* open lower object and link sharefs's file struct to lower's */ + sharefs_get_lower_path(file->f_path.dentry, &lower_path); + lower_file = dentry_open(&lower_path, file->f_flags, current_cred()); + path_put(&lower_path); + if (IS_ERR(lower_file)) { + err = PTR_ERR(lower_file); + lower_file = sharefs_lower_file(file); + if (lower_file) { + sharefs_set_lower_file(file, NULL); + fput(lower_file); /* fput calls dput for lower_dentry */ + } + } else { + sharefs_set_lower_file(file, lower_file); + } + + if (err) + kfree(SHAREFS_F(file)); + else { + kuid_t uid = inode->i_uid; + kgid_t gid = inode->i_gid; + mode_t mode = inode->i_mode; + fsstack_copy_attr_all(inode, sharefs_lower_inode(inode)); + inode->i_uid = uid; + inode->i_gid = gid; + inode->i_mode = mode; + } +out_err: + return err; +} + +static int sharefs_flush(struct file *file, fl_owner_t id) +{ + int err = 0; + struct file *lower_file = NULL; + + lower_file = sharefs_lower_file(file); + if (lower_file && lower_file->f_op && lower_file->f_op->flush) { + filemap_write_and_wait(file->f_mapping); + err = lower_file->f_op->flush(lower_file, id); + } + + return err; +} + +/* release all lower object references & free the file info structure */ +static int sharefs_file_release(struct inode *inode, struct file *file) +{ + struct file *lower_file; + + lower_file = sharefs_lower_file(file); + if (lower_file) { + sharefs_set_lower_file(file, NULL); + fput(lower_file); + } + + kfree(SHAREFS_F(file)); + return 0; +} + +static int sharefs_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + int err; + struct file *lower_file; + struct path lower_path; + struct dentry *dentry = file->f_path.dentry; + + err = __generic_file_fsync(file, start, end, datasync); + if (err) + goto out; + lower_file = sharefs_lower_file(file); + sharefs_get_lower_path(dentry, &lower_path); + err = vfs_fsync_range(lower_file, start, end, datasync); + sharefs_put_lower_path(dentry, &lower_path); +out: + return err; +} + +static int sharefs_fasync(int fd, struct file *file, int flag) +{ + int err = 0; + struct file *lower_file = NULL; + + lower_file = sharefs_lower_file(file); + if (lower_file->f_op && lower_file->f_op->fasync) + err = lower_file->f_op->fasync(fd, lower_file, flag); + + return err; +} + +/* + * Sharefs cannot use generic_file_llseek as ->llseek, because it would + * only set the offset of the upper file. So we have to implement our + * own method to set both the upper and lower file offsets + * consistently. 
+ */ +static loff_t sharefs_file_llseek(struct file *file, loff_t offset, int whence) +{ + int err; + struct file *lower_file; + + err = generic_file_llseek(file, offset, whence); + if (err < 0) + goto out; + + lower_file = sharefs_lower_file(file); + err = generic_file_llseek(lower_file, offset, whence); + +out: + return err; +} + +/* + * Sharefs read_iter, redirect modified iocb to lower read_iter + */ +ssize_t +sharefs_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + int err; + struct file *file = iocb->ki_filp, *lower_file; + + lower_file = sharefs_lower_file(file); + if (!lower_file->f_op->read_iter) { + err = -EINVAL; + goto out; + } + + /* prevent lower_file from being released */ + get_file(lower_file); + iocb->ki_filp = lower_file; + err = lower_file->f_op->read_iter(iocb, iter); + iocb->ki_filp = file; + fput(lower_file); + + /* update upper inode atime as needed */ + if (err >= 0 || err == -EIOCBQUEUED) + fsstack_copy_attr_atime(d_inode(file->f_path.dentry), + file_inode(lower_file)); +out: + return err; +} + +/* + * Sharefs write_iter, redirect modified iocb to lower write_iter + */ +ssize_t +sharefs_write_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + int err; + struct file *file = iocb->ki_filp, *lower_file; + + lower_file = sharefs_lower_file(file); + if (!lower_file->f_op->write_iter) { + err = -EINVAL; + goto out; + } + + get_file(lower_file); /* prevent lower_file from being released */ + iocb->ki_filp = lower_file; + err = lower_file->f_op->write_iter(iocb, iter); + iocb->ki_filp = file; + fput(lower_file); + /* update upper inode times/sizes as needed */ + if (err >= 0 || err == -EIOCBQUEUED) { + fsstack_copy_inode_size(d_inode(file->f_path.dentry), + file_inode(lower_file)); + fsstack_copy_attr_times(d_inode(file->f_path.dentry), + file_inode(lower_file)); + } +out: + return err; +} + +const struct file_operations sharefs_main_fops = { + .llseek = sharefs_file_llseek, + .open = sharefs_open, + .flush = sharefs_flush, + .release = sharefs_file_release, + .fsync = sharefs_fsync, + .fasync = sharefs_fasync, + .read_iter = sharefs_read_iter, + .write_iter = sharefs_write_iter, +}; + +/* trimmed directory options */ +const struct file_operations sharefs_dir_fops = { + .llseek = sharefs_file_llseek, + .iterate = sharefs_readdir, + .open = sharefs_open, + .release = sharefs_file_release, + .flush = sharefs_flush, + .fsync = sharefs_fsync, + .fasync = sharefs_fasync, +}; diff --git a/fs/sharefs/inode.c b/fs/sharefs/inode.c new file mode 100644 index 000000000000..985a04a643bc --- /dev/null +++ b/fs/sharefs/inode.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fs/sharefs/inode.c + * + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + * Copyright (c) 2023 Huawei Device Co., Ltd. + */ + +#include "sharefs.h" + +static const char *sharefs_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *done) +{ + DEFINE_DELAYED_CALL(lower_done); + struct dentry *lower_dentry; + struct path lower_path; + char *buf; + const char *lower_link; + + if (!dentry) + return ERR_PTR(-ECHILD); + + sharefs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + + /* + * get link from lower file system, but use a separate + * delayed_call callback. 
+ */ + lower_link = vfs_get_link(lower_dentry, &lower_done); + if (IS_ERR(lower_link)) { + buf = ERR_CAST(lower_link); + goto out; + } + + /* + * we can't pass lower link up: have to make private copy and + * pass that. + */ + buf = kstrdup(lower_link, GFP_KERNEL); + do_delayed_call(&lower_done); + if (!buf) { + buf = ERR_PTR(-ENOMEM); + goto out; + } + + fsstack_copy_attr_atime(d_inode(dentry), d_inode(lower_dentry)); + + set_delayed_call(done, kfree_link, buf); +out: + sharefs_put_lower_path(dentry, &lower_path); + return buf; +} + +static int sharefs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + struct path lower_path; + int ret; + + sharefs_get_lower_path(path->dentry, &lower_path); + ret = vfs_getattr(&lower_path, stat, request_mask, flags); + stat->ino = d_inode(path->dentry)->i_ino; + stat->uid = d_inode(path->dentry)->i_uid; + stat->gid = d_inode(path->dentry)->i_gid; + stat->mode = d_inode(path->dentry)->i_mode; + sharefs_put_lower_path(path->dentry, &lower_path); + + return ret; +} + +static ssize_t +sharefs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) +{ + int err; + struct dentry *lower_dentry; + struct path lower_path; + + sharefs_get_lower_path(dentry, &lower_path); + lower_dentry = lower_path.dentry; + if (!(d_inode(lower_dentry)->i_opflags & IOP_XATTR)) { + err = -EOPNOTSUPP; + goto out; + } + err = vfs_listxattr(lower_dentry, buffer, buffer_size); + if (err) + goto out; + fsstack_copy_attr_atime(d_inode(dentry), + d_inode(lower_path.dentry)); +out: + sharefs_put_lower_path(dentry, &lower_path); + return err; +} + +static int sharefs_permission(struct inode *inode, int mask) +{ + unsigned short mode = inode->i_mode; + kuid_t cur_uid = current_fsuid(); + if (uid_eq(cur_uid, ROOT_UID)) + return 0; + if (uid_eq(cur_uid, inode->i_uid)) { + mode >>= 6; + } else if (in_group_p(inode->i_gid)) { + mode >>= 3; + } + + if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) + return 0; + + return -EACCES; +} + +const struct inode_operations sharefs_symlink_iops = { + .permission = sharefs_permission, + .getattr = sharefs_getattr, + .get_link = sharefs_get_link, + .listxattr = sharefs_listxattr, +}; + +const struct inode_operations sharefs_dir_iops = { + .lookup = sharefs_lookup, + .permission = sharefs_permission, + .getattr = sharefs_getattr, + .listxattr = sharefs_listxattr, +}; + +const struct inode_operations sharefs_main_iops = { + .permission = sharefs_permission, + .getattr = sharefs_getattr, + .listxattr = sharefs_listxattr, +}; diff --git a/fs/sharefs/lookup.c b/fs/sharefs/lookup.c new file mode 100644 index 000000000000..47ead11f45f5 --- /dev/null +++ b/fs/sharefs/lookup.c @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fs/sharefs/lookup.c + * + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + * Copyright (c) 2023 Huawei Device Co., Ltd. + */ + +#include "sharefs.h" +#include "authentication.h" + +/* The dentry cache is just so we have properly sized dentries */ +static struct kmem_cache *sharefs_dentry_cachep; + +int sharefs_init_dentry_cache(void) +{ + sharefs_dentry_cachep = + kmem_cache_create("sharefs_dentry", + sizeof(struct sharefs_dentry_info), + 0, SLAB_RECLAIM_ACCOUNT, NULL); + + return sharefs_dentry_cachep ? 
0 : -ENOMEM; +} + +void sharefs_destroy_dentry_cache(void) +{ + if (sharefs_dentry_cachep) + kmem_cache_destroy(sharefs_dentry_cachep); +} + +void free_dentry_private_data(struct dentry *dentry) +{ + if (!dentry || !dentry->d_fsdata) + return; + kmem_cache_free(sharefs_dentry_cachep, dentry->d_fsdata); + dentry->d_fsdata = NULL; +} + +/* allocate new dentry private data */ +int new_dentry_private_data(struct dentry *dentry) +{ + struct sharefs_dentry_info *info = SHAREFS_D(dentry); + + /* use zalloc to init dentry_info.lower_path */ + info = kmem_cache_zalloc(sharefs_dentry_cachep, GFP_ATOMIC); + if (!info) + return -ENOMEM; + + spin_lock_init(&info->lock); + dentry->d_fsdata = info; + + return 0; +} + +static int sharefs_inode_test(struct inode *inode, void *candidate_lower_inode) +{ + struct inode *current_lower_inode = sharefs_lower_inode(inode); + if (current_lower_inode == (struct inode *)candidate_lower_inode) + return 1; /* found a match */ + else + return 0; /* no match */ +} + +static int sharefs_inode_set(struct inode *inode, void *lower_inode) +{ + /* we do actual inode initialization in sharefs_iget */ + return 0; +} + +struct inode *sharefs_iget(struct super_block *sb, struct inode *lower_inode) +{ + struct inode *inode; /* the new inode to return */ + + if (!igrab(lower_inode)) + return ERR_PTR(-ESTALE); + inode = iget5_locked(sb, /* our superblock */ + /* + * hashval: we use inode number, but we can + * also use "(unsigned long)lower_inode" + * instead. + */ + lower_inode->i_ino, /* hashval */ + sharefs_inode_test, /* inode comparison function */ + sharefs_inode_set, /* inode init function */ + lower_inode); /* data passed to test+set fxns */ + if (!inode) { + iput(lower_inode); + return ERR_PTR(-ENOMEM); + } + /* if found a cached inode, then just return it (after iput) */ + if (!(inode->i_state & I_NEW)) { + iput(lower_inode); + return inode; + } + + /* initialize new inode */ + inode->i_ino = lower_inode->i_ino; + sharefs_set_lower_inode(inode, lower_inode); + + atomic64_inc(&inode->i_version); + + /* use different set of inode ops for symlinks & directories */ + if (S_ISDIR(lower_inode->i_mode)) + inode->i_op = &sharefs_dir_iops; + else if (S_ISLNK(lower_inode->i_mode)) + inode->i_op = &sharefs_symlink_iops; + else + inode->i_op = &sharefs_main_iops; + + /* use different set of file ops for directories */ + if (S_ISDIR(lower_inode->i_mode)) + inode->i_fop = &sharefs_dir_fops; + else + inode->i_fop = &sharefs_main_fops; + + inode->i_atime.tv_sec = 0; + inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_sec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_ctime.tv_sec = 0; + inode->i_ctime.tv_nsec = 0; + + /* properly initialize special inodes */ + if (S_ISBLK(lower_inode->i_mode) || S_ISCHR(lower_inode->i_mode) || + S_ISFIFO(lower_inode->i_mode) || S_ISSOCK(lower_inode->i_mode)) + init_special_inode(inode, lower_inode->i_mode, + lower_inode->i_rdev); + + /* all well, copy inode attributes */ + fsstack_copy_attr_all(inode, lower_inode); + fsstack_copy_inode_size(inode, lower_inode); + + unlock_new_inode(inode); + return inode; +} + +/* + * Helper interpose routine, called directly by ->lookup to handle + * spliced dentries. 
+ */ +static struct dentry *__sharefs_interpose(struct dentry *dentry, + struct super_block *sb, + struct path *lower_path) +{ + struct inode *inode; + struct inode *lower_inode; + struct super_block *lower_sb; + struct dentry *ret_dentry; + + lower_inode = d_inode(lower_path->dentry); + lower_sb = sharefs_lower_super(sb); + + /* check that the lower file system didn't cross a mount point */ + if (lower_inode->i_sb != lower_sb) { + ret_dentry = ERR_PTR(-EXDEV); + goto out; + } + + /* + * We allocate our new inode below by calling sharefs_iget, + * which will initialize some of the new inode's fields + */ + + /* inherit lower inode number for sharefs's inode */ + inode = sharefs_iget(sb, lower_inode); + if (IS_ERR(inode)) { + ret_dentry = ERR_PTR(PTR_ERR(inode)); + goto out; + } + + ret_dentry = d_splice_alias(inode, dentry); + +out: + return ret_dentry; +} + +/* + * Connect a sharefs inode dentry/inode with several lower ones. This is + * the classic stackable file system "vnode interposition" action. + * + * @dentry: sharefs's dentry which interposes on lower one + * @sb: sharefs's super_block + * @lower_path: the lower path (caller does path_get/put) + */ +int sharefs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path) +{ + struct dentry *ret_dentry; + + ret_dentry = __sharefs_interpose(dentry, sb, lower_path); + return PTR_ERR(ret_dentry); +} + +/* + * Main driver function for sharefs's lookup. + * + * Returns: NULL (ok), ERR_PTR if an error occurred. + * Fills in lower_parent_path with on success. + */ +static struct dentry *__sharefs_lookup(struct dentry *dentry, + unsigned int flags, + struct path *lower_parent_path) +{ + int err = 0; + struct vfsmount *lower_dir_mnt; + struct dentry *lower_dir_dentry = NULL; + struct dentry *lower_dentry; + const char *name; + struct path lower_path; + struct qstr this; + struct dentry *ret_dentry = NULL; + + /* must initialize dentry operations */ + d_set_d_op(dentry, &sharefs_dops); + + if (IS_ROOT(dentry)) + goto out; + + name = dentry->d_name.name; + + /* now start the actual lookup procedure */ + lower_dir_dentry = lower_parent_path->dentry; + lower_dir_mnt = lower_parent_path->mnt; + + /* Use vfs_path_lookup to check if the dentry exists or not */ + err = vfs_path_lookup(lower_dir_dentry, lower_dir_mnt, name, 0, + &lower_path); + + /* no error: handle positive dentries */ + if (!err) { + sharefs_set_lower_path(dentry, &lower_path); + ret_dentry = + __sharefs_interpose(dentry, dentry->d_sb, &lower_path); + if (IS_ERR(ret_dentry)) { + err = PTR_ERR(ret_dentry); + /* path_put underlying path on error */ + sharefs_put_reset_lower_path(dentry); + } + goto out; + } + + /* + * We don't consider ENOENT an error, and we want to return a + * negative dentry. + */ + if (err && err != -ENOENT) + goto out; + + /* instantiate a new negative dentry */ + this.name = name; + this.len = strlen(name); + this.hash = full_name_hash(lower_dir_dentry, this.name, this.len); + lower_dentry = d_lookup(lower_dir_dentry, &this); + if (lower_dentry) + goto setup_lower; + + lower_dentry = d_alloc(lower_dir_dentry, &this); + if (!lower_dentry) { + err = -ENOMEM; + goto out; + } + + /* + * Calling ->lookup instead of d_add will give the lower fs a chance + * to allocate the d_fsdata field but will still instantiate and hash the + * lower_dentry. Without this, sharefs could not stack on top of itself. 
+ */ + d_inode(lower_dir_dentry)->i_op->lookup(d_inode(lower_dir_dentry), + lower_dentry, flags); + +setup_lower: + lower_path.dentry = lower_dentry; + lower_path.mnt = mntget(lower_dir_mnt); + sharefs_set_lower_path(dentry, &lower_path); + + /* + * If the intent is to create a file, then don't return an error, so + * the VFS will continue the process of making this negative dentry + * into a positive one. + */ + if (err == -ENOENT || (flags & (LOOKUP_CREATE|LOOKUP_RENAME_TARGET))) + err = 0; + +out: + if (err) + return ERR_PTR(err); + return ret_dentry; +} + +struct dentry *sharefs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + int err; + struct dentry *ret, *parent; + struct path lower_parent_path; + + parent = dget_parent(dentry); + + sharefs_get_lower_path(parent, &lower_parent_path); + + /* allocate dentry private data. We free it in ->d_release */ + err = new_dentry_private_data(dentry); + if (err) { + ret = ERR_PTR(err); + goto out; + } + ret = __sharefs_lookup(dentry, flags, &lower_parent_path); + if (IS_ERR(ret)) { + sharefs_err("sharefs_lookup error!"); + goto out; + } + + if (ret) + dentry = ret; + if (d_inode(dentry)) + fsstack_copy_attr_times(d_inode(dentry), + sharefs_lower_inode(d_inode(dentry))); + /* update parent directory's atime */ + fsstack_copy_attr_atime(d_inode(parent), + sharefs_lower_inode(d_inode(parent))); + fixup_perm_from_level(d_inode(parent), dentry); +out: + sharefs_put_lower_path(parent, &lower_parent_path); + dput(parent); + return ret; +} diff --git a/fs/sharefs/main.c b/fs/sharefs/main.c new file mode 100644 index 000000000000..9bb2a66f3732 --- /dev/null +++ b/fs/sharefs/main.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fs/sharefs/main.c + * + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + * Copyright (c) 2023 Huawei Device Co., Ltd. + */ + +#include +#include "sharefs.h" +#include "authentication.h" + + +struct sharefs_mount_priv { + const char *dev_name; + const char *raw_data; +}; + +/* + * There is no need to lock the sharefs_super_info's rwsem as there is no + * way anyone can have a reference to the superblock at this point in time. + */ +static int sharefs_fill_super(struct super_block *sb, void *data, int silent) +{ + + struct sharefs_mount_priv *priv = (struct sharefs_mount_priv *)data; + const char *dev_name = priv->dev_name; + const char *raw_data = priv->raw_data; + + int err = 0; + struct super_block *lower_sb; + struct path lower_path; + struct inode *inode; + + /* parse lower path */ + err = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &lower_path); + if (err) { + printk(KERN_ERR "sharefs: error accessing " + "lower directory '%s'\n", dev_name); + goto out; + } + + /* allocate superblock private data */ + sb->s_fs_info = kzalloc(sizeof(struct sharefs_sb_info), GFP_KERNEL); + if (!SHAREFS_SB(sb)) { + printk(KERN_CRIT "sharefs: fill_super: out of memory\n"); + err = -ENOMEM; + goto out_pput; + } + + /* set the lower superblock field of upper superblock */ + lower_sb = lower_path.dentry->d_sb; + atomic_inc(&lower_sb->s_active); + sharefs_set_lower_super(sb, lower_sb); + + /* inherit maxbytes from lower file system */ + sb->s_maxbytes = lower_sb->s_maxbytes; + + /* + * Our c/m/atime granularity is 1 ns because we may stack on file + * systems whose granularity is as good. 
+ */ + sb->s_time_gran = 1; + + sb->s_op = &sharefs_sops; + + /* get a new inode and allocate our root dentry */ + inode = sharefs_iget(sb, d_inode(lower_path.dentry)); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out_pput; + } + sharefs_root_inode_perm_init(inode); + sb->s_root = d_make_root(inode); + if (!sb->s_root) { + err = -ENOMEM; + goto out_pput; + } + d_set_d_op(sb->s_root, &sharefs_dops); + + err = sharefs_parse_options(sb->s_fs_info, raw_data); + if (err) + goto out_pput; + + /* link the upper and lower dentries */ + sb->s_root->d_fsdata = NULL; + err = new_dentry_private_data(sb->s_root); + if (err) + goto out_pput; + + /* if get here: cannot have error */ + + /* set the lower dentries for s_root */ + sharefs_set_lower_path(sb->s_root, &lower_path); + + /* + * No need to call interpose because we already have a positive + * dentry, which was instantiated by d_make_root. Just need to + * d_rehash it. + */ + d_rehash(sb->s_root); + if (!silent) + printk(KERN_INFO + "sharefs: mounted on top of %s type %s\n", + dev_name, lower_sb->s_type->name); + goto out; /* all is well */ + + /* + * path_put is the only resource we need to free if an error occurred + * because returning an error from this function will cause + * generic_shutdown_super to be called, which will call + * sharefs_put_super, and that function will release any other + * resources we took. + */ +out_pput: + path_put(&lower_path); +out: + return err; +} + +struct dentry *sharefs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + struct sharefs_mount_priv priv = { + .dev_name = dev_name, + .raw_data = raw_data, + }; + + /* sharefs needs a valid dev_name to get the lower_sb's metadata */ + if (!dev_name || !*dev_name) + return ERR_PTR(-EINVAL); + + return mount_nodev(fs_type, flags, &priv, + sharefs_fill_super); +} + +static struct file_system_type sharefs_fs_type = { + .owner = THIS_MODULE, + .name = SHAREFS_NAME, + .mount = sharefs_mount, + .kill_sb = generic_shutdown_super, + .fs_flags = 0, +}; +MODULE_ALIAS_FS(SHAREFS_NAME); + +static int __init init_sharefs_fs(void) +{ + int err; + + pr_info("Registering sharefs"); + + err = sharefs_init_inode_cache(); + if (err) + goto out_err; + err = sharefs_init_dentry_cache(); + if (err) + goto out_err; + err = register_filesystem(&sharefs_fs_type); + if (err) { + sharefs_err("share register failed!"); + goto out_err; + } + + err = sharefs_init_configfs(); + if (err) + goto out_err; + return 0; +out_err: + sharefs_exit_configfs(); + sharefs_destroy_inode_cache(); + sharefs_destroy_dentry_cache(); + sharefs_err("sharefs init failed!"); + return err; +} + +static void __exit exit_sharefs_fs(void) +{ + sharefs_destroy_inode_cache(); + sharefs_destroy_dentry_cache(); + unregister_filesystem(&sharefs_fs_type); + pr_info("Completed sharefs module unload\n"); +} + +MODULE_AUTHOR("Jingjing Mao"); +MODULE_DESCRIPTION("Sharefs"); +MODULE_LICENSE("GPL"); + +module_init(init_sharefs_fs); +module_exit(exit_sharefs_fs); diff --git a/fs/sharefs/sharefs.h b/fs/sharefs/sharefs.h new file mode 100644 index 000000000000..1f40b3ad0a36 --- /dev/null +++ b/fs/sharefs/sharefs.h @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + * Copyright (c) 2023 Huawei Device Co., Ltd. 
+ */ + +#ifndef _SHAREFS_H_ +#define _SHAREFS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* the file system name */ +#define SHAREFS_NAME "sharefs" + +/* sharefs root inode number */ +#define SHAREFS_ROOT_INO 1 +#define OID_ROOT 0 +#define ROOT_UID KUIDT_INIT(OID_ROOT) +#define ROOT_GID KGIDT_INIT(OID_ROOT) +#define SHAREFS_SUPER_MAGIC 0x20230212 + +/* useful for tracking code reachability */ +#define UDBG printk(KERN_DEFAULT "DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__) + +/* file private data */ +struct sharefs_file_info { + struct file *lower_file; + const struct vm_operations_struct *lower_vm_ops; +}; + +/* sharefs inode data in memory */ +struct sharefs_inode_info { + struct inode *lower_inode; + struct inode vfs_inode; + __u16 perm; +}; + +/* sharefs dentry data in memory */ +struct sharefs_dentry_info { + spinlock_t lock; /* protects lower_path */ + struct path lower_path; +}; + +/* sharefs super-block data in memory */ +struct sharefs_sb_info { + struct super_block *lower_sb; + /* multi user */ + unsigned int user_id; +}; + +/* operations vectors defined in specific files */ +extern const struct file_operations sharefs_main_fops; +extern const struct file_operations sharefs_dir_fops; +extern const struct inode_operations sharefs_main_iops; +extern const struct inode_operations sharefs_dir_iops; +extern const struct inode_operations sharefs_symlink_iops; +extern const struct super_operations sharefs_sops; +extern const struct dentry_operations sharefs_dops; + +extern int sharefs_init_inode_cache(void); +extern void sharefs_destroy_inode_cache(void); +extern int sharefs_init_dentry_cache(void); +extern void sharefs_destroy_dentry_cache(void); +extern int new_dentry_private_data(struct dentry *dentry); +extern void free_dentry_private_data(struct dentry *dentry); +extern struct dentry *sharefs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags); +extern struct inode *sharefs_iget(struct super_block *sb, + struct inode *lower_inode); +extern int sharefs_interpose(struct dentry *dentry, struct super_block *sb, + struct path *lower_path); +extern int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, + const char *name, unsigned int flags, + struct path *path); +extern int sharefs_parse_options(struct sharefs_sb_info *sbi, + const char *data); + +/* + * inode to private data + * + * Since we use containers and the struct inode is _inside_ the + * sharefs_inode_info structure, SHAREFS_I will always (given a non-NULL + * inode pointer), return a valid non-NULL pointer. + */ +static inline struct sharefs_inode_info *SHAREFS_I(const struct inode *inode) +{ + return container_of(inode, struct sharefs_inode_info, vfs_inode); +} + +/* dentry to private data */ +#define SHAREFS_D(dent) ((struct sharefs_dentry_info *)(dent)->d_fsdata) + +/* superblock to private data */ +#define SHAREFS_SB(super) ((struct sharefs_sb_info *)(super)->s_fs_info) + +/* file to private Data */ +#define SHAREFS_F(file) ((struct sharefs_file_info *)((file)->private_data)) + +/* file to lower file */ +static inline struct file *sharefs_lower_file(const struct file *f) +{ + return SHAREFS_F(f)->lower_file; +} + +static inline void sharefs_set_lower_file(struct file *f, struct file *val) +{ + SHAREFS_F(f)->lower_file = val; +} + +/* inode to lower inode. 
*/ +static inline struct inode *sharefs_lower_inode(const struct inode *i) +{ + return SHAREFS_I(i)->lower_inode; +} + +static inline void sharefs_set_lower_inode(struct inode *i, struct inode *val) +{ + SHAREFS_I(i)->lower_inode = val; +} + +/* superblock to lower superblock */ +static inline struct super_block *sharefs_lower_super( + const struct super_block *sb) +{ + return SHAREFS_SB(sb)->lower_sb; +} + +static inline void sharefs_set_lower_super(struct super_block *sb, + struct super_block *val) +{ + SHAREFS_SB(sb)->lower_sb = val; +} + +/* path based (dentry/mnt) macros */ +static inline void pathcpy(struct path *dst, const struct path *src) +{ + dst->dentry = src->dentry; + dst->mnt = src->mnt; +} +/* Returns struct path. Caller must path_put it. */ +static inline void sharefs_get_lower_path(const struct dentry *dent, + struct path *lower_path) +{ + spin_lock(&SHAREFS_D(dent)->lock); + pathcpy(lower_path, &SHAREFS_D(dent)->lower_path); + path_get(lower_path); + spin_unlock(&SHAREFS_D(dent)->lock); + return; +} +static inline void sharefs_put_lower_path(const struct dentry *dent, + struct path *lower_path) +{ + path_put(lower_path); + return; +} +static inline void sharefs_set_lower_path(const struct dentry *dent, + struct path *lower_path) +{ + spin_lock(&SHAREFS_D(dent)->lock); + pathcpy(&SHAREFS_D(dent)->lower_path, lower_path); + spin_unlock(&SHAREFS_D(dent)->lock); + return; +} +static inline void sharefs_reset_lower_path(const struct dentry *dent) +{ + spin_lock(&SHAREFS_D(dent)->lock); + SHAREFS_D(dent)->lower_path.dentry = NULL; + SHAREFS_D(dent)->lower_path.mnt = NULL; + spin_unlock(&SHAREFS_D(dent)->lock); + return; +} +static inline void sharefs_put_reset_lower_path(const struct dentry *dent) +{ + struct path lower_path; + spin_lock(&SHAREFS_D(dent)->lock); + pathcpy(&lower_path, &SHAREFS_D(dent)->lower_path); + SHAREFS_D(dent)->lower_path.dentry = NULL; + SHAREFS_D(dent)->lower_path.mnt = NULL; + spin_unlock(&SHAREFS_D(dent)->lock); + path_put(&lower_path); + return; +} + +/* locking helpers */ +static inline struct dentry *lock_parent(struct dentry *dentry) +{ + struct dentry *dir = dget_parent(dentry); + inode_lock_nested(d_inode(dir), I_MUTEX_PARENT); + return dir; +} + +static inline void unlock_dir(struct dentry *dir) +{ + inode_unlock(d_inode(dir)); + dput(dir); +} + +static inline bool str_n_case_eq(const char *s1, const char *s2, size_t len) +{ + return !strncasecmp(s1, s2, len); +} + +static inline bool qstr_case_eq(const struct qstr *q1, const struct qstr *q2) +{ + return q1->len == q2->len && str_n_case_eq(q1->name, q2->name, q2->len); +} +/***************************************************************************** + * log print helpers + *****************************************************************************/ +__printf(4, 5) void __sharefs_log(const char *level, const bool ratelimited, + const char *function, const char *fmt, ...); +#define sharefs_err(fmt, ...) \ + __sharefs_log(KERN_ERR, false, __func__, fmt, ##__VA_ARGS__) +#define sharefs_warning(fmt, ...) \ + __sharefs_log(KERN_WARNING, false, __func__, fmt, ##__VA_ARGS__) +#define sharefs_info(fmt, ...) \ + __sharefs_log(KERN_INFO, false, __func__, fmt, ##__VA_ARGS__) +#define sharefs_err_ratelimited(fmt, ...) \ + __sharefs_log(KERN_ERR, true, __func__, fmt, ##__VA_ARGS__) +#define sharefs_warning_ratelimited(fmt, ...) \ + __sharefs_log(KERN_WARNING, true, __func__, fmt, ##__VA_ARGS__) +#define sharefs_info_ratelimited(fmt, ...) 
\ + __sharefs_log(KERN_INFO, true, __func__, fmt, ##__VA_ARGS__) + +#endif /* not _SHAREFS_H_ */ diff --git a/fs/sharefs/super.c b/fs/sharefs/super.c new file mode 100644 index 000000000000..e130126496a6 --- /dev/null +++ b/fs/sharefs/super.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 1998-2022 Erez Zadok + * Copyright (c) 2009 Shrikar Archak + * Copyright (c) 2003-2022 Stony Brook University + * Copyright (c) 2003-2022 The Research Foundation of SUNY + */ +#include +#include +#include +#include +#include "sharefs.h" + +enum { + OPT_USER_ID, +}; + +static match_table_t sharefs_tokens = { + { OPT_USER_ID, "user_id=%s"}, +}; + +int sharefs_parse_options(struct sharefs_sb_info *sbi, const char *data) +{ + char *p = NULL; + char *name = NULL; + char *options = NULL; + char *options_src = NULL; + substring_t args[MAX_OPT_ARGS]; + unsigned int user_id = 0; + int err = 0; + + options = kstrdup(data, GFP_KERNEL); + if (data && !options) { + err = -ENOMEM; + goto out; + } + options_src = options; + + while ((p = strsep(&options_src, ",")) != NULL) { + int token; + + if (!*p) + continue; + args[0].to = args[0].from = NULL; + token = match_token(p, sharefs_tokens, args); + + switch (token) { + case OPT_USER_ID: + name = match_strdup(&args[0]); + if (name) { + err = kstrtouint(name, 10, &user_id); + kfree(name); + name = NULL; + if (err) + goto out; + sbi->user_id = user_id; + } + break; + default: + err = -EINVAL; + goto out; + } + } +out: + kfree(options); + + return err; +} + +/* + * The inode cache is used with alloc_inode for both our inode info and the + * vfs inode. + */ +static struct kmem_cache *sharefs_inode_cachep; + +/* final actions when unmounting a file system */ +static void sharefs_put_super(struct super_block *sb) +{ + struct sharefs_sb_info *spd; + struct super_block *s; + + spd = SHAREFS_SB(sb); + if (!spd) + return; + + /* decrement lower super references */ + s = sharefs_lower_super(sb); + sharefs_set_lower_super(sb, NULL); + atomic_dec(&s->s_active); + + kfree(spd); + sb->s_fs_info = NULL; +} + +static int sharefs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + int err; + struct path lower_path; + + sharefs_get_lower_path(dentry, &lower_path); + err = vfs_statfs(&lower_path, buf); + sharefs_put_lower_path(dentry, &lower_path); + + /* set return buf to our f/s to avoid confusing user-level utils */ + buf->f_type = SHAREFS_SUPER_MAGIC; + + return err; +} + +/* + * Called by iput() when the inode reference count reached zero + * and the inode is not hashed anywhere. Used to clear anything + * that needs to be, before the inode is completely destroyed and put + * on the inode free list. + */ +static void sharefs_evict_inode(struct inode *inode) +{ + struct inode *lower_inode; + + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); + /* + * Decrement a reference to a lower_inode, which was incremented + * by our read_inode when it was created initially. + */ + lower_inode = sharefs_lower_inode(inode); + sharefs_set_lower_inode(inode, NULL); + iput(lower_inode); +} + +void __sharefs_log(const char *level, const bool ratelimited, + const char *function, const char *fmt, ...) 
+{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (ratelimited) + printk_ratelimited("%s sharefs: %s() %pV\n", level, + function, &vaf); + else + printk("%s sharefs: %s() %pV\n", level, function, &vaf); + va_end(args); +} + +static struct inode *sharefs_alloc_inode(struct super_block *sb) +{ + struct sharefs_inode_info *i; + + i = kmem_cache_alloc(sharefs_inode_cachep, GFP_KERNEL); + if (!i) + return NULL; + + /* memset everything up to the inode to 0 */ + memset(i, 0, offsetof(struct sharefs_inode_info, vfs_inode)); + + atomic64_set(&i->vfs_inode.i_version, 1); + return &i->vfs_inode; +} + +static void sharefs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(sharefs_inode_cachep, SHAREFS_I(inode)); +} + +/* sharefs inode cache constructor */ +static void init_once(void *obj) +{ + struct sharefs_inode_info *i = obj; + + inode_init_once(&i->vfs_inode); +} + +int sharefs_init_inode_cache(void) +{ + int err = 0; + + sharefs_inode_cachep = + kmem_cache_create("sharefs_inode_cache", + sizeof(struct sharefs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT, init_once); + if (!sharefs_inode_cachep) + err = -ENOMEM; + return err; +} + +/* sharefs inode cache destructor */ +void sharefs_destroy_inode_cache(void) +{ + if (sharefs_inode_cachep) + kmem_cache_destroy(sharefs_inode_cachep); +} + +const struct super_operations sharefs_sops = { + .put_super = sharefs_put_super, + .statfs = sharefs_statfs, + .evict_inode = sharefs_evict_inode, + .alloc_inode = sharefs_alloc_inode, + .destroy_inode = sharefs_destroy_inode, +}; -- Gitee From bcf8eca748a723f8ab271b604135371bc62a20aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=96=87=E8=BF=AA?= Date: Fri, 17 Feb 2023 10:24:33 +0800 Subject: [PATCH 103/243] fixed 115315a from https://gitee.com/wz109/kernel_linux_5.10/pulls/681 change bid to appid in order to fix hmdfs in ohcore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 张文迪 --- fs/hmdfs/authority/config.c | 12 ++++++------ fs/sharefs/config.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/hmdfs/authority/config.c b/fs/hmdfs/authority/config.c index 2daadd40704e..1610ca90272e 100644 --- a/fs/hmdfs/authority/config.c +++ b/fs/hmdfs/authority/config.c @@ -266,10 +266,10 @@ static struct configfs_attribute hmdfs_##_attr_##_attr = { \ .store = hmdfs_##_attr_##_store, \ }; -HMDFS_BUNDLE_ATTRIBUTE(bid) +HMDFS_BUNDLE_ATTRIBUTE(appid) static struct configfs_attribute *hmdfs_battrs[] = { - &hmdfs_bid_attr, + &hmdfs_appid_attr, NULL, }; @@ -280,8 +280,8 @@ static void hmdfs_config_bitem_release(struct config_item *item) hmdfs_info("release bundle item"); bitem = container_of(item, struct hmdfs_config_bitem, item); - remove_bid_hash_entry(&bitem->str); - remove_bid_hash_entry(&bitem->str); + remove_appid_hash_entry(&bitem->str); + remove_appid_hash_entry(&bitem->str); free_bitem(bitem); } @@ -336,7 +336,7 @@ static struct configfs_subsystem hmdfs_subsystem = { int get_bid(const char *bname) { - return hmdfs_bid_get(bname); + return hmdfs_appid_get(bname); } int __init hmdfs_init_configfs(void) @@ -371,7 +371,7 @@ void hmdfs_exit_configfs(void) hmdfs_info("hmdfs exit configfs"); configfs_unregister_subsystem(&hmdfs_subsystem); - clear_bid_hash_entry(); + clear_appid_hash_entry(); kmem_cache_destroy(hmdfs_bid_entry_cachep); } \ No newline at end of file diff --git a/fs/sharefs/config.c b/fs/sharefs/config.c index 9bea53e555ad..874574c4e58f 100644 --- 
a/fs/sharefs/config.c +++ b/fs/sharefs/config.c @@ -263,10 +263,10 @@ static struct configfs_attribute sharefs_##_attr_##_attr = { \ .store = sharefs_##_attr_##_store, \ }; -SHAREFS_BUNDLE_ATTRIBUTE(bid) +SHAREFS_BUNDLE_ATTRIBUTE(appid) static struct configfs_attribute *sharefs_battrs[] = { - &sharefs_bid_attr, + &sharefs_appid_attr, NULL, }; @@ -277,8 +277,8 @@ static void sharefs_config_bitem_release(struct config_item *item) sharefs_info("release bundle item"); bitem = container_of(item, struct sharefs_config_bitem, item); - remove_bid_hash_entry(&bitem->str); - remove_bid_hash_entry(&bitem->str); + remove_appid_hash_entry(&bitem->str); + remove_appid_hash_entry(&bitem->str); free_bitem(bitem); } @@ -331,7 +331,7 @@ static struct configfs_subsystem sharefs_subsystem = { int get_bid_config(const char *bname) { - return sharefs_bid_get(bname); + return sharefs_appid_get(bname); } int __init sharefs_init_configfs(void) @@ -366,7 +366,7 @@ void sharefs_exit_configfs(void) sharefs_info("sharefs exit configfs"); configfs_unregister_subsystem(&sharefs_subsystem); - clear_bid_hash_entry(); + clear_appid_hash_entry(); kmem_cache_destroy(sharefs_bid_entry_cachep); } \ No newline at end of file -- Gitee From 1a61e802dd19696994ca547237bd02dda5621e62 Mon Sep 17 00:00:00 2001 From: zhangkaixiang Date: Tue, 21 Feb 2023 20:20:50 +0800 Subject: [PATCH 104/243] fixed b8432f5 from https://gitee.com/zkx48/kernel_linux_5.10/pulls/692 fix up the function name Signed-off-by: zhangkaixiang --- fs/hmdfs/inode_remote.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/hmdfs/inode_remote.c b/fs/hmdfs/inode_remote.c index c9bd5275c8fd..6a19e6e6b135 100644 --- a/fs/hmdfs/inode_remote.c +++ b/fs/hmdfs/inode_remote.c @@ -329,7 +329,7 @@ static void hmdfs_update_inode(struct inode *inode, hmdfs_update_inode_size(inode, lookup_result->i_size); } -static void hmdfs_fill_inode_android(struct inode *inode, struct inode *dir, +static void hmdfs_fill_inode_remote(struct inode *inode, struct inode *dir, umode_t mode) { #ifdef CONFIG_HMDFS_FS_PERMISSION @@ -355,7 +355,7 @@ struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con, if (con->version > USERSPACE_MAX_VER) { /* the inode was found in cache */ if (!(inode->i_state & I_NEW)) { - hmdfs_fill_inode_android(inode, dir, mode); + hmdfs_fill_inode_remote(inode, dir, mode); hmdfs_update_inode(inode, res); return inode; } @@ -396,7 +396,7 @@ struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con, inode->i_mapping->a_ops = con->conn_operations->remote_file_aops; - hmdfs_fill_inode_android(inode, dir, mode); + hmdfs_fill_inode_remote(inode, dir, mode); unlock_new_inode(inode); return inode; bad_inode: -- Gitee From ff5e6921ae457af5c64f942e39668fbf0ea43eeb Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Fri, 30 Dec 2022 00:56:41 +0800 Subject: [PATCH 105/243] drm/i915/gvt: fix double free bug in split_2MB_gtt_entry mainline inclusion from v6.2-rc3 commit 4a61648af68f5ba4884f0e3b494ee1cabc4b6620 category: bugfix issue: I5ZBBS CVE: CVE-2022-3707 Signed-off-by: gaochao --------------------------------------- If intel_gvt_dma_map_guest_page failed, it will call ppgtt_invalidate_spt, which will finally free the spt. But the caller function ppgtt_populate_spt_by_guest_entry does not notice that, it will free spt again in its error path. Fix this by canceling the mapping of DMA address and freeing sub_spt. Besides, leave the handle of spt destroy to caller function instead of callee function when error occurs. 
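A minimal ownership sketch of the rule this fix restores (illustrative plain C; struct node, populate_child() and the failure condition are made-up names, not the gvt code): a helper that fails should undo only what it allocated itself and leave its caller's object to the caller's own error path, otherwise the same object ends up freed twice.

#include <stdlib.h>

struct node { int data; };

/*
 * Hypothetical callee: on failure it releases only its own allocation.
 * It never frees 'parent', which the caller owns and will clean up in
 * its own error path, so nothing can be freed twice.
 */
static int populate_child(struct node *parent, struct node **childp)
{
	struct node *child = malloc(sizeof(*child));

	if (!child)
		return -1;

	if (parent->data < 0) {		/* stand-in for a failing mapping step */
		free(child);		/* undo our own work only */
		return -1;		/* let the caller deal with 'parent' */
	}

	child->data = parent->data;
	*childp = child;
	return 0;
}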
Fixes: b901b252b6cf ("drm/i915/gvt: Add 2M huge gtt support") Signed-off-by: Zheng Wang Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221229165641.1192455-1-zyytlz.wz@163.com Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- drivers/gpu/drm/i915/gvt/gtt.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index a3a4305eda01..0201f9b5f87e 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1192,10 +1192,8 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, for_each_shadow_entry(sub_spt, &sub_se, sub_index) { ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, start_gfn + sub_index, PAGE_SIZE, &dma_addr); - if (ret) { - ppgtt_invalidate_spt(spt); - return ret; - } + if (ret) + goto err; sub_se.val64 = se->val64; /* Copy the PAT field from PDE. */ @@ -1214,6 +1212,17 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, ops->set_pfn(se, sub_spt->shadow_page.mfn); ppgtt_set_shadow_entry(spt, se, index); return 0; +err: + /* Cancel the existing addess mappings of DMA addr. */ + for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) { + gvt_vdbg_mm("invalidate 4K entry\n"); + ppgtt_invalidate_pte(sub_spt, &sub_se); + } + /* Release the new allocated spt. */ + trace_spt_change(sub_spt->vgpu->id, "release", sub_spt, + sub_spt->guest_page.gfn, sub_spt->shadow_page.type); + ppgtt_free_spt(sub_spt); + return ret; } static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, -- Gitee From 1ba78179f7b34881a671c95e98bd2b14ae08283b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Oct 2022 20:32:58 +0000 Subject: [PATCH 106/243] net: sched: fix race condition in qdisc_graft() stable inclusion from stable-5.10.152 commit 7aa3d623c11b9ab60f86b7833666e5d55bac4be9 category: bugfix issue: #I6DPAE CVE: CVE-2023-0590 Signed-off-by: gaochao --------------------------------------- [ Upstream commit ebda44da44f6f309d302522b049f43d6f829f7aa ] We had one syzbot report [1] in syzbot queue for a while. I was waiting for more occurrences and/or a repro but Dmitry Vyukov spotted the issue right away. qdisc_graft() drops reference to qdisc in notify_and_destroy while it's still assigned to dev->qdisc Indeed, RCU rules are clear when replacing a data structure. The visible pointer (dev->qdisc in this case) must be updated to the new object _before_ RCU grace period is started (qdisc_put(old) in this case). 
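The rule can be shown in a few lines of generic kernel-style C (a minimal sketch assuming a single RCU-protected pointer; struct thing and replace_thing() are illustrative names, this is not the sch_api.c change itself): publish the replacement first, and only afterwards hand the old object to the grace-period machinery.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct thing {
	int val;
	struct rcu_head rcu;
};

static struct thing __rcu *visible_thing;

/* Writer side; assumes the update-side lock protecting 'visible_thing' is held. */
static void replace_thing(struct thing *new)
{
	struct thing *old;

	old = rcu_dereference_protected(visible_thing, 1);
	rcu_assign_pointer(visible_thing, new);	/* readers can now only find 'new' */
	if (old)
		kfree_rcu(old, rcu);		/* 'old' starts its grace period last */
}

Dropping the last reference to the old qdisc before dev->qdisc is updated is exactly the ordering the fix below removes from qdisc_graft().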
[1] BUG: KASAN: use-after-free in __tcf_qdisc_find.part.0+0xa3a/0xac0 net/sched/cls_api.c:1066 Read of size 4 at addr ffff88802065e038 by task syz-executor.4/21027 CPU: 0 PID: 21027 Comm: syz-executor.4 Not tainted 6.0.0-rc3-syzkaller-00363-g7726d4c3e60b #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:317 [inline] print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 kasan_report+0xb1/0x1e0 mm/kasan/report.c:495 __tcf_qdisc_find.part.0+0xa3a/0xac0 net/sched/cls_api.c:1066 __tcf_qdisc_find net/sched/cls_api.c:1051 [inline] tc_new_tfilter+0x34f/0x2200 net/sched/cls_api.c:2018 rtnetlink_rcv_msg+0x955/0xca0 net/core/rtnetlink.c:6081 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f5efaa89279 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f5efbc31168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f5efab9bf80 RCX: 00007f5efaa89279 RDX: 0000000000000000 RSI: 0000000020000140 RDI: 0000000000000005 RBP: 00007f5efaae32e9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007f5efb0cfb1f R14: 00007f5efbc31300 R15: 0000000000022000 Allocated by task 21027: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track mm/kasan/common.c:45 [inline] set_alloc_info mm/kasan/common.c:437 [inline] ____kasan_kmalloc mm/kasan/common.c:516 [inline] ____kasan_kmalloc mm/kasan/common.c:475 [inline] __kasan_kmalloc+0xa9/0xd0 mm/kasan/common.c:525 kmalloc_node include/linux/slab.h:623 [inline] kzalloc_node include/linux/slab.h:744 [inline] qdisc_alloc+0xb0/0xc50 net/sched/sch_generic.c:938 qdisc_create_dflt+0x71/0x4a0 net/sched/sch_generic.c:997 attach_one_default_qdisc net/sched/sch_generic.c:1152 [inline] netdev_for_each_tx_queue include/linux/netdevice.h:2437 [inline] attach_default_qdiscs net/sched/sch_generic.c:1170 [inline] dev_activate+0x760/0xcd0 net/sched/sch_generic.c:1229 __dev_open+0x393/0x4d0 net/core/dev.c:1441 __dev_change_flags+0x583/0x750 net/core/dev.c:8556 rtnl_configure_link+0xee/0x240 net/core/rtnetlink.c:3189 rtnl_newlink_create net/core/rtnetlink.c:3371 [inline] __rtnl_newlink+0x10b8/0x17e0 net/core/rtnetlink.c:3580 rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3593 rtnetlink_rcv_msg+0x43a/0xca0 net/core/rtnetlink.c:6090 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 
___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 21020: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 kasan_set_track+0x21/0x30 mm/kasan/common.c:45 kasan_set_free_info+0x20/0x30 mm/kasan/generic.c:370 ____kasan_slab_free mm/kasan/common.c:367 [inline] ____kasan_slab_free+0x166/0x1c0 mm/kasan/common.c:329 kasan_slab_free include/linux/kasan.h:200 [inline] slab_free_hook mm/slub.c:1754 [inline] slab_free_freelist_hook+0x8b/0x1c0 mm/slub.c:1780 slab_free mm/slub.c:3534 [inline] kfree+0xe2/0x580 mm/slub.c:4562 rcu_do_batch kernel/rcu/tree.c:2245 [inline] rcu_core+0x7b5/0x1890 kernel/rcu/tree.c:2505 __do_softirq+0x1d3/0x9c6 kernel/softirq.c:571 Last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348 call_rcu+0x99/0x790 kernel/rcu/tree.c:2793 qdisc_put+0xcd/0xe0 net/sched/sch_generic.c:1083 notify_and_destroy net/sched/sch_api.c:1012 [inline] qdisc_graft+0xeb1/0x1270 net/sched/sch_api.c:1084 tc_modify_qdisc+0xbb7/0x1a00 net/sched/sch_api.c:1671 rtnetlink_rcv_msg+0x43a/0xca0 net/core/rtnetlink.c:6090 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2501 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x917/0xe10 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 ____sys_sendmsg+0x6eb/0x810 net/socket.c:2482 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2536 __sys_sendmsg+0xf3/0x1c0 net/socket.c:2565 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Second to last potentially related work creation: kasan_save_stack+0x1e/0x40 mm/kasan/common.c:38 __kasan_record_aux_stack+0xbe/0xd0 mm/kasan/generic.c:348 kvfree_call_rcu+0x74/0x940 kernel/rcu/tree.c:3322 neigh_destroy+0x431/0x630 net/core/neighbour.c:912 neigh_release include/net/neighbour.h:454 [inline] neigh_cleanup_and_release+0x1f8/0x330 net/core/neighbour.c:103 neigh_del net/core/neighbour.c:225 [inline] neigh_remove_one+0x37d/0x460 net/core/neighbour.c:246 neigh_forced_gc net/core/neighbour.c:276 [inline] neigh_alloc net/core/neighbour.c:447 [inline] ___neigh_create+0x18b5/0x29a0 net/core/neighbour.c:642 ip6_finish_output2+0xfb8/0x1520 net/ipv6/ip6_output.c:125 __ip6_finish_output net/ipv6/ip6_output.c:195 [inline] ip6_finish_output+0x690/0x1160 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x1ed/0x540 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:451 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] mld_sendpack+0xa09/0xe70 net/ipv6/mcast.c:1820 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x71c/0xdc0 net/ipv6/mcast.c:2653 process_one_work+0x991/0x1610 kernel/workqueue.c:2289 worker_thread+0x665/0x1080 kernel/workqueue.c:2436 kthread+0x2e4/0x3a0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306 The buggy address belongs to the object at ffff88802065e000 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 56 bytes inside of 1024-byte region [ffff88802065e000, ffff88802065e400) The buggy address belongs to the physical page: page:ffffea0000819600 refcount:1 
mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x20658 head:ffffea0000819600 order:3 compound_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 0000000000000000 dead000000000001 ffff888011841dc0 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 3, migratetype Unmovable, gfp_mask 0xd20c0(__GFP_IO|__GFP_FS|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_NOMEMALLOC), pid 3523, tgid 3523 (sshd), ts 41495190986, free_ts 41417713212 prep_new_page mm/page_alloc.c:2532 [inline] get_page_from_freelist+0x109b/0x2ce0 mm/page_alloc.c:4283 __alloc_pages+0x1c7/0x510 mm/page_alloc.c:5515 alloc_pages+0x1a6/0x270 mm/mempolicy.c:2270 alloc_slab_page mm/slub.c:1824 [inline] allocate_slab+0x27e/0x3d0 mm/slub.c:1969 new_slab mm/slub.c:2029 [inline] ___slab_alloc+0x7f1/0xe10 mm/slub.c:3031 __slab_alloc.constprop.0+0x4d/0xa0 mm/slub.c:3118 slab_alloc_node mm/slub.c:3209 [inline] __kmalloc_node_track_caller+0x2f2/0x380 mm/slub.c:4955 kmalloc_reserve net/core/skbuff.c:358 [inline] __alloc_skb+0xd9/0x2f0 net/core/skbuff.c:430 alloc_skb_fclone include/linux/skbuff.h:1307 [inline] tcp_stream_alloc_skb+0x38/0x580 net/ipv4/tcp.c:861 tcp_sendmsg_locked+0xc36/0x2f80 net/ipv4/tcp.c:1325 tcp_sendmsg+0x2b/0x40 net/ipv4/tcp.c:1483 inet_sendmsg+0x99/0xe0 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:734 sock_write_iter+0x291/0x3d0 net/socket.c:1108 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9e9/0xdd0 fs/read_write.c:578 ksys_write+0x1e8/0x250 fs/read_write.c:631 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1449 [inline] free_pcp_prepare+0x5e4/0xd20 mm/page_alloc.c:1499 free_unref_page_prepare mm/page_alloc.c:3380 [inline] free_unref_page+0x19/0x4d0 mm/page_alloc.c:3476 __unfreeze_partials+0x17c/0x1a0 mm/slub.c:2548 qlink_free mm/kasan/quarantine.c:168 [inline] qlist_free_all+0x6a/0x170 mm/kasan/quarantine.c:187 kasan_quarantine_reduce+0x180/0x200 mm/kasan/quarantine.c:294 __kasan_slab_alloc+0xa2/0xc0 mm/kasan/common.c:447 kasan_slab_alloc include/linux/kasan.h:224 [inline] slab_post_alloc_hook mm/slab.h:727 [inline] slab_alloc_node mm/slub.c:3243 [inline] slab_alloc mm/slub.c:3251 [inline] __kmem_cache_alloc_lru mm/slub.c:3258 [inline] kmem_cache_alloc+0x267/0x3b0 mm/slub.c:3268 kmem_cache_zalloc include/linux/slab.h:723 [inline] alloc_buffer_head+0x20/0x140 fs/buffer.c:2974 alloc_page_buffers+0x280/0x790 fs/buffer.c:829 create_empty_buffers+0x2c/0xee0 fs/buffer.c:1558 ext4_block_write_begin+0x1004/0x1530 fs/ext4/inode.c:1074 ext4_da_write_begin+0x422/0xae0 fs/ext4/inode.c:2996 generic_perform_write+0x246/0x560 mm/filemap.c:3738 ext4_buffered_write_iter+0x15b/0x460 fs/ext4/file.c:270 ext4_file_write_iter+0x44a/0x1660 fs/ext4/file.c:679 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x9e9/0xdd0 fs/read_write.c:578 Fixes: af356afa010f ("net_sched: reintroduce dev->qdisc for use by sch_api") Reported-by: syzbot Diagnosed-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20221018203258.2793282-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- net/sched/sch_api.c | 5 
+++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index faf8d0393da2..9e54be38b02e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1081,12 +1081,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, skip: if (!ingress) { - notify_and_destroy(net, skb, n, classid, - dev->qdisc, new); + old = rtnl_dereference(dev->qdisc); if (new && !new->ops->attach) qdisc_refcount_inc(new); dev->qdisc = new ? : &noop_qdisc; + notify_and_destroy(net, skb, n, classid, old, new); + if (new && new->ops->attach) new->ops->attach(new); } else { -- Gitee From dbea03f1e9a209ba71909cefbcf1e0f8faa11e8b Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 30 Nov 2022 03:58:00 +0000 Subject: [PATCH 107/243] binder: avoid potential data leakage when copying txn stable inclusion from stable-5.10.156 commit 23e9d815fad84c1bee3742a8de4bd39510435362 category: bugfix issue: #I6EXSP CVE: CVE-2023-20938 Signed-off-by: gaochao --------------------------------------- commit 6d98eb95b450a75adb4516a1d33652dc78d2b20c upstream. Transactions are copied from the sender to the target first and objects like BINDER_TYPE_PTR and BINDER_TYPE_FDA are then fixed up. This means there is a short period where the sender's version of these objects are visible to the target prior to the fixups. Instead of copying all of the data first, copy data only after any needed fixups have been applied. Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Reviewed-by: Martijn Coenen Acked-by: Christian Brauner Signed-off-by: Todd Kjos Link: https://lore.kernel.org/r/20211130185152.437403-3-tkjos@google.com Signed-off-by: Greg Kroah-Hartman [cmllamas: fix trivial merge conflict] Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- drivers/android/binder.c | 94 ++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 24 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index e310641c78eb..924f47cf87c2 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2048,15 +2048,21 @@ static void binder_cleanup_transaction(struct binder_transaction *t, /** * binder_get_object() - gets object and checks for valid metadata * @proc: binder_proc owning the buffer + * @u: sender's user pointer to base of buffer * @buffer: binder_buffer that we're parsing. * @offset: offset in the @buffer at which to validate an object. * @object: struct binder_object to read into * - * Return: If there's a valid metadata object at @offset in @buffer, the + * Copy the binder object at the given offset into @object. If @u is + * provided then the copy is from the sender's buffer. If not, then + * it is copied from the target's @buffer. + * + * Return: If there's a valid metadata object at @offset, the * size of that object. Otherwise, it returns zero. The object * is read into the struct binder_object pointed to by @object. 
*/ static size_t binder_get_object(struct binder_proc *proc, + const void __user *u, struct binder_buffer *buffer, unsigned long offset, struct binder_object *object) @@ -2066,10 +2072,16 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr) || - binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, - offset, read_size)) + if (offset > buffer->data_size || read_size < sizeof(*hdr)) return 0; + if (u) { + if (copy_from_user(object, u + offset, read_size)) + return 0; + } else { + if (binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, + offset, read_size)) + return 0; + } /* Ok, now see if we read a complete object. */ hdr = &object->hdr; @@ -2142,7 +2154,7 @@ static struct binder_buffer_object *binder_validate_ptr( b, buffer_offset, sizeof(object_offset))) return NULL; - object_size = binder_get_object(proc, b, object_offset, object); + object_size = binder_get_object(proc, NULL, b, object_offset, object); if (!object_size || object->hdr.type != BINDER_TYPE_PTR) return NULL; if (object_offsetp) @@ -2207,7 +2219,8 @@ static bool binder_validate_fixup(struct binder_proc *proc, unsigned long buffer_offset; struct binder_object last_object; struct binder_buffer_object *last_bbo; - size_t object_size = binder_get_object(proc, b, last_obj_offset, + size_t object_size = binder_get_object(proc, NULL, b, + last_obj_offset, &last_object); if (object_size != sizeof(*last_bbo)) return false; @@ -2322,7 +2335,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, if (!binder_alloc_copy_from_buffer(&proc->alloc, &object_offset, buffer, buffer_offset, sizeof(object_offset))) - object_size = binder_get_object(proc, buffer, + object_size = binder_get_object(proc, NULL, buffer, object_offset, &object); if (object_size == 0) { pr_err("transaction release %d bad object at offset %lld, size %zd\n", @@ -2888,6 +2901,7 @@ static void binder_transaction(struct binder_proc *proc, binder_size_t off_start_offset, off_end_offset; binder_size_t off_min; binder_size_t sg_buf_offset, sg_buf_end_offset; + binder_size_t user_offset = 0; struct binder_proc *target_proc = NULL; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; @@ -2902,6 +2916,8 @@ static void binder_transaction(struct binder_proc *proc, int t_debug_id = atomic_inc_return(&binder_last_id); char *secctx = NULL; u32 secctx_sz = 0; + const void __user *user_buffer = (const void __user *) + (uintptr_t)tr->data.ptr.buffer; e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; @@ -3226,19 +3242,6 @@ static void binder_transaction(struct binder_proc *proc, t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF); trace_binder_transaction_alloc_buf(t->buffer); - if (binder_alloc_copy_user_to_buffer( - &target_proc->alloc, - t->buffer, 0, - (const void __user *) - (uintptr_t)tr->data.ptr.buffer, - tr->data_size)) { - binder_user_error("%d:%d got transaction with invalid data ptr\n", - proc->pid, thread->pid); - return_error = BR_FAILED_REPLY; - return_error_param = -EFAULT; - return_error_line = __LINE__; - goto err_copy_data_failed; - } if (binder_alloc_copy_user_to_buffer( &target_proc->alloc, t->buffer, @@ -3283,6 +3286,7 @@ static void binder_transaction(struct binder_proc *proc, size_t object_size; struct binder_object object; binder_size_t object_offset; + binder_size_t copy_size; if 
(binder_alloc_copy_from_buffer(&target_proc->alloc, &object_offset, @@ -3294,8 +3298,27 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } - object_size = binder_get_object(target_proc, t->buffer, - object_offset, &object); + + /* + * Copy the source user buffer up to the next object + * that will be processed. + */ + copy_size = object_offset - user_offset; + if (copy_size && (user_offset > object_offset || + binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, user_offset, + user_buffer + user_offset, + copy_size))) { + binder_user_error("%d:%d got transaction with invalid data ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } + object_size = binder_get_object(target_proc, user_buffer, + t->buffer, object_offset, &object); if (object_size == 0 || object_offset < off_min) { binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n", proc->pid, thread->pid, @@ -3307,6 +3330,11 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } + /* + * Set offset to the next buffer fragment to be + * copied + */ + user_offset = object_offset + object_size; hdr = &object.hdr; off_min = object_offset + object_size; @@ -3402,9 +3430,14 @@ static void binder_transaction(struct binder_proc *proc, } ret = binder_translate_fd_array(fda, parent, t, thread, in_reply_to); - if (ret < 0) { + if (!ret) + ret = binder_alloc_copy_to_buffer(&target_proc->alloc, + t->buffer, + object_offset, + fda, sizeof(*fda)); + if (ret) { return_error = BR_FAILED_REPLY; - return_error_param = ret; + return_error_param = ret > 0 ? -EINVAL : ret; return_error_line = __LINE__; goto err_translate_failed; } @@ -3474,6 +3507,19 @@ static void binder_transaction(struct binder_proc *proc, goto err_bad_object_type; } } + /* Done processing objects, copy the rest of the buffer */ + if (binder_alloc_copy_user_to_buffer( + &target_proc->alloc, + t->buffer, user_offset, + user_buffer + user_offset, + tr->data_size - user_offset)) { + binder_user_error("%d:%d got transaction with invalid data ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = -EFAULT; + return_error_line = __LINE__; + goto err_copy_data_failed; + } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; t->work.type = BINDER_WORK_TRANSACTION; -- Gitee From b7742c48611c686757436f6db7bd1049a1fe80f8 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 30 Nov 2022 03:58:01 +0000 Subject: [PATCH 108/243] binder: read pre-translated fds from sender buffer stable inclusion from stable-5.10.156 commit 5204296fc76623552d53f042e2dc411b49c151f2 category: bugfix issue: #I6EXSP CVE: CVE-2023-20938 Signed-off-by: gaochao --------------------------------------- commit 656e01f3ab54afe71bed066996fc2640881e1220 upstream. This patch is to prepare for an up coming patch where we read pre-translated fds from the sender buffer and translate them before copying them to the target. It does not change run time. The patch adds two new parameters to binder_translate_fd_array() to hold the sender buffer and sender buffer parent. These parameters let us call copy_from_user() directly from the sender instead of using binder_alloc_copy_from_buffer() to copy from the target. Also the patch adds some new alignment checks. 
Previously the alignment checks would have been done in a different place, but this lets us print more useful error messages. Reviewed-by: Martijn Coenen Acked-by: Christian Brauner Signed-off-by: Todd Kjos Link: https://lore.kernel.org/r/20211130185152.437403-4-tkjos@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- drivers/android/binder.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 924f47cf87c2..2128976db01b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2674,15 +2674,17 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset, } static int binder_translate_fd_array(struct binder_fd_array_object *fda, + const void __user *sender_ubuffer, struct binder_buffer_object *parent, + struct binder_buffer_object *sender_uparent, struct binder_transaction *t, struct binder_thread *thread, struct binder_transaction *in_reply_to) { binder_size_t fdi, fd_buf_size; binder_size_t fda_offset; + const void __user *sender_ufda_base; struct binder_proc *proc = thread->proc; - struct binder_proc *target_proc = t->to_proc; fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { @@ -2706,7 +2708,10 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, */ fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) + fda->parent_offset; - if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32))) { + sender_ufda_base = (void __user *)sender_uparent->buffer + fda->parent_offset; + + if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) || + !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) { binder_user_error("%d:%d parent offset not aligned correctly.\n", proc->pid, thread->pid); return -EINVAL; @@ -2715,10 +2720,9 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, u32 fd; int ret; binder_size_t offset = fda_offset + fdi * sizeof(fd); + binder_size_t sender_uoffset = fdi * sizeof(fd); - ret = binder_alloc_copy_from_buffer(&target_proc->alloc, - &fd, t->buffer, - offset, sizeof(fd)); + ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd)); if (!ret) ret = binder_translate_fd(fd, offset, t, thread, in_reply_to); @@ -3397,6 +3401,8 @@ static void binder_transaction(struct binder_proc *proc, case BINDER_TYPE_FDA: { struct binder_object ptr_object; binder_size_t parent_offset; + struct binder_object user_object; + size_t user_parent_size; struct binder_fd_array_object *fda = to_binder_fd_array_object(hdr); size_t num_valid = (buffer_offset - off_start_offset) / @@ -3428,8 +3434,27 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_parent; } - ret = binder_translate_fd_array(fda, parent, t, thread, - in_reply_to); + /* + * We need to read the user version of the parent + * object to get the original user offset + */ + user_parent_size = + binder_get_object(proc, user_buffer, t->buffer, + parent_offset, &user_object); + if (user_parent_size != sizeof(user_object.bbo)) { + binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n", + proc->pid, thread->pid, + user_parent_size, + sizeof(user_object.bbo)); + return_error = BR_FAILED_REPLY; + return_error_param = -EINVAL; + return_error_line = __LINE__; + goto err_bad_parent; + } + ret = binder_translate_fd_array(fda, user_buffer, + parent, + 
&user_object.bbo, t, + thread, in_reply_to); if (!ret) ret = binder_alloc_copy_to_buffer(&target_proc->alloc, t->buffer, -- Gitee From 4a395cc2f2cae560da7bea8ddaa3782d3b4b46c8 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 30 Nov 2022 03:58:02 +0000 Subject: [PATCH 109/243] binder: defer copies of pre-patched txn data stable inclusion from stable-5.10.156 commit c9d3f25a7f4e3aab3dfd91885e3d428bccdcb0e1 category: bugfix issue: #I6EXSP CVE: CVE-2023-20938 Signed-off-by: gaochao --------------------------------------- commit 09184ae9b5756cc469db6fd1d1cfdcffbf627c2d upstream. BINDER_TYPE_PTR objects point to memory areas in the source process to be copied into the target buffer as part of a transaction. This implements a scatter- gather model where non-contiguous memory in a source process is "gathered" into a contiguous region in the target buffer. The data can include pointers that must be fixed up to correctly point to the copied data. To avoid making source process pointers visible to the target process, this patch defers the copy until the fixups are known and then copies and fixeups are done together. There is a special case of BINDER_TYPE_FDA which applies the fixup later in the target process context. In this case the user data is skipped (so no untranslated fds become visible to the target). Reviewed-by: Martijn Coenen Signed-off-by: Todd Kjos Link: https://lore.kernel.org/r/20211130185152.437403-5-tkjos@google.com Signed-off-by: Greg Kroah-Hartman [cmllamas: fix trivial merge conflict] Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- drivers/android/binder.c | 299 +++++++++++++++++++++++++++++++++++---- 1 file changed, 274 insertions(+), 25 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 2128976db01b..bb252dd87541 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2673,7 +2673,246 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset, return ret; } -static int binder_translate_fd_array(struct binder_fd_array_object *fda, +/** + * struct binder_ptr_fixup - data to be fixed-up in target buffer + * @offset offset in target buffer to fixup + * @skip_size bytes to skip in copy (fixup will be written later) + * @fixup_data data to write at fixup offset + * @node list node + * + * This is used for the pointer fixup list (pf) which is created and consumed + * during binder_transaction() and is only accessed locally. No + * locking is necessary. + * + * The list is ordered by @offset. + */ +struct binder_ptr_fixup { + binder_size_t offset; + size_t skip_size; + binder_uintptr_t fixup_data; + struct list_head node; +}; + +/** + * struct binder_sg_copy - scatter-gather data to be copied + * @offset offset in target buffer + * @sender_uaddr user address in source buffer + * @length bytes to copy + * @node list node + * + * This is used for the sg copy list (sgc) which is created and consumed + * during binder_transaction() and is only accessed locally. No + * locking is necessary. + * + * The list is ordered by @offset. 
+ */ +struct binder_sg_copy { + binder_size_t offset; + const void __user *sender_uaddr; + size_t length; + struct list_head node; +}; + +/** + * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data + * @alloc: binder_alloc associated with @buffer + * @buffer: binder buffer in target process + * @sgc_head: list_head of scatter-gather copy list + * @pf_head: list_head of pointer fixup list + * + * Processes all elements of @sgc_head, applying fixups from @pf_head + * and copying the scatter-gather data from the source process' user + * buffer to the target's buffer. It is expected that the list creation + * and processing all occurs during binder_transaction() so these lists + * are only accessed in local context. + * + * Return: 0=success, else -errno + */ +static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, + struct binder_buffer *buffer, + struct list_head *sgc_head, + struct list_head *pf_head) +{ + int ret = 0; + struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *pf = + list_first_entry_or_null(pf_head, struct binder_ptr_fixup, + node); + + list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { + size_t bytes_copied = 0; + + while (bytes_copied < sgc->length) { + size_t copy_size; + size_t bytes_left = sgc->length - bytes_copied; + size_t offset = sgc->offset + bytes_copied; + + /* + * We copy up to the fixup (pointed to by pf) + */ + copy_size = pf ? min(bytes_left, (size_t)pf->offset - offset) + : bytes_left; + if (!ret && copy_size) + ret = binder_alloc_copy_user_to_buffer( + alloc, buffer, + offset, + sgc->sender_uaddr + bytes_copied, + copy_size); + bytes_copied += copy_size; + if (copy_size != bytes_left) { + BUG_ON(!pf); + /* we stopped at a fixup offset */ + if (pf->skip_size) { + /* + * we are just skipping. This is for + * BINDER_TYPE_FDA where the translated + * fds will be fixed up when we get + * to target context. + */ + bytes_copied += pf->skip_size; + } else { + /* apply the fixup indicated by pf */ + if (!ret) + ret = binder_alloc_copy_to_buffer( + alloc, buffer, + pf->offset, + &pf->fixup_data, + sizeof(pf->fixup_data)); + bytes_copied += sizeof(pf->fixup_data); + } + list_del(&pf->node); + kfree(pf); + pf = list_first_entry_or_null(pf_head, + struct binder_ptr_fixup, node); + } + } + list_del(&sgc->node); + kfree(sgc); + } + BUG_ON(!list_empty(pf_head)); + BUG_ON(!list_empty(sgc_head)); + + return ret > 0 ? -EINVAL : ret; +} + +/** + * binder_cleanup_deferred_txn_lists() - free specified lists + * @sgc_head: list_head of scatter-gather copy list + * @pf_head: list_head of pointer fixup list + * + * Called to clean up @sgc_head and @pf_head if there is an + * error. + */ +static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head, + struct list_head *pf_head) +{ + struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *pf, *tmppf; + + list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { + list_del(&sgc->node); + kfree(sgc); + } + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + list_del(&pf->node); + kfree(pf); + } +} + +/** + * binder_defer_copy() - queue a scatter-gather buffer for copy + * @sgc_head: list_head of scatter-gather copy list + * @offset: binder buffer offset in target process + * @sender_uaddr: user address in source process + * @length: bytes to copy + * + * Specify a scatter-gather block to be copied. The actual copy must + * be deferred until all the needed fixups are identified and queued. 
+ * Then the copy and fixups are done together so un-translated values + * from the source are never visible in the target buffer. + * + * We are guaranteed that repeated calls to this function will have + * monotonically increasing @offset values so the list will naturally + * be ordered. + * + * Return: 0=success, else -errno + */ +static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset, + const void __user *sender_uaddr, size_t length) +{ + struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL); + + if (!bc) + return -ENOMEM; + + bc->offset = offset; + bc->sender_uaddr = sender_uaddr; + bc->length = length; + INIT_LIST_HEAD(&bc->node); + + /* + * We are guaranteed that the deferred copies are in-order + * so just add to the tail. + */ + list_add_tail(&bc->node, sgc_head); + + return 0; +} + +/** + * binder_add_fixup() - queue a fixup to be applied to sg copy + * @pf_head: list_head of binder ptr fixup list + * @offset: binder buffer offset in target process + * @fixup: bytes to be copied for fixup + * @skip_size: bytes to skip when copying (fixup will be applied later) + * + * Add the specified fixup to a list ordered by @offset. When copying + * the scatter-gather buffers, the fixup will be copied instead of + * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup + * will be applied later (in target process context), so we just skip + * the bytes specified by @skip_size. If @skip_size is 0, we copy the + * value in @fixup. + * + * This function is called *mostly* in @offset order, but there are + * exceptions. Since out-of-order inserts are relatively uncommon, + * we insert the new element by searching backward from the tail of + * the list. + * + * Return: 0=success, else -errno + */ +static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset, + binder_uintptr_t fixup, size_t skip_size) +{ + struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL); + struct binder_ptr_fixup *tmppf; + + if (!pf) + return -ENOMEM; + + pf->offset = offset; + pf->fixup_data = fixup; + pf->skip_size = skip_size; + INIT_LIST_HEAD(&pf->node); + + /* Fixups are *mostly* added in-order, but there are some + * exceptions. Look backwards through list for insertion point. 
+ */ + list_for_each_entry_reverse(tmppf, pf_head, node) { + if (tmppf->offset < pf->offset) { + list_add(&pf->node, &tmppf->node); + return 0; + } + } + /* + * if we get here, then the new offset is the lowest so + * insert at the head + */ + list_add(&pf->node, pf_head); + return 0; +} + +static int binder_translate_fd_array(struct list_head *pf_head, + struct binder_fd_array_object *fda, const void __user *sender_ubuffer, struct binder_buffer_object *parent, struct binder_buffer_object *sender_uparent, @@ -2685,6 +2924,7 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, binder_size_t fda_offset; const void __user *sender_ufda_base; struct binder_proc *proc = thread->proc; + int ret; fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { @@ -2716,9 +2956,12 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, proc->pid, thread->pid); return -EINVAL; } + ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32)); + if (ret) + return ret; + for (fdi = 0; fdi < fda->num_fds; fdi++) { u32 fd; - int ret; binder_size_t offset = fda_offset + fdi * sizeof(fd); binder_size_t sender_uoffset = fdi * sizeof(fd); @@ -2732,7 +2975,8 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, return 0; } -static int binder_fixup_parent(struct binder_transaction *t, +static int binder_fixup_parent(struct list_head *pf_head, + struct binder_transaction *t, struct binder_thread *thread, struct binder_buffer_object *bp, binder_size_t off_start_offset, @@ -2778,14 +3022,7 @@ static int binder_fixup_parent(struct binder_transaction *t, } buffer_offset = bp->parent_offset + (uintptr_t)parent->buffer - (uintptr_t)b->user_data; - if (binder_alloc_copy_to_buffer(&target_proc->alloc, b, buffer_offset, - &bp->buffer, sizeof(bp->buffer))) { - binder_user_error("%d:%d got transaction with invalid parent offset\n", - proc->pid, thread->pid); - return -EINVAL; - } - - return 0; + return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0); } /** @@ -2920,8 +3157,12 @@ static void binder_transaction(struct binder_proc *proc, int t_debug_id = atomic_inc_return(&binder_last_id); char *secctx = NULL; u32 secctx_sz = 0; + struct list_head sgc_head; + struct list_head pf_head; const void __user *user_buffer = (const void __user *) (uintptr_t)tr->data.ptr.buffer; + INIT_LIST_HEAD(&sgc_head); + INIT_LIST_HEAD(&pf_head); e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; @@ -3451,8 +3692,8 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_parent; } - ret = binder_translate_fd_array(fda, user_buffer, - parent, + ret = binder_translate_fd_array(&pf_head, fda, + user_buffer, parent, &user_object.bbo, t, thread, in_reply_to); if (!ret) @@ -3484,19 +3725,14 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_bad_offset; } - if (binder_alloc_copy_user_to_buffer( - &target_proc->alloc, - t->buffer, - sg_buf_offset, - (const void __user *) - (uintptr_t)bp->buffer, - bp->length)) { - binder_user_error("%d:%d got transaction with invalid offsets ptr\n", - proc->pid, thread->pid); - return_error_param = -EFAULT; + ret = binder_defer_copy(&sgc_head, sg_buf_offset, + (const void __user *)(uintptr_t)bp->buffer, + bp->length); + if (ret) { return_error = BR_FAILED_REPLY; + return_error_param = ret; return_error_line = __LINE__; - goto err_copy_data_failed; + goto err_translate_failed; } /* Fixup 
buffer pointer to target proc address space */ bp->buffer = (uintptr_t) @@ -3505,7 +3741,8 @@ static void binder_transaction(struct binder_proc *proc, num_valid = (buffer_offset - off_start_offset) / sizeof(binder_size_t); - ret = binder_fixup_parent(t, thread, bp, + ret = binder_fixup_parent(&pf_head, t, + thread, bp, off_start_offset, num_valid, last_fixup_obj_off, @@ -3545,6 +3782,17 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_copy_data_failed; } + + ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer, + &sgc_head, &pf_head); + if (ret) { + binder_user_error("%d:%d got transaction with invalid offsets ptr\n", + proc->pid, thread->pid); + return_error = BR_FAILED_REPLY; + return_error_param = ret; + return_error_line = __LINE__; + goto err_copy_data_failed; + } tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; t->work.type = BINDER_WORK_TRANSACTION; @@ -3620,6 +3868,7 @@ static void binder_transaction(struct binder_proc *proc, err_bad_offset: err_bad_parent: err_copy_data_failed: + binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head); binder_free_txn_fixups(t); trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, NULL, t->buffer, -- Gitee From 94b92a99065cffe765825e6bec6869dc2b9dce83 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Nov 2022 03:58:03 +0000 Subject: [PATCH 110/243] binder: fix pointer cast warning stable inclusion from stable-5.10.156 commit 2e3c27f24173c6f3d799080da82126fa044a2f5e category: bugfix issue: #I6EXSP CVE: CVE-2023-20938 Signed-off-by: gaochao --------------------------------------- commit 9a0a930fe2535a76ad70d3f43caeccf0d86a3009 upstream. binder_uintptr_t is not the same as uintptr_t, so converting it into a pointer requires a second cast: drivers/android/binder.c: In function 'binder_translate_fd_array': drivers/android/binder.c:2511:28: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] 2511 | sender_ufda_base = (void __user *)sender_uparent->buffer + fda->parent_offset; | ^ Fixes: 656e01f3ab54 ("binder: read pre-translated fds from sender buffer") Acked-by: Todd Kjos Acked-by: Randy Dunlap # build-tested Acked-by: Christian Brauner Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211207122448.1185769-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- drivers/android/binder.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index bb252dd87541..c86f8868e65a 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2948,7 +2948,8 @@ static int binder_translate_fd_array(struct list_head *pf_head, */ fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) + fda->parent_offset; - sender_ufda_base = (void __user *)sender_uparent->buffer + fda->parent_offset; + sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer + + fda->parent_offset; if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) || !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) { -- Gitee From 61366123c483310dfc5edcf6071968176ca0778d Mon Sep 17 00:00:00 2001 From: Alessandro Astone Date: Wed, 30 Nov 2022 03:58:04 +0000 Subject: [PATCH 111/243] binder: Address corner cases in deferred copy and fixup stable inclusion from stable-5.10.156 commit 017de842533f4334d646f1d480f591f4ca9f5c7a 
category: bugfix issue: #I6EXSP CVE: CVE-2023-20938 Signed-off-by: gaochao --------------------------------------- commit 2d1746e3fda0c3612143d7c06f8e1d1830c13e23 upstream. When handling BINDER_TYPE_FDA object we are pushing a parent fixup with a certain skip_size but no scatter-gather copy object, since the copy is handled standalone. If BINDER_TYPE_FDA is the last children the scatter-gather copy loop will never stop to skip it, thus we are left with an item in the parent fixup list. This will trigger the BUG_ON(). This is reproducible in android when playing a video. We receive a transaction that looks like this: obj[0] BINDER_TYPE_PTR, parent obj[1] BINDER_TYPE_PTR, child obj[2] BINDER_TYPE_PTR, child obj[3] BINDER_TYPE_FDA, child Fixes: 09184ae9b575 ("binder: defer copies of pre-patched txn data") Acked-by: Todd Kjos Cc: stable Signed-off-by: Alessandro Astone Link: https://lore.kernel.org/r/20220415120015.52684-2-ales.astone@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- drivers/android/binder.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index c86f8868e65a..540c2160eda6 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2735,6 +2735,7 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, { int ret = 0; struct binder_sg_copy *sgc, *tmpsgc; + struct binder_ptr_fixup *tmppf; struct binder_ptr_fixup *pf = list_first_entry_or_null(pf_head, struct binder_ptr_fixup, node); @@ -2789,7 +2790,11 @@ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, list_del(&sgc->node); kfree(sgc); } - BUG_ON(!list_empty(pf_head)); + list_for_each_entry_safe(pf, tmppf, pf_head, node) { + BUG_ON(pf->skip_size == 0); + list_del(&pf->node); + kfree(pf); + } BUG_ON(!list_empty(sgc_head)); return ret > 0 ? -EINVAL : ret; -- Gitee From 3bd92b61ec4394df9c48487b18dd62c20aa9b911 Mon Sep 17 00:00:00 2001 From: Alessandro Astone Date: Wed, 30 Nov 2022 03:58:05 +0000 Subject: [PATCH 112/243] binder: Gracefully handle BINDER_TYPE_FDA objects with num_fds=0 stable inclusion from stable-5.10.156 commit ae9e0cc973fb7499ea1b1a8dfd0795f728b84faf category: bugfix issue: #I6EXSP CVE: CVE-2023-20938 Signed-off-by: gaochao --------------------------------------- commit ef38de9217a04c9077629a24652689d8fdb4c6c6 upstream. Some android userspace is sending BINDER_TYPE_FDA objects with num_fds=0. Like the previous patch, this is reproducible when playing a video. Before commit 09184ae9b575 BINDER_TYPE_FDA objects with num_fds=0 were 'correctly handled', as in no fixup was performed. After commit 09184ae9b575 we aggregate fixup and skip regions in binder_ptr_fixup structs and distinguish between the two by using the skip_size field: if it's 0, then it's a fixup, otherwise skip. When processing BINDER_TYPE_FDA objects with num_fds=0 we add a skip region of skip_size=0, and this causes issues because now binder_do_deferred_txn_copies will think this was a fixup region. To address that, return early from binder_translate_fd_array to avoid adding an empty skip region. 
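Compressed into a sketch (the types and the helper name queue_fd_array_skip() are assumptions for illustration, not the binder code): because skip_size == 0 is the marker that an entry is a pointer fixup, an entry that says "skip zero bytes" is indistinguishable from a fixup, so the only safe handling of an empty fd array is to queue no entry at all.

#include <stddef.h>

struct pf_entry {
	size_t offset;
	size_t skip_size;	/* 0 => "apply a fixup here", non-zero => "skip these bytes" */
};

/* Hypothetical helper mirroring the early return added by this patch:
 * queue at most one skip entry and report how many entries were queued. */
static size_t queue_fd_array_skip(struct pf_entry *pf, size_t offset, size_t num_fds)
{
	if (num_fds == 0)
		return 0;	/* a skip_size of 0 would later be read as a fixup */

	pf->offset = offset;
	pf->skip_size = num_fds * sizeof(unsigned int);
	return 1;
}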
Fixes: 09184ae9b575 ("binder: defer copies of pre-patched txn data") Acked-by: Todd Kjos Cc: stable Signed-off-by: Alessandro Astone Link: https://lore.kernel.org/r/20220415120015.52684-1-ales.astone@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- drivers/android/binder.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 540c2160eda6..6db5fe2a47ca 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2931,6 +2931,9 @@ static int binder_translate_fd_array(struct list_head *pf_head, struct binder_proc *proc = thread->proc; int ret; + if (fda->num_fds == 0) + return 0; + fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n", -- Gitee From 47080b4c48a5eeec6af8a29df0c69af326af4f80 Mon Sep 17 00:00:00 2001 From: Rodrigo Branco Date: Tue, 3 Jan 2023 14:17:51 -0600 Subject: [PATCH 113/243] x86/bugs: Flush IBP in ib_prctl_set() stable inclusion from stable-5.10.163 commit 67e39c4f4cb318cfbbf8982ab016c649ed97edaf category: bugfix issue: #I6EBNO CVE: CVE-2023-0045 Signed-off-by: gaochao --------------------------------------- commit a664ec9158eeddd75121d39c9a0758016097fa96 upstream. We missed the window between the TIF flag update and the next reschedule. Signed-off-by: Rodrigo Branco Reviewed-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Cc: Signed-off-by: Greg Kroah-Hartman Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- arch/x86/kernel/cpu/bugs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d4cb9ff639aa..20611c1be29d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1872,6 +1872,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); task_update_spec_tif(task); + if (task == current) + indirect_branch_prediction_barrier(); break; default: return -ERANGE; -- Gitee From b8c7bd9ef5420bfeac8b1841505f27e69fe679b0 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Thu, 27 Oct 2022 20:38:55 +0800 Subject: [PATCH 114/243] media: vivid: fix compose size exceed boundary stable inclusion from stable-5.10.163 commit f9d19f3a044ca651b0be52a4bf951ffe74259b9f category: bugfix issue: #I6DNNL CVE: CVE-2023-0615 Signed-off-by: gaochao --------------------------------------- [ Upstream commit 94a7ad9283464b75b12516c5512541d467cefcf8 ] syzkaller found a bug: BUG: unable to handle page fault for address: ffffc9000a3b1000 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 100000067 P4D 100000067 PUD 10015f067 PMD 1121ca067 PTE 0 Oops: 0002 [#1] PREEMPT SMP CPU: 0 PID: 23489 Comm: vivid-000-vid-c Not tainted 6.1.0-rc1+ #512 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 RIP: 0010:memcpy_erms+0x6/0x10 [...] Call Trace: ? tpg_fill_plane_buffer+0x856/0x15b0 vivid_fillbuff+0x8ac/0x1110 vivid_thread_vid_cap_tick+0x361/0xc90 vivid_thread_vid_cap+0x21a/0x3a0 kthread+0x143/0x180 ret_from_fork+0x1f/0x30 This is because we forget to check boundary after adjust compose->height int V4L2_SEL_TGT_CROP case. Add v4l2_rect_map_inside() to fix this problem for this case. 
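Roughly, the added v4l2_rect_map_inside() call clamps the compose rectangle back into the just-adjusted format rectangle. A simplified sketch of the clamping semantics (illustrative only, not the actual v4l2-rect.h implementation):

  static void map_inside_sketch(struct v4l2_rect *r, const struct v4l2_rect *b)
  {
          /* shrink r so it can fit inside b ... */
          if (r->width > b->width)
                  r->width = b->width;
          if (r->height > b->height)
                  r->height = b->height;
          /* ... then shift it so it lies fully inside b */
          if (r->left < b->left)
                  r->left = b->left;
          if (r->top < b->top)
                  r->top = b->top;
          if (r->left + r->width > b->left + b->width)
                  r->left = b->left + b->width - r->width;
          if (r->top + r->height > b->top + b->height)
                  r->top = b->top + b->height - r->height;
  }

With that clamp in place, compose can no longer exceed the capture buffer after the height adjustment, which is the overrun seen in the tpg_fill_plane_buffer() call trace above.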
Fixes: ef834f7836ec ("[media] vivid: add the video capture and output parts") Signed-off-by: Liu Shixin Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- drivers/media/test-drivers/vivid/vivid-vid-cap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/test-drivers/vivid/vivid-vid-cap.c b/drivers/media/test-drivers/vivid/vivid-vid-cap.c index eadf28ab1e39..eeb0aeb62f79 100644 --- a/drivers/media/test-drivers/vivid/vivid-vid-cap.c +++ b/drivers/media/test-drivers/vivid/vivid-vid-cap.c @@ -953,6 +953,7 @@ int vivid_vid_cap_s_selection(struct file *file, void *fh, struct v4l2_selection if (dev->has_compose_cap) { v4l2_rect_set_min_size(compose, &min_rect); v4l2_rect_set_max_size(compose, &max_rect); + v4l2_rect_map_inside(compose, &fmt); } dev->fmt_cap_rect = fmt; tpg_s_buf_height(&dev->tpg, fmt.height); -- Gitee From 4f265c494ea9fdd7e60206d9bd491c457f403d09 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 27 Oct 2022 14:54:41 -0700 Subject: [PATCH 115/243] x86/mm: Randomize per-cpu entry area mainline inclusion from mainline-v6.2-rc1 commit 97e3d26b5e5f371b3ee223d94dd123e6c442ba80 category: bugfix issue: I6DPAM CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=97e3d26b5e5f371b3ee223d94dd123e6c442ba80 Signed-off-by: gaochao --------------------------------------- Seth found that the CPU-entry-area; the piece of per-cpu data that is mapped into the userspace page-tables for kPTI is not subject to any randomization -- irrespective of kASLR settings. On x86_64 a whole P4D (512 GB) of virtual address space is reserved for this structure, which is plenty large enough to randomize things a little. As such, use a straight forward randomization scheme that avoids duplicates to spread the existing CPUs over the available space. [ bp: Fix le build. ] Reported-by: Seth Jenkins Reviewed-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Conflict: arch/x86/mm/cpu_entry_area.c Use get_random_u32() instead of prandom_u32_max() in init_cea_offsets(). With CONFIG_RANDOMIZE_BASE=y, KASLR initializes the prandom seed via prandom_seed_state() before init_cea_offsets() runs; with CONFIG_RANDOMIZE_BASE=n the prandom seed is only initialized after init_cea_offsets(), so cea would always be 0. Upstream commit d4150779e60f ("random32: use real rng for non-deterministic randomness") makes prandom_u32_max() use get_random_u32() instead of prandom_u32(), which removes the dependency on the prandom seed, but that patch has many prerequisite patches that have not been merged here. As a workaround, call get_random_u32() directly in init_cea_offsets(), which also simplifies the code.
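For reference, the multiply-shift used in the workaround maps a 32-bit random value onto [0, max_cea) without a costly modulo; a minimal standalone sketch of the same idea (helper name is illustrative only):

  /* Return a value in [0, max): take the high 32 bits of rnd * max. */
  static unsigned int random_index_below(unsigned int max)
  {
          return (u32)(((u64)get_random_u32() * max) >> 32);
  }

This matches the cea computation in the hunk below and avoids depending on the prandom seed entirely.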
Signed-off-by: Ke Liu Signed-off-by: wanxiaoqing --- arch/x86/include/asm/cpu_entry_area.h | 4 --- arch/x86/include/asm/pgtable_areas.h | 8 ++++- arch/x86/kernel/hw_breakpoint.c | 2 +- arch/x86/mm/cpu_entry_area.c | 51 ++++++++++++++++++++++++--- 4 files changed, 55 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index dd5ea1bdf04c..e2c04a5015b0 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -130,10 +130,6 @@ struct cpu_entry_area { }; #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) - -/* Total size includes the readonly IDT mapping page as well: */ -#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h index d34cce1b995c..4f056fb88174 100644 --- a/arch/x86/include/asm/pgtable_areas.h +++ b/arch/x86/include/asm/pgtable_areas.h @@ -11,6 +11,12 @@ #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) -#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) +#ifdef CONFIG_X86_32 +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \ + (CPU_ENTRY_AREA_SIZE * NR_CPUS) - \ + CPU_ENTRY_AREA_BASE) +#else +#define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE +#endif #endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 668a4a6533d9..bbb0f737aab1 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) /* CPU entry erea is always used for CPU entry */ if (within_area(addr, end, CPU_ENTRY_AREA_BASE, - CPU_ENTRY_AREA_TOTAL_SIZE)) + CPU_ENTRY_AREA_MAP_SIZE)) return true; /* diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 6c2f1b76a0b6..98396a67958c 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -15,16 +16,57 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage) #ifdef CONFIG_X86_64 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); -#endif -#ifdef CONFIG_X86_32 +static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset); + +static __always_inline unsigned int cea_offset(unsigned int cpu) +{ + return per_cpu(_cea_offset, cpu); +} + +static __init void init_cea_offsets(void) +{ + unsigned int max_cea; + unsigned int i, j; + + max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE; + + /* O(sodding terrible) */ + for_each_possible_cpu(i) { + unsigned int cea; + +again: + /* + * Directly use get_random_u32() instead of prandom_u32_max + * to avoid seed can't be generated when CONFIG_RANDOMIZE_BASE=n. 
+ */ + cea = (u32)(((u64) get_random_u32() * max_cea) >> 32); + + for_each_possible_cpu(j) { + if (cea_offset(j) == cea) + goto again; + + if (i == j) + break; + } + + per_cpu(_cea_offset, i) = cea; + } +} +#else /* !X86_64 */ DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack); + +static __always_inline unsigned int cea_offset(unsigned int cpu) +{ + return cpu; +} +static inline void init_cea_offsets(void) { } #endif /* Is called from entry code, so must be noinstr */ noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu) { - unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE; BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); return (struct cpu_entry_area *) va; @@ -205,7 +247,6 @@ static __init void setup_cpu_entry_area_ptes(void) /* The +1 is for the readonly IDT: */ BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); - BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); start = CPU_ENTRY_AREA_BASE; @@ -221,6 +262,8 @@ void __init setup_cpu_entry_areas(void) { unsigned int cpu; + init_cea_offsets(); + setup_cpu_entry_area_ptes(); for_each_possible_cpu(cpu) -- Gitee From 7036b481e787c681b5a374cbe16b28d516bc3860 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 28 Oct 2022 00:31:04 +0300 Subject: [PATCH 116/243] x86/kasan: Map shadow for percpu pages on demand mainline inclusion from mainline-v6.2-rc1 commit 3f148f3318140035e87decc1214795ff0755757b category: bugfix issue: I6DPAM CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3f148f3318140035e87decc1214795ff0755757b Signed-off-by: gaochao --------------------------------------- KASAN maps shadow for the entire CPU-entry-area: [CPU_ENTRY_AREA_BASE, CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE] This will explode once the per-cpu entry areas are randomized since it will increase CPU_ENTRY_AREA_MAP_SIZE to 512 GB and KASAN fails to allocate shadow for such big area. Fix this by allocating KASAN shadow only for really used cpu entry area addresses mapped by cea_map_percpu_pages() Thanks to the 0day folks for finding and reporting this to be an issue. 
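To see the scale of the problem, note that generic KASAN keeps one shadow byte for every 8 bytes of address space, so shadowing the whole randomized area up front would need tens of gigabytes; a back-of-the-envelope sketch (the scale factor is the usual generic-KASAN value, assumed here rather than quoted from the patch):

  #define KASAN_SHADOW_SCALE_SHIFT  3              /* 1 shadow byte per 8 bytes   */
  unsigned long cea_span     = 512UL << 30;        /* 512 GiB CPU-entry-area span */
  unsigned long shadow_bytes = cea_span >> KASAN_SHADOW_SCALE_SHIFT;   /* 64 GiB  */

Populating shadow only for the ranges cea_map_percpu_pages() actually maps keeps the cost proportional to the number of possible CPUs instead.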
[ dhansen: tweak changelog since this will get committed before peterz's actual cpu-entry-area randomization ] Signed-off-by: Andrey Ryabinin Signed-off-by: Dave Hansen Tested-by: Yujie Liu Cc: kernel test robot Link: https://lore.kernel.org/r/202210241508.2e203c3d-yujie.liu@intel.com Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- arch/x86/include/asm/kasan.h | 3 +++ arch/x86/mm/cpu_entry_area.c | 8 +++++++- arch/x86/mm/kasan_init_64.c | 15 ++++++++++++--- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 13e70da38bed..de75306b932e 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -28,9 +28,12 @@ #ifdef CONFIG_KASAN void __init kasan_early_init(void); void __init kasan_init(void); +void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid); #else static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } +static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size, + int nid) { } #endif #endif diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 98396a67958c..bf5786f3f3c4 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -10,6 +10,7 @@ #include #include #include +#include static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); @@ -95,8 +96,13 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) static void __init cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) { + phys_addr_t pa = per_cpu_ptr_to_phys(ptr); + + kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE, + early_pfn_to_nid(PFN_DOWN(pa))); + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) - cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); + cea_set_pte(cea_vaddr, pa, prot); } static void __init percpu_setup_debug_store(unsigned int cpu) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 1a50434c8a4d..18d574af30ef 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -318,6 +318,18 @@ void __init kasan_early_init(void) kasan_map_early_shadow(init_top_pgt); } +void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) +{ + unsigned long shadow_start, shadow_end; + + shadow_start = (unsigned long)kasan_mem_to_shadow(va); + shadow_start = round_down(shadow_start, PAGE_SIZE); + shadow_end = (unsigned long)kasan_mem_to_shadow(va + size); + shadow_end = round_up(shadow_end, PAGE_SIZE); + + kasan_populate_shadow(shadow_start, shadow_end, nid); +} + void __init kasan_init(void) { int i; @@ -395,9 +407,6 @@ void __init kasan_init(void) kasan_mem_to_shadow((void *)VMALLOC_END + 1), shadow_cpu_entry_begin); - kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, - (unsigned long)shadow_cpu_entry_end, 0); - kasan_populate_early_shadow(shadow_cpu_entry_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); -- Gitee From bc11153b11fd089b647eae2dad42f22abf668484 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:00 +0000 Subject: [PATCH 117/243] x86/mm: Recompute physical address for every page of per-CPU CEA mapping mainline inclusion from mainline-v6.2-rc1 commit 80d72a8f76e8f3f0b5a70b8c7022578e17bde8e7 category: bugfix issue: I6DPAM CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=80d72a8f76e8f3f0b5a70b8c7022578e17bde8e7 Signed-off-by: gaochao 
--------------------------------------- Recompute the physical address for each per-CPU page in the CPU entry area, a recent commit inadvertantly modified cea_map_percpu_pages() such that every PTE is mapped to the physical address of the first page. Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-2-seanjc@google.com Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- arch/x86/mm/cpu_entry_area.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index bf5786f3f3c4..2eaf9ee41495 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -102,7 +102,7 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) early_pfn_to_nid(PFN_DOWN(pa))); for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) - cea_set_pte(cea_vaddr, pa, prot); + cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } static void __init percpu_setup_debug_store(unsigned int cpu) -- Gitee From ac0173db075b6857afd383ff6f8fb8f74fed9527 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:01 +0000 Subject: [PATCH 118/243] x86/mm: Populate KASAN shadow for entire per-CPU range of CPU entry area mainline inclusion from mainline-v6.2-rc1 commit 97650148a15e0b30099d6175ffe278b9f55ec66a category: bugfix issue: I6DPAM CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=97650148a15e0b30099d6175ffe278b9f55ec66a Signed-off-by: gaochao --------------------------------------- Populate a KASAN shadow for the entire possible per-CPU range of the CPU entry area instead of requiring that each individual chunk map a shadow. Mapping shadows individually is error prone, e.g. the per-CPU GDT mapping was left behind, which can lead to not-present page faults during KASAN validation if the kernel performs a software lookup into the GDT. The DS buffer is also likely affected. The motivation for mapping the per-CPU areas on-demand was to avoid mapping the entire 512GiB range that's reserved for the CPU entry area, shaving a few bytes by not creating shadows for potentially unused memory was not a goal. The bug is most easily reproduced by doing a sigreturn with a garbage CS in the sigcontext, e.g. int main(void) { struct sigcontext regs; syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul); syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); memset(®s, 0, sizeof(regs)); regs.cs = 0x1d0; syscall(__NR_rt_sigreturn); return 0; } to coerce the kernel into doing a GDT lookup to compute CS.base when reading the instruction bytes on the subsequent #GP to determine whether or not the #GP is something the kernel should handle, e.g. to fixup UMIP violations or to emulate CLI/STI for IOPL=3 applications. 
BUG: unable to handle page fault for address: fffffbc8379ace00 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 16c03a067 P4D 16c03a067 PUD 15b990067 PMD 15b98f067 PTE 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 3 PID: 851 Comm: r2 Not tainted 6.1.0-rc3-next-20221103+ #432 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kasan_check_range+0xdf/0x190 Call Trace: get_desc+0xb0/0x1d0 insn_get_seg_base+0x104/0x270 insn_fetch_from_user+0x66/0x80 fixup_umip_exception+0xb1/0x530 exc_general_protection+0x181/0x210 asm_exc_general_protection+0x22/0x30 RIP: 0003:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. RSP: 0003:0000000000000000 EFLAGS: 00000202 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000000001d0 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") Reported-by: syzbot+ffb4f000dc2872c93f62@syzkaller.appspotmail.com Suggested-by: Andrey Ryabinin Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-3-seanjc@google.com Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- arch/x86/mm/cpu_entry_area.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 2eaf9ee41495..88e2cc4d4e75 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -96,11 +96,6 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) static void __init cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) { - phys_addr_t pa = per_cpu_ptr_to_phys(ptr); - - kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE, - early_pfn_to_nid(PFN_DOWN(pa))); - for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } @@ -200,6 +195,9 @@ static void __init setup_cpu_entry_area(unsigned int cpu) pgprot_t tss_prot = PAGE_KERNEL; #endif + kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE, + early_cpu_to_node(cpu)); + cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot); cea_map_percpu_pages(&cea->entry_stack_page, -- Gitee From 580bf8e7cc07b0f8e608bbc9998bd0ec69e8677d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:02 +0000 Subject: [PATCH 119/243] x86/kasan: Rename local CPU_ENTRY_AREA variables to shorten names mainline inclusion from mainline-v6.2-rc1 commit 7077d2ccb94dafd00b29cc2d601c9f6891648f5b category: bugfix issue: I6DPAM CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7077d2ccb94dafd00b29cc2d601c9f6891648f5b Signed-off-by: gaochao --------------------------------------- Rename the CPU entry area variables in kasan_init() to shorten their names, a future fix will reference the beginning of the per-CPU portion of the CPU entry area, and shadow_cpu_entry_per_cpu_begin is a bit much. No functional change intended. 
Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-4-seanjc@google.com Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- arch/x86/mm/kasan_init_64.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 18d574af30ef..75b0eb482487 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -333,7 +333,7 @@ void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) void __init kasan_init(void) { int i; - void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; + void *shadow_cea_begin, *shadow_cea_end; memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); @@ -374,16 +374,16 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } - shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; - shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); - shadow_cpu_entry_begin = (void *)round_down( - (unsigned long)shadow_cpu_entry_begin, PAGE_SIZE); + shadow_cea_begin = (void *)CPU_ENTRY_AREA_BASE; + shadow_cea_begin = kasan_mem_to_shadow(shadow_cea_begin); + shadow_cea_begin = (void *)round_down( + (unsigned long)shadow_cea_begin, PAGE_SIZE); - shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + + shadow_cea_end = (void *)(CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE); - shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); - shadow_cpu_entry_end = (void *)round_up( - (unsigned long)shadow_cpu_entry_end, PAGE_SIZE); + shadow_cea_end = kasan_mem_to_shadow(shadow_cea_end); + shadow_cea_end = (void *)round_up( + (unsigned long)shadow_cea_end, PAGE_SIZE); kasan_populate_early_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), @@ -405,9 +405,9 @@ void __init kasan_init(void) kasan_populate_early_shadow( kasan_mem_to_shadow((void *)VMALLOC_END + 1), - shadow_cpu_entry_begin); + shadow_cea_begin); - kasan_populate_early_shadow(shadow_cpu_entry_end, + kasan_populate_early_shadow(shadow_cea_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), -- Gitee From e6a28182581a38564bd74d22dd048d63a31dc1fb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:03 +0000 Subject: [PATCH 120/243] x86/kasan: Add helpers to align shadow addresses up and down mainline inclusion from mainline-v6.2-rc1 commit bde258d97409f2a45243cb393a55ea9ecfc7aba5 category: bugfix issue: I6DPAM CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bde258d97409f2a45243cb393a55ea9ecfc7aba5 Signed-off-by: gaochao --------------------------------------- Add helpers to dedup code for aligning shadow address up/down to page boundaries when translating an address to its shadow. No functional change intended. 
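After this change, covering an arbitrary [va, va + size) range with whole shadow pages reduces to the following call pattern (as used by kasan_populate_shadow_for_vaddr() in the hunk below):

  shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va);
  shadow_end   = kasan_mem_to_shadow_align_up((unsigned long)va + size);
  kasan_populate_shadow(shadow_start, shadow_end, nid);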
Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-5-seanjc@google.com Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- arch/x86/mm/kasan_init_64.c | 40 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 75b0eb482487..c1d99556c024 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -318,22 +318,33 @@ void __init kasan_early_init(void) kasan_map_early_shadow(init_top_pgt); } +static unsigned long kasan_mem_to_shadow_align_down(unsigned long va) +{ + unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); + + return round_down(shadow, PAGE_SIZE); +} + +static unsigned long kasan_mem_to_shadow_align_up(unsigned long va) +{ + unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); + + return round_up(shadow, PAGE_SIZE); +} + void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) { unsigned long shadow_start, shadow_end; - shadow_start = (unsigned long)kasan_mem_to_shadow(va); - shadow_start = round_down(shadow_start, PAGE_SIZE); - shadow_end = (unsigned long)kasan_mem_to_shadow(va + size); - shadow_end = round_up(shadow_end, PAGE_SIZE); - + shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va); + shadow_end = kasan_mem_to_shadow_align_up((unsigned long)va + size); kasan_populate_shadow(shadow_start, shadow_end, nid); } void __init kasan_init(void) { + unsigned long shadow_cea_begin, shadow_cea_end; int i; - void *shadow_cea_begin, *shadow_cea_end; memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); @@ -374,16 +385,9 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } - shadow_cea_begin = (void *)CPU_ENTRY_AREA_BASE; - shadow_cea_begin = kasan_mem_to_shadow(shadow_cea_begin); - shadow_cea_begin = (void *)round_down( - (unsigned long)shadow_cea_begin, PAGE_SIZE); - - shadow_cea_end = (void *)(CPU_ENTRY_AREA_BASE + - CPU_ENTRY_AREA_MAP_SIZE); - shadow_cea_end = kasan_mem_to_shadow(shadow_cea_end); - shadow_cea_end = (void *)round_up( - (unsigned long)shadow_cea_end, PAGE_SIZE); + shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE); + shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); kasan_populate_early_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), @@ -405,9 +409,9 @@ void __init kasan_init(void) kasan_populate_early_shadow( kasan_mem_to_shadow((void *)VMALLOC_END + 1), - shadow_cea_begin); + (void *)shadow_cea_begin); - kasan_populate_early_shadow(shadow_cea_end, + kasan_populate_early_shadow((void *)shadow_cea_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), -- Gitee From 8b9ad3262d93f0db460c15c9eef7506e4121bf78 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:04 +0000 Subject: [PATCH 121/243] x86/kasan: Populate shadow for shared chunk of the CPU entry area mainline inclusion from mainline-v6.2-rc1 commit 1cfaac2400c73378e78182a706be0f3ac8b93cd7 category: bugfix issue: I6DPAM CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1cfaac2400c73378e78182a706be0f3ac8b93cd7 Signed-off-by: gaochao --------------------------------------- Popuplate the shadow for the shared portion of the CPU entry area, i.e. 
the read-only IDT mapping, during KASAN initialization. A recent change modified KASAN to map the per-CPU areas on-demand, but forgot to keep a shadow for the common area that is shared amongst all CPUs. Map the common area in KASAN init instead of letting idt_map_in_cea() do the dirty work so that it Just Works in the unlikely event more shared data is shoved into the CPU entry area. The bug manifests as a not-present #PF when software attempts to lookup an IDT entry, e.g. when KVM is handling IRQs on Intel CPUs (KVM performs direct CALL to the IRQ handler to avoid the overhead of INTn): BUG: unable to handle page fault for address: fffffbc0000001d8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 16c03a067 P4D 16c03a067 PUD 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 5 PID: 901 Comm: repro Tainted: G W 6.1.0-rc3+ #410 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kasan_check_range+0xdf/0x190 vmx_handle_exit_irqoff+0x152/0x290 [kvm_intel] vcpu_run+0x1d89/0x2bd0 [kvm] kvm_arch_vcpu_ioctl_run+0x3ce/0xa70 [kvm] kvm_vcpu_ioctl+0x349/0x900 [kvm] __x64_sys_ioctl+0xb8/0xf0 do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") Reported-by: syzbot+8cdd16fd5a6c0565e227@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110203504.1985010-6-seanjc@google.com Signed-off-by: gaochao Signed-off-by: wanxiaoqing --- arch/x86/mm/kasan_init_64.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index c1d99556c024..b4b187960dd0 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -343,7 +343,7 @@ void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) void __init kasan_init(void) { - unsigned long shadow_cea_begin, shadow_cea_end; + unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end; int i; memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); @@ -386,6 +386,7 @@ void __init kasan_init(void) } shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE); + shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU); shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE); @@ -411,6 +412,15 @@ void __init kasan_init(void) kasan_mem_to_shadow((void *)VMALLOC_END + 1), (void *)shadow_cea_begin); + /* + * Populate the shadow for the shared portion of the CPU entry area. + * Shadows for the per-CPU areas are mapped on-demand, as each CPU's + * area is randomly placed somewhere in the 512GiB range and mapping + * the entire 512GiB range is prohibitively expensive. + */ + kasan_populate_shadow(shadow_cea_begin, + shadow_cea_per_cpu_begin, 0); + kasan_populate_early_shadow((void *)shadow_cea_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); -- Gitee From f675d0f8822b281c2e1f690d6827639acfd8ebdc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 13 Feb 2023 22:53:55 -0800 Subject: [PATCH 122/243] net: mpls: fix stale pointer if allocation fails during device rename stable inclusion from stable-5.10.169 commit 7ff0fdba82298d1f456c685e24930da89703c0fb category: bugfix issue: #I6IBC8 CVE: CVE-2023-26545 Signed-off-by: wanxiaoqing --------------------------------------- commit fda6c89fe3d9aca073495a664e1d5aea28cd4377 upstream. 
lianhui reports that when MPLS fails to register the sysctl table under new location (during device rename) the old pointers won't get overwritten and may be freed again (double free). Handle this gracefully. The best option would be unregistering the MPLS from the device completely on failure, but unfortunately mpls_ifdown() can fail. So failing fully is also unreliable. Another option is to register the new table first then only remove old one if the new one succeeds. That requires more code, changes order of notifications and two tables may be visible at the same time. sysctl point is not used in the rest of the code - set to NULL on failures and skip unregister if already NULL. Reported-by: lianhui tang Fixes: 0fae3bf018d9 ("mpls: handle device renames for per-device sysctls") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/mpls/af_mpls.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 9c047c148a11..d226de7f12d3 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1427,6 +1427,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, free: kfree(table); out: + mdev->sysctl = NULL; return -ENOBUFS; } @@ -1436,6 +1437,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev, struct net *net = dev_net(dev); struct ctl_table *table; + if (!mdev->sysctl) + return; + table = mdev->sysctl->ctl_table_arg; unregister_net_sysctl_table(mdev->sysctl); kfree(table); -- Gitee From 79951727330474f46d5c1a8823f9837b834675aa Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 24 Jan 2023 08:55:33 +0100 Subject: [PATCH 123/243] media: rc: Fix use-after-free bugs caused by ene_tx_irqsim() mainline inclusion from mainline-v6.3-rc1 commit 29b0589a865b6f66d141d79b2dd1373e4e50fe17 category: bugfix issue: #I6JH2B CVE: CVE-2023-1118 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=29b0589a865b6f66d141d79b2dd1373e4e50fe17 Signed-off-by: wanxiaoqing --------------------------------------- When the ene device is detaching, function ene_remove() will be called. But there is no function to cancel tx_sim_timer in ene_remove(), the timer handler ene_tx_irqsim() could race with ene_remove(). As a result, the UAF bugs could happen, the process is shown below. (cleanup routine) | (timer routine) | mod_timer(&dev->tx_sim_timer, ..) ene_remove() | (wait a time) | ene_tx_irqsim() | dev->hw_lock //USE | ene_tx_sample(dev) //USE Fix by adding del_timer_sync(&dev->tx_sim_timer) in ene_remove(), The tx_sim_timer could stop before ene device is deallocated. What's more, The rc_unregister_device() and del_timer_sync() should be called first in ene_remove() and the deallocated functions such as free_irq(), release_region() and so on should be called behind them. Because the rc_unregister_device() is well synchronized. Otherwise, race conditions may happen. The situations that may lead to race conditions are shown below. Firstly, the rx receiver is disabled with ene_rx_disable() before rc_unregister_device() in ene_remove(), which means it can be enabled again if a process opens /dev/lirc0 between ene_rx_disable() and rc_unregister_device(). Secondly, the irqaction descriptor is freed by free_irq() before the rc device is unregistered, which means irqaction descriptor may be accessed again after it is deallocated. 
Thirdly, the timer can call ene_tx_sample() that can write to the io ports, which means the io ports could be accessed again after they are deallocated by release_region(). Therefore, the rc_unregister_device() and del_timer_sync() should be called first in ene_remove(). Suggested by: Sean Young Fixes: 9ea53b74df9c ("V4L/DVB: STAGING: remove lirc_ene0100 driver") Signed-off-by: Duoming Zhou Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab Signed-off-by: wanxiaoqing --- drivers/media/rc/ene_ir.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/media/rc/ene_ir.c b/drivers/media/rc/ene_ir.c index 6049e5c95394..5aa3953cab82 100644 --- a/drivers/media/rc/ene_ir.c +++ b/drivers/media/rc/ene_ir.c @@ -1106,6 +1106,8 @@ static void ene_remove(struct pnp_dev *pnp_dev) struct ene_device *dev = pnp_get_drvdata(pnp_dev); unsigned long flags; + rc_unregister_device(dev->rdev); + del_timer_sync(&dev->tx_sim_timer); spin_lock_irqsave(&dev->hw_lock, flags); ene_rx_disable(dev); ene_rx_restore_hw_buffer(dev); @@ -1113,7 +1115,6 @@ static void ene_remove(struct pnp_dev *pnp_dev) free_irq(dev->irq, dev); release_region(dev->hw_io, ENE_IO_SIZE); - rc_unregister_device(dev->rdev); kfree(dev); } -- Gitee From ce5769396bc9cb5358e5485981ac593026385942 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Sat, 4 Feb 2023 17:39:22 +0000 Subject: [PATCH 124/243] tap: tap_open(): correctly initialize socket uid mainline inclusion from mainline-v6.3-rc1 commit 66b2c338adce580dfce2199591e65e2bab889cff category: bugfix issue: #I6K31U CVE: CVE-2023-1076 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=66b2c338adce580dfce2199591e65e2bab889cff Signed-off-by: wanxiaoqing --------------------------------------- sock_init_data() assumes that the `struct socket` passed in input is contained in a `struct socket_alloc` allocated with sock_alloc(). However, tap_open() passes a `struct socket` embedded in a `struct tap_queue` allocated with sk_alloc(). This causes a type confusion when issuing a container_of() with SOCK_INODE() in sock_init_data() which results in assigning a wrong sk_uid to the `struct sock` in input. On default configuration, the type confused field overlaps with padding bytes between `int vnet_hdr_sz` and `struct tap_dev __rcu *tap` in `struct tap_queue`, which makes the uid of all tap sockets 0, i.e., the root one. Fix the assignment by using sock_init_data_uid(). Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Pietro Borrello Reviewed-by: Eric Dumazet Signed-off-by: David S. 
Miller Signed-off-by: wanxiaoqing --- drivers/net/tap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index f549d3a8e59c..e8a7d56befb2 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -523,7 +523,7 @@ static int tap_open(struct inode *inode, struct file *file) q->sock.state = SS_CONNECTED; q->sock.file = file; q->sock.ops = &tap_socket_ops; - sock_init_data(&q->sock, &q->sk); + sock_init_data_uid(&q->sock, &q->sk, inode->i_uid); q->sk.sk_write_space = tap_sock_write_space; q->sk.sk_destruct = tap_sock_destruct; q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; -- Gitee From 0cc4602b9ab14dc6c8ec9f2de8f1a02ea8b5d596 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Sat, 4 Feb 2023 17:39:21 +0000 Subject: [PATCH 125/243] tun: tun_chr_open(): correctly initialize socket uid mainline inclusion from mainline-v6.3-rc1 commit a096ccca6e503a5c575717ff8a36ace27510ab0a category: bugfix issue: #I6K31U CVE: CVE-2023-1076 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a096ccca6e503a5c575717ff8a36ace27510ab0a Signed-off-by: wanxiaoqing --------------------------------------- sock_init_data() assumes that the `struct socket` passed in input is contained in a `struct socket_alloc` allocated with sock_alloc(). However, tun_chr_open() passes a `struct socket` embedded in a `struct tun_file` allocated with sk_alloc(). This causes a type confusion when issuing a container_of() with SOCK_INODE() in sock_init_data() which results in assigning a wrong sk_uid to the `struct sock` in input. On default configuration, the type confused field overlaps with the high 4 bytes of `struct tun_struct __rcu *tun` of `struct tun_file`, NULL at the time of call, which makes the uid of all tun sockets 0, i.e., the root one. Fix the assignment by using sock_init_data_uid(). Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Pietro Borrello Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: wanxiaoqing --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ffbc7eda95ee..fe230c554e3c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3430,7 +3430,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->socket.file = file; tfile->socket.ops = &tun_socket_ops; - sock_init_data(&tfile->socket, &tfile->sk); + sock_init_data_uid(&tfile->socket, &tfile->sk, inode->i_uid); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; -- Gitee From 240d66fcfc9f2aa1b594849ada0bfbc652459808 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Sat, 4 Feb 2023 17:39:20 +0000 Subject: [PATCH 126/243] net: add sock_init_data_uid() mainline inclusion from mainline-v6.3-rc1 commit 584f3742890e966d2f0a1f3c418c9ead70b2d99e category: bugfix issue: #I6K31U CVE: CVE-2023-1076 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=584f3742890e966d2f0a1f3c418c9ead70b2d99e Signed-off-by: wanxiaoqing --------------------------------------- Add sock_init_data_uid() to explicitly initialize the socket uid. To initialise the socket uid, sock_init_data() assumes a the struct socket* sock is always embedded in a struct socket_alloc, used to access the corresponding inode uid. This may not be true. Examples are sockets created in tun_chr_open() and tap_open(). 
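The root of the type confusion is that SOCK_INODE() recovers the inode by assuming the socket is wrapped in a socket_alloc; roughly (simplified from include/net/sock.h):

  static inline struct inode *SOCK_INODE(struct socket *socket)
  {
          return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
  }

When the struct socket is instead embedded in a driver structure such as tun_file or tap_queue, the container_of() walks into unrelated memory, so the uid that sock_init_data() reads from the supposed inode is whatever bytes happen to live there. Passing the uid explicitly removes that assumption.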
Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Signed-off-by: Pietro Borrello Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: wanxiaoqing --- include/net/sock.h | 7 ++++++- net/core/sock.c | 15 ++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 7ba93f164d6b..4424242ebae2 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1754,7 +1754,12 @@ void sk_common_release(struct sock *sk); * Default socket callbacks and setup code */ -/* Initialise core socket variables */ +/* Initialise core socket variables using an explicit uid. */ +void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid); + +/* Initialise core socket variables. + * Assumes struct socket *sock is embedded in a struct socket_alloc. + */ void sock_init_data(struct socket *sock, struct sock *sk); /* diff --git a/net/core/sock.c b/net/core/sock.c index d75b3af50864..6dc499bd9ea4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2965,7 +2965,7 @@ void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer) } EXPORT_SYMBOL(sk_stop_timer_sync); -void sock_init_data(struct socket *sock, struct sock *sk) +void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid) { sk_init_common(sk); sk->sk_send_head = NULL; @@ -2984,11 +2984,10 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_type = sock->type; RCU_INIT_POINTER(sk->sk_wq, &sock->wq); sock->sk = sk; - sk->sk_uid = SOCK_INODE(sock)->i_uid; } else { RCU_INIT_POINTER(sk->sk_wq, NULL); - sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0); } + sk->sk_uid = uid; rwlock_init(&sk->sk_callback_lock); if (sk->sk_kern_sock) @@ -3046,6 +3045,16 @@ void sock_init_data(struct socket *sock, struct sock *sk) refcount_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); } +EXPORT_SYMBOL(sock_init_data_uid); + +void sock_init_data(struct socket *sock, struct sock *sk) +{ + kuid_t uid = sock ? + SOCK_INODE(sock)->i_uid : + make_kuid(sock_net(sk)->user_ns, 0); + + sock_init_data_uid(sock, sk, uid); +} EXPORT_SYMBOL(sock_init_data); void lock_sock_nested(struct sock *sk, int subclass) -- Gitee From 1920be28da0ddbcbe5e0f4d4d0403535367fb868 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Tue, 7 Feb 2023 18:26:34 +0000 Subject: [PATCH 127/243] rds: rds_rm_zerocopy_callback() use list_first_entry() stable inclusion from stable-5.10.168 commit c53f34ec3fbf3e9f67574118a6bb35ae1146f7ca category: bugfix issue: #I6K330 CVE: CVE-2023-1078 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit f753a68980cf4b59a80fe677619da2b1804f526d ] rds_rm_zerocopy_callback() uses list_entry() on the head of a list causing a type confusion. Use list_first_entry() to actually access the first element of the rs_zcookie_queue list. 
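The distinction matters because list_entry() on the head itself is just container_of() on a standalone list_head, whereas list_first_entry() dereferences head->next; a minimal sketch of the two forms:

  /* Wrong: pretends the standalone head is embedded in an info struct. */
  info = list_entry(head, struct rds_msg_zcopy_info, rs_zcookie_next);

  /* Right: resolves the first real element (list_empty() already checked). */
  info = list_first_entry(head, struct rds_msg_zcopy_info, rs_zcookie_next);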
Fixes: 9426bbc6de99 ("rds: use list structure to track information for zerocopy completion notification") Reviewed-by: Willem de Bruijn Signed-off-by: Pietro Borrello Link: https://lore.kernel.org/r/20230202-rds-zerocopy-v3-1-83b0df974f9a@diag.uniroma1.it Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/rds/message.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/rds/message.c b/net/rds/message.c index 799034e0f513..b363ef13c75e 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, spin_lock_irqsave(&q->lock, flags); head = &q->zcookie_head; if (!list_empty(head)) { - info = list_entry(head, struct rds_msg_zcopy_info, - rs_zcookie_next); - if (info && rds_zcookie_add(info, cookie)) { + info = list_first_entry(head, struct rds_msg_zcopy_info, + rs_zcookie_next); + if (rds_zcookie_add(info, cookie)) { spin_unlock_irqrestore(&q->lock, flags); kfree(rds_info_from_znotifier(znotif)); /* caller invokes rds_wake_sk_sleep() */ -- Gitee From 0e0edc6708b2a7fc3f785e4f93ab54a879fb5b33 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 13 Dec 2021 02:05:07 +0000 Subject: [PATCH 128/243] phy: tegra: xusb: Fix return value of tegra_xusb_find_port_node function mainline inclusion from mainline-v5.17-rc1 commit 045a31b95509c8f25f5f04ec5e0dec5cd09f2c5f category: bugfix issue: #I6K33P CVE: CVE-2023-23000 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=045a31b95509c8f25f5f04ec5e0dec5cd09f2c5f Signed-off-by: wanxiaoqing --------------------------------------- callers of tegra_xusb_find_port_node() function only do NULL checking for the return value. return NULL instead of ERR_PTR(-ENOMEM) to keep consistent. Signed-off-by: Miaoqian Lin Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20211213020507.1458-1-linmq006@gmail.com Signed-off-by: Vinod Koul Signed-off-by: wanxiaoqing --- drivers/phy/tegra/xusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c index 181a1be5f491..02da8c0a14ff 100644 --- a/drivers/phy/tegra/xusb.c +++ b/drivers/phy/tegra/xusb.c @@ -449,7 +449,7 @@ tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type, name = kasprintf(GFP_KERNEL, "%s-%u", type, index); if (!name) { of_node_put(ports); - return ERR_PTR(-ENOMEM); + return NULL; } np = of_get_child_by_name(ports, name); kfree(name); -- Gitee From d998a2c7f1fa19f4c61b4b4c8755f80301421fac Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 13 Dec 2021 07:21:15 +0000 Subject: [PATCH 129/243] malidp: Fix NULL vs IS_ERR() checking mainline inclusion from mainline-v5.19-rc1 commit 15342f930ebebcfe36f2415049736a77d7d2e045 category: bugfix issue: #I6K33X CVE: CVE-2023-23004 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=15342f930ebebcfe36f2415049736a77d7d2e045 Signed-off-by: wanxiaoqing --------------------------------------- The get_sg_table() function does not return NULL. It returns error pointers. 
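The convention being enforced: a function documented to return error pointers never returns NULL, so its callers must test with IS_ERR(); a short sketch matching the hunk below:

  sgt = obj->funcs->get_sg_table(obj);
  if (IS_ERR(sgt))
          return false;   /* PTR_ERR(sgt) would give the errno; NULL is never returned */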
Signed-off-by: Miaoqian Lin Signed-off-by: Liviu Dudau Link: https://lore.kernel.org/dri-devel/20211213072115.18098-1-linmq006@gmail.com/ Signed-off-by: wanxiaoqing --- drivers/gpu/drm/arm/malidp_planes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index f1e8bc39b16d..24604b410372 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -348,7 +348,7 @@ static bool malidp_check_pages_threshold(struct malidp_plane_state *ms, else sgt = obj->funcs->get_sg_table(obj); - if (!sgt) + if (IS_ERR(sgt)) return false; sgl = sgt->sgl; -- Gitee From 1e98ec2feb28d78147bae42bef735ba22044e425 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 3 Jan 2023 12:19:17 +0100 Subject: [PATCH 130/243] net/ulp: prevent ULP without clone op from entering the LISTEN status stable inclusion from stable-5.10.163 commit f8ed0a93b5d576bbaf01639ad816473bdfd1dcb0 category: bugfix issue: #I6JH1F CVE: CVE-2023-0461 Signed-off-by: wanxiaoqing --------------------------------------- commit 2c02d41d71f90a5168391b6a5f2954112ba2307c upstream. When an ULP-enabled socket enters the LISTEN status, the listener ULP data pointer is copied inside the child/accepted sockets by sk_clone_lock(). The relevant ULP can take care of de-duplicating the context pointer via the clone() operation, but only MPTCP and SMC implement such op. Other ULPs may end-up with a double-free at socket disposal time. We can't simply clear the ULP data at clone time, as TLS replaces the socket ops with custom ones assuming a valid TLS ULP context is available. Instead completely prevent clone-less ULP sockets from entering the LISTEN status. Fixes: 734942cc4ea6 ("tcp: ULP infrastructure") Reported-by: slipper Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/4b80c3d1dbe3d0ab072f80450c202d9bc88b4b03.1672740602.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/ipv4/inet_connection_sock.c | 16 +++++++++++++++- net/ipv4/tcp_ulp.c | 4 ++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f359192a4632..148a1b1ece80 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -938,11 +938,25 @@ void inet_csk_prepare_forced_close(struct sock *sk) } EXPORT_SYMBOL(inet_csk_prepare_forced_close); +static int inet_ulp_can_listen(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone) + return -EINVAL; + + return 0; +} + int inet_csk_listen_start(struct sock *sk, int backlog) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); - int err = -EADDRINUSE; + int err; + + err = inet_ulp_can_listen(sk); + if (unlikely(err)) + return err; reqsk_queue_alloc(&icsk->icsk_accept_queue); diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 7c27aa629af1..b5d707a5a31b 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -136,6 +136,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) if (icsk->icsk_ulp_ops) goto out_err; + err = -EINVAL; + if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN) + goto out_err; + err = ulp_ops->init(sk); if (err) goto out_err; -- Gitee From 92440de3e5593e79864c6f5fe22662c863c371c5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 13 Feb 2023 20:45:48 +0000 Subject: 
[PATCH 131/243] tcp: Fix listen() regression in 5.15.88. stable inclusion from stable-5.15.95 commit fdaf88531cfd17b2a710cceb3141ef6f9085ff40 category: bugfix issue: #I6JH1F CVE: CVE-2023-0461 Signed-off-by: wanxiaoqing --------------------------------------- When we backport dadd0dcaa67d ("net/ulp: prevent ULP without clone op from entering the LISTEN status"), we have accidentally backported a part of 7a7160edf1bf ("net: Return errno in sk->sk_prot->get_port().") and removed err = -EADDRINUSE in inet_csk_listen_start(). Thus, listen() no longer returns -EADDRINUSE even if ->get_port() failed as reported in [0]. We set -EADDRINUSE to err just before ->get_port() to fix the regression. [0]: https://lore.kernel.org/stable/EF8A45D0-768A-4CD5-9A8A-0FA6E610ABF7@winter.cafe/ Reported-by: Winter Signed-off-by: Kuniyuki Iwashima Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/ipv4/inet_connection_sock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 148a1b1ece80..008300eda84d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -968,6 +968,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog) * It is OK, because this socket enters to hash table only * after validation is complete. */ + err = -EADDRINUSE; inet_sk_state_store(sk, TCP_LISTEN); if (!sk->sk_prot->get_port(sk, inet->inet_num)) { inet->inet_sport = htons(inet->inet_num); -- Gitee From 62cc511c5391fca9ca5b6a7fad70ee86cf8b3a24 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 17 Feb 2023 12:09:20 -0800 Subject: [PATCH 132/243] net/ulp: Remove redundant ->clone() test in inet_clone_ulp(). mainline inclusion from mainline-v6.3-rc1 commit be9832c2e9cc4c15906a77baddcd906fb4bb864b category: bugfix issue: #I6JH1F CVE: CVE-2023-0461 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=be9832c2e9cc4c15906a77baddcd906fb4bb864b Signed-off-by: wanxiaoqing --------------------------------------- Commit 2c02d41d71f9 ("net/ulp: prevent ULP without clone op from entering the LISTEN status") guarantees that all ULP listeners have clone() op, so we no longer need to test it in inet_clone_ulp(). Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230217200920.85306-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: wanxiaoqing --- net/ipv4/inet_connection_sock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 008300eda84d..a08263883880 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -840,8 +840,7 @@ static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk, if (!icsk->icsk_ulp_ops) return; - if (icsk->icsk_ulp_ops->clone) - icsk->icsk_ulp_ops->clone(req, newsk, priority); + icsk->icsk_ulp_ops->clone(req, newsk, priority); } /** -- Gitee From fe5bd8eccc5a95f26aaa52e0b267deafb600f282 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 18 Jan 2023 13:24:12 +0100 Subject: [PATCH 133/243] net/ulp: use consistent error code when blocking ULP stable inclusion from stable-5.10.165 commit f6c201b4382d1536f44b922b8f16dcb4772cc82c category: bugfix issue: #I6JH1F CVE: CVE-2023-0461 Signed-off-by: wanxiaoqing --------------------------------------- commit 8ccc99362b60c6f27bb46f36fdaaccf4ef0303de upstream. 
The referenced commit changed the error code returned by the kernel when preventing a non-established socket from attaching the ktls ULP. Before to such a commit, the user-space got ENOTCONN instead of EINVAL. The existing self-tests depend on such error code, and the change caused a failure: RUN global.non_established ... tls.c:1673:non_established:Expected errno (22) == ENOTCONN (107) non_established: Test failed at step #3 FAIL global.non_established In the unlikely event existing applications do the same, address the issue by restoring the prior error code in the above scenario. Note that the only other ULP performing similar checks at init time - smc_ulp_ops - also fails with ENOTCONN when trying to attach the ULP to a non-established socket. Reported-by: Sabrina Dubroca Fixes: 2c02d41d71f9 ("net/ulp: prevent ULP without clone op from entering the LISTEN status") Signed-off-by: Paolo Abeni Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/7bb199e7a93317fb6f8bf8b9b2dc71c18f337cde.1674042685.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/ipv4/tcp_ulp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index b5d707a5a31b..8e135af0d4f7 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -136,7 +136,7 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) if (icsk->icsk_ulp_ops) goto out_err; - err = -EINVAL; + err = -ENOTCONN; if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN) goto out_err; -- Gitee From 2de0f5c478f1d3f219a3d68fa4844c28935256bb Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 8 Mar 2023 11:59:03 +0800 Subject: [PATCH 134/243] usb: dwc3: dwc3-qcom: Add missing platform_device_put() in dwc3_qcom_acpi_register_core mainline inclusion from mainline-v5.17-rc1 commit fa0ef93868a6062babe1144df2807a8b1d4924d2 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6IEH4 CVE: CVE-2023-22995 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fa0ef93868a6062babe1144df2807a8b1d4924d2 Signed-off-by: wanxiaoqing --------------------------------------- Add the missing platform_device_put() before return from dwc3_qcom_acpi_register_core in the error handling case.
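The rule being restored here: a device obtained from platform_device_alloc() holds a reference that must be dropped with platform_device_put() on every failure path until platform_device_add() succeeds. A generic sketch of the pattern (device name and setup step are hypothetical, not the driver's exact code):

  pdev = platform_device_alloc("example-dev", PLATFORM_DEVID_AUTO);
  if (!pdev)
          return -ENOMEM;

  ret = example_setup(pdev);              /* hypothetical intermediate step */
  if (ret)
          goto err_put;

  ret = platform_device_add(pdev);
  if (ret)
          goto err_put;                   /* add failed, reference still ours */

  return 0;

  err_put:
          platform_device_put(pdev);
          return ret;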
Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20211231113641.31474-1-linmq006@gmail.com Signed-off-by: Greg Kroah-Hartman Fixes: 2bc02355f8ba ("usb: dwc3: qcom: Add support for booting with ACPI") [Fix conflict due to lack of 8dc6e6dd1bee39cd65a232a17d51240fc65a0f4a] Conflict: drivers/usb/dwc3/dwc3-qcom.c Signed-off-by: Zheng Yejian Reviewed-by: Xu Kuohai Reviewed-by: Xiu Jianfeng Signed-off-by: Jialin Zhang Signed-off-by: wanxiaoqing --- drivers/usb/dwc3/dwc3-qcom.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 504f8af4d0f8..a1c4e3df5626 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -594,8 +594,10 @@ static int dwc3_qcom_acpi_register_core(struct platform_device *pdev) qcom->dwc3->dev.coherent_dma_mask = dev->coherent_dma_mask; child_res = kcalloc(2, sizeof(*child_res), GFP_KERNEL); - if (!child_res) + if (!child_res) { + platform_device_put(qcom->dwc3); return -ENOMEM; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { @@ -631,10 +633,15 @@ static int dwc3_qcom_acpi_register_core(struct platform_device *pdev) } ret = platform_device_add(qcom->dwc3); - if (ret) + if (ret) { dev_err(&pdev->dev, "failed to add device\n"); + goto out; + } + kfree(child_res); + return 0; out: + platform_device_put(qcom->dwc3); kfree(child_res); return ret; } -- Gitee From 0f65804f8cf67eda860ce7b924bc453e1a409b8f Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Mon, 16 Jan 2023 11:11:24 +0000 Subject: [PATCH 135/243] HID: check empty report_list in hid_validate_values() stable inclusion from stable-5.10.166 commit 5dc3469a1170dd1344d262a332b26994214eeb58 category: bugfix issue: #I6K311 CVE: CVE-2023-1073 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit b12fece4c64857e5fab4290bf01b2e0317a88456 ] Add a check for empty report_list in hid_validate_values(). The missing check causes a type confusion when issuing a list_entry() on an empty report_list. The problem is caused by the assumption that the device must have valid report_list. While this will be true for all normal HID devices, a suitably malicious device can violate the assumption. Fixes: 1b15d2e5b807 ("HID: core: fix validation of report id 0") Signed-off-by: Pietro Borrello Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/hid/hid-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 5550c943f985..2f512814a111 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -988,8 +988,8 @@ struct hid_report *hid_validate_values(struct hid_device *hid, * Validating on id 0 means we should examine the first * report in the list. */ - report = list_entry( - hid->report_enum[type].report_list.next, + report = list_first_entry_or_null( + &hid->report_enum[type].report_list, struct hid_report, list); } else { report = hid->report_enum[type].report_id_hash[id]; -- Gitee From 9ebc679ff1c344d261e4ebbba9dc43ac125b3848 Mon Sep 17 00:00:00 2001 From: Pietro Borrello Date: Mon, 16 Jan 2023 11:11:25 +0000 Subject: [PATCH 136/243] HID: check empty report_list in bigben_probe() stable inclusion from stable-5.10.166 commit c7bf714f875531f227f2ef1fdcc8f4d44e7c7d9d category: bugfix issue: #I6K311 CVE: CVE-2023-1073 Signed-off-by: wanxiaoqing --------------------------------------- Add a check for empty report_list in bigben_probe(). 
The missing check causes a type confusion when issuing a list_entry() on an empty report_list. The problem is caused by the assumption that the device must have valid report_list. While this will be true for all normal HID devices, a suitably malicious device can violate the assumption. Fixes: 256a90ed9e46 ("HID: hid-bigbenff: driver for BigBen Interactive PS3OFMINIPAD gamepad") Signed-off-by: Pietro Borrello Signed-off-by: Jiri Kosina Signed-off-by: wanxiaoqing --- drivers/hid/hid-bigbenff.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index 74ad8bf98bfd..a4f5a8b5d177 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -344,6 +344,11 @@ static int bigben_probe(struct hid_device *hid, } report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; + if (list_empty(report_list)) { + hid_err(hid, "no output report found\n"); + error = -ENODEV; + goto error_hw_stop; + } bigben->report = list_entry(report_list->next, struct hid_report, list); -- Gitee From 895bba73dad72a3ffcf5f3a499334d798ba66ef0 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 23 Jan 2023 14:59:33 -0300 Subject: [PATCH 137/243] sctp: fail if no bound addresses can be used for a given scope stable inclusion from stable-5.10.166 commit 6ef652f35dcfaa1ab2b2cf6c1694718595148eee category: bugfix issue: #I6K31A CVE: CVE-2023-1074 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 458e279f861d3f61796894cd158b780765a1569f ] Currently, if you bind the socket to something like: servaddr.sin6_family = AF_INET6; servaddr.sin6_port = htons(0); servaddr.sin6_scope_id = 0; inet_pton(AF_INET6, "::1", &servaddr.sin6_addr); And then request a connect to: connaddr.sin6_family = AF_INET6; connaddr.sin6_port = htons(20000); connaddr.sin6_scope_id = if_nametoindex("lo"); inet_pton(AF_INET6, "fe88::1", &connaddr.sin6_addr); What the stack does is: - bind the socket - create a new asoc - to handle the connect - copy the addresses that can be used for the given scope - try to connect But the copy returns 0 addresses, and the effect is that it ends up trying to connect as if the socket wasn't bound, which is not the desired behavior. This unexpected behavior also allows KASLR leaks through SCTP diag interface. The fix here then is, if when trying to copy the addresses that can be used for the scope used in connect() it returns 0 addresses, bail out. This is what TCP does with a similar reproducer. Reported-by: Pietro Borrello Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Link: https://lore.kernel.org/r/9fcd182f1099f86c6661f3717f63712ddd1c676c.1674496737.git.marcelo.leitner@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/sctp/bind_addr.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 59e653b528b1..6b95d3ba8fe1 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, } } + /* If somehow no addresses were found that can be used with this + * scope, it's an error. 
+ */ + if (list_empty(&dest->address_list)) + error = -ENETUNREACH; + out: if (error) sctp_bind_addr_clean(dest); -- Gitee From 12ed9298a84e19345e0d5cd6ef595b5a6b46ec5f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 23 Dec 2022 09:59:09 -0500 Subject: [PATCH 138/243] USB: gadgetfs: Fix race between mounting and unmounting stable inclusion from stable-5.10.165 commit 856e4b5e53f21edbd15d275dde62228dd94fb2b4 category: bugfix issue: #I6740C CVE: CVE-2022-4382 Signed-off-by: wanxiaoqing --------------------------------------- commit d18dcfe9860e842f394e37ba01ca9440ab2178f4 upstream. The syzbot fuzzer and Gerald Lee have identified a use-after-free bug in the gadgetfs driver, involving processes concurrently mounting and unmounting the gadgetfs filesystem. In particular, gadgetfs_fill_super() can race with gadgetfs_kill_sb(), causing the latter to deallocate the_device while the former is using it. The output from KASAN says, in part: BUG: KASAN: use-after-free in instrument_atomic_read_write include/linux/instrumented.h:102 [inline] BUG: KASAN: use-after-free in atomic_fetch_sub_release include/linux/atomic/atomic-instrumented.h:176 [inline] BUG: KASAN: use-after-free in __refcount_sub_and_test include/linux/refcount.h:272 [inline] BUG: KASAN: use-after-free in __refcount_dec_and_test include/linux/refcount.h:315 [inline] BUG: KASAN: use-after-free in refcount_dec_and_test include/linux/refcount.h:333 [inline] BUG: KASAN: use-after-free in put_dev drivers/usb/gadget/legacy/inode.c:159 [inline] BUG: KASAN: use-after-free in gadgetfs_kill_sb+0x33/0x100 drivers/usb/gadget/legacy/inode.c:2086 Write of size 4 at addr ffff8880276d7840 by task syz-executor126/18689 CPU: 0 PID: 18689 Comm: syz-executor126 Not tainted 6.1.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: ... atomic_fetch_sub_release include/linux/atomic/atomic-instrumented.h:176 [inline] __refcount_sub_and_test include/linux/refcount.h:272 [inline] __refcount_dec_and_test include/linux/refcount.h:315 [inline] refcount_dec_and_test include/linux/refcount.h:333 [inline] put_dev drivers/usb/gadget/legacy/inode.c:159 [inline] gadgetfs_kill_sb+0x33/0x100 drivers/usb/gadget/legacy/inode.c:2086 deactivate_locked_super+0xa7/0xf0 fs/super.c:332 vfs_get_super fs/super.c:1190 [inline] get_tree_single+0xd0/0x160 fs/super.c:1207 vfs_get_tree+0x88/0x270 fs/super.c:1531 vfs_fsconfig_locked fs/fsopen.c:232 [inline] The simplest solution is to ensure that gadgetfs_fill_super() and gadgetfs_kill_sb() are serialized by making them both acquire a new mutex. 
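The serialization idea can be shown with a small userspace sketch, assuming nothing about the real gadgetfs internals; the type, function names and error values below are invented, and pthread primitives stand in for the kernel mutex:

```
#include <pthread.h>
#include <stdlib.h>

struct dev_state { int dummy; };

static struct dev_state *the_device;	/* shared singleton, like gadgetfs' the_device */
static pthread_mutex_t sb_mutex = PTHREAD_MUTEX_INITIALIZER;

static int fill_super_like(void)
{
	int rc = 0;

	pthread_mutex_lock(&sb_mutex);
	if (the_device) {		/* already set up */
		rc = -1;
		goto done;
	}
	the_device = calloc(1, sizeof(*the_device));
	if (!the_device)
		rc = -2;
done:
	pthread_mutex_unlock(&sb_mutex);
	return rc;
}

static void kill_sb_like(void)
{
	pthread_mutex_lock(&sb_mutex);
	free(the_device);		/* safe: no concurrent fill_super_like() can race us */
	the_device = NULL;
	pthread_mutex_unlock(&sb_mutex);
}

int main(void)
{
	fill_super_like();
	kill_sb_like();
	return 0;
}
```

Both paths take the same lock around the shared singleton, so the teardown path can no longer free the object while the setup path is still initializing or publishing it.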
Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+33d7ad66d65044b93f16@syzkaller.appspotmail.com Reported-and-tested-by: Gerald Lee Link: https://lore.kernel.org/linux-usb/CAO3qeMVzXDP-JU6v1u5Ags6Q-bb35kg3=C6d04DjzA9ffa5x1g@mail.gmail.com/ Fixes: e5d82a7360d1 ("vfs: Convert gadgetfs to use the new mount API") CC: Link: https://lore.kernel.org/r/Y6XCPXBpn3tmjdCC@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/usb/gadget/legacy/inode.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 454860d52ce7..a926baca2b51 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -229,6 +229,7 @@ static void put_ep (struct ep_data *data) */ static const char *CHIP; +static DEFINE_MUTEX(sb_mutex); /* Serialize superblock operations */ /*----------------------------------------------------------------------*/ @@ -2011,13 +2012,20 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct dev_data *dev; + int rc; - if (the_device) - return -ESRCH; + mutex_lock(&sb_mutex); + + if (the_device) { + rc = -ESRCH; + goto Done; + } CHIP = usb_get_gadget_udc_name(); - if (!CHIP) - return -ENODEV; + if (!CHIP) { + rc = -ENODEV; + goto Done; + } /* superblock */ sb->s_blocksize = PAGE_SIZE; @@ -2054,13 +2062,17 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) * from binding to a controller. */ the_device = dev; - return 0; + rc = 0; + goto Done; -Enomem: + Enomem: kfree(CHIP); CHIP = NULL; + rc = -ENOMEM; - return -ENOMEM; + Done: + mutex_unlock(&sb_mutex); + return rc; } /* "mount -t gadgetfs path /dev/gadget" ends up here */ @@ -2082,6 +2094,7 @@ static int gadgetfs_init_fs_context(struct fs_context *fc) static void gadgetfs_kill_sb (struct super_block *sb) { + mutex_lock(&sb_mutex); kill_litter_super (sb); if (the_device) { put_dev (the_device); @@ -2089,6 +2102,7 @@ gadgetfs_kill_sb (struct super_block *sb) } kfree(CHIP); CHIP = NULL; + mutex_unlock(&sb_mutex); } /*----------------------------------------------------------------------*/ -- Gitee From e9f1b1c0655a6ca4efb7e9e3aad4b2a05b4c1857 Mon Sep 17 00:00:00 2001 From: zhangkaixiang Date: Wed, 22 Mar 2023 15:25:21 +0800 Subject: [PATCH 139/243] fixed f433091 from https://gitee.com/zkx48/kernel_linux_5.10/pulls/730 do not open file when nlink is zero Signed-off-by: zhangkaixiang --- fs/sharefs/file.c | 16 ---------------- fs/sharefs/lookup.c | 7 +++++++ 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/fs/sharefs/file.c b/fs/sharefs/file.c index 07458f47b885..5e62122f98b2 100644 --- a/fs/sharefs/file.c +++ b/fs/sharefs/file.c @@ -11,21 +11,6 @@ #include "sharefs.h" -static int sharefs_readdir(struct file *file, struct dir_context *ctx) -{ - int err; - struct file *lower_file = NULL; - struct dentry *dentry = file->f_path.dentry; - - lower_file = sharefs_lower_file(file); - err = iterate_dir(lower_file, ctx); - file->f_pos = lower_file->f_pos; - if (err >= 0) - fsstack_copy_attr_atime(d_inode(dentry), - file_inode(lower_file)); - return err; -} - static int sharefs_open(struct inode *inode, struct file *file) { int err = 0; @@ -232,7 +217,6 @@ const struct file_operations sharefs_main_fops = { /* trimmed directory options */ const struct file_operations sharefs_dir_fops = { .llseek = sharefs_file_llseek, - .iterate = sharefs_readdir, .open = sharefs_open, .release = 
sharefs_file_release, .flush = sharefs_flush, diff --git a/fs/sharefs/lookup.c b/fs/sharefs/lookup.c index 47ead11f45f5..fb6da6b11be3 100644 --- a/fs/sharefs/lookup.c +++ b/fs/sharefs/lookup.c @@ -90,6 +90,13 @@ struct inode *sharefs_iget(struct super_block *sb, struct inode *lower_inode) iput(lower_inode); return ERR_PTR(-ENOMEM); } + + if (lower_inode->i_nlink == 0) { + iput(lower_inode); + iput(inode); + return ERR_PTR(-ENOENT); + } + /* if found a cached inode, then just return it (after iput) */ if (!(inode->i_state & I_NEW)) { iput(lower_inode); -- Gitee From 006c34c423712608012478243d8f714109203cf0 Mon Sep 17 00:00:00 2001 From: zhangkaixiang Date: Mon, 27 Mar 2023 10:38:28 +0800 Subject: [PATCH 140/243] fixed 3b41850 from https://gitee.com/zkx48/kernel_linux_5.10/pulls/747 modify the ugo while creating a dir or file in merge_view Signed-off-by: zhangkaixiang --- fs/hmdfs/inode_merge.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/hmdfs/inode_merge.c b/fs/hmdfs/inode_merge.c index c5cd7bdc4905..ab6ea9660458 100644 --- a/fs/hmdfs/inode_merge.c +++ b/fs/hmdfs/inode_merge.c @@ -869,6 +869,8 @@ int do_mkdir_merge(struct inode *parent_inode, struct dentry *child_dentry, ret = PTR_ERR(child_inode); goto out; } + child_inode->i_uid = parent_inode->i_uid; + child_inode->i_gid = parent_inode->i_gid; d_add(child_dentry, child_inode); /* nlink should be increased with the joining of children */ @@ -895,6 +897,8 @@ int do_create_merge(struct inode *parent_inode, struct dentry *child_dentry, ret = PTR_ERR(child_inode); goto out; } + child_inode->i_uid = parent_inode->i_uid; + child_inode->i_gid = parent_inode->i_gid; d_add(child_dentry, child_inode); /* nlink should be increased with the joining of children */ -- Gitee From d8b43be74f298fe4aaef25f0d08e2cf63ace0562 Mon Sep 17 00:00:00 2001 From: zhangkaixiang Date: Sat, 25 Mar 2023 18:44:49 +0800 Subject: [PATCH 141/243] fixed 9d2ac7c from https://gitee.com/zkx48/kernel_linux_5.10/pulls/747 add merge_view set xattr Signed-off-by: zhangkaixiang --- fs/hmdfs/dentry.c | 31 ++++++++++++++++++++++++++++++- fs/hmdfs/hmdfs_device_view.h | 3 ++- fs/hmdfs/main.c | 18 ++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/fs/hmdfs/dentry.c b/fs/hmdfs/dentry.c index 8ec7ab33412f..fb7c4749bee0 100644 --- a/fs/hmdfs/dentry.c +++ b/fs/hmdfs/dentry.c @@ -273,7 +273,36 @@ void clear_comrades(struct dentry *dentry) */ static int d_revalidate_merge(struct dentry *direntry, unsigned int flags) { - return 0; + struct hmdfs_dentry_info_merge *dim = hmdfs_dm(direntry); + struct hmdfs_dentry_comrade *comrade = NULL; + struct dentry *parent_dentry = NULL; + struct dentry *lower_cur_parent_dentry = NULL; + int ret = 1; + + if (flags & LOOKUP_RCU) { + return -ECHILD; + } + + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET | LOOKUP_REVAL)) { + return 0; + } + + parent_dentry = dget_parent(direntry); + list_for_each_entry(comrade, &(dim->comrade_list), list) { + lower_cur_parent_dentry = dget_parent(comrade->lo_d); + if ((comrade->lo_d->d_flags & DCACHE_OP_REVALIDATE)) { + ret = comrade->lo_d->d_op->d_revalidate( + comrade->lo_d, flags); + if (ret == 0) { + dput(lower_cur_parent_dentry); + goto out; + } + } + dput(lower_cur_parent_dentry); + } +out: + dput(parent_dentry); + return ret; } static void d_release_merge(struct dentry *dentry) diff --git a/fs/hmdfs/hmdfs_device_view.h b/fs/hmdfs/hmdfs_device_view.h index 0e397254c8ea..76be42a7481c 100644 --- a/fs/hmdfs/hmdfs_device_view.h +++ b/fs/hmdfs/hmdfs_device_view.h @@ -233,7 +233,8 
@@ static inline bool hmdfs_support_xattr(struct dentry *dentry) struct hmdfs_dentry_info *gdi = hmdfs_d(dentry); if (info->inode_type != HMDFS_LAYER_OTHER_LOCAL && - info->inode_type != HMDFS_LAYER_OTHER_REMOTE) + info->inode_type != HMDFS_LAYER_OTHER_REMOTE && + info->inode_type != HMDFS_LAYER_OTHER_MERGE) return false; if (!S_ISREG(inode->i_mode)) diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index cb3034e95651..f57d67562486 100644 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -161,6 +161,22 @@ static int hmdfs_xattr_remote_set(struct dentry *dentry, const char *name, return res; } +static int hmdfs_xattr_merge_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + int err = 0; + struct dentry *lower_dentry = hmdfs_get_lo_d(dentry, HMDFS_DEVID_LOCAL); + + if (!lower_dentry) { + err = -EOPNOTSUPP; + goto out; + } + err = hmdfs_xattr_local_set(lower_dentry, name, value, size, flags); +out: + dput(lower_dentry); + return err; +} + static int hmdfs_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -181,6 +197,8 @@ static int hmdfs_xattr_set(const struct xattr_handler *handler, if (info->inode_type == HMDFS_LAYER_OTHER_LOCAL) return hmdfs_xattr_local_set(dentry, name, value, size, flags); + else if (info->inode_type == HMDFS_LAYER_OTHER_MERGE) + return hmdfs_xattr_merge_set(dentry, name, value, size, flags); return hmdfs_xattr_remote_set(dentry, name, value, size, flags); } -- Gitee From a87fec1cf8df39918cbdc0428fa0449c6277adb2 Mon Sep 17 00:00:00 2001 From: Clement Lecigne Date: Fri, 31 Mar 2023 20:49:16 +0800 Subject: [PATCH 142/243] ALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF stable inclusion from stable-v5.10.162 commit df02234e6b87d2a9a82acd3198e44bdeff8488c6 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6AOWP CVE: CVE-2023-0266 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=df02234e6b87d2a9a82acd3198e44bdeff8488c6 -------------------------------- [ Note: this is a fix that works around the bug equivalently as the two upstream commits: 1fa4445f9adf ("ALSA: control - introduce snd_ctl_notify_one() helper") 56b88b50565c ("ALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF") but in a simpler way to fit with older stable trees -- tiwai ] Add missing locking in ctl_elem_read_user/ctl_elem_write_user which can be easily triggered and turned into an use-after-free. Example code paths with SNDRV_CTL_IOCTL_ELEM_READ: 64-bits: snd_ctl_ioctl snd_ctl_elem_read_user [takes controls_rwsem] snd_ctl_elem_read [lock properly held, all good] [drops controls_rwsem] 32-bits (compat): snd_ctl_ioctl_compat snd_ctl_elem_write_read_compat ctl_elem_write_read snd_ctl_elem_read [missing lock, not good] CVE-2023-0266 was assigned for this issue. 
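A compact sketch of the locking invariant being restored, with placeholder names (the real code uses struct snd_card and snd_ctl_elem_read/write); the only assumption carried over from the patch is that every reader of the control list must hold controls_rwsem for read and every writer must hold it for write, including the 32-bit compat ioctl path that previously skipped it:

```
#include <linux/rwsem.h>

struct example_card {
	struct rw_semaphore controls_rwsem;
	int value;				/* stand-in for the control element list */
};

static int example_elem_read(struct example_card *card, int *out)
{
	down_read(&card->controls_rwsem);	/* shared: concurrent readers allowed */
	*out = card->value;
	up_read(&card->controls_rwsem);
	return 0;
}

static int example_elem_write(struct example_card *card, int val)
{
	down_write(&card->controls_rwsem);	/* exclusive: blocks readers and writers */
	card->value = val;
	up_write(&card->controls_rwsem);
	return 0;
}
```

Skipping either lock around a lookup is enough for a concurrent writer to free the element while the reader still holds a pointer to it, which is exactly the use-after-free window the compat path had.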
Signed-off-by: Clement Lecigne Cc: stable@kernel.org # 5.12 and older Signed-off-by: Takashi Iwai Reviewed-by: Jaroslav Kysela Signed-off-by: Greg Kroah-Hartman Signed-off-by: Hui Tang Signed-off-by: Cheng Jian --- sound/core/control_compat.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index 97467f6a32a1..980ab3580f1b 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -304,7 +304,9 @@ static int ctl_elem_read_user(struct snd_card *card, err = snd_power_wait(card, SNDRV_CTL_POWER_D0); if (err < 0) goto error; + down_read(&card->controls_rwsem); err = snd_ctl_elem_read(card, data); + up_read(&card->controls_rwsem); if (err < 0) goto error; err = copy_ctl_value_to_user(userdata, valuep, data, type, count); @@ -332,7 +334,9 @@ static int ctl_elem_write_user(struct snd_ctl_file *file, err = snd_power_wait(card, SNDRV_CTL_POWER_D0); if (err < 0) goto error; + down_write(&card->controls_rwsem); err = snd_ctl_elem_write(card, file, data); + up_write(&card->controls_rwsem); if (err < 0) goto error; err = copy_ctl_value_to_user(userdata, valuep, data, type, count); -- Gitee From 9dc1b2ca5d3da470ea4289a408f42466aa19f34d Mon Sep 17 00:00:00 2001 From: Ke Liu Date: Tue, 28 Mar 2023 10:30:43 +0800 Subject: [PATCH 143/243] fixed c59516a from https://gitee.com/KAZIMIYA/kernel_linux_5.10/pulls/757 hyperhold: fix memory leak in soft_crypt_page ohos inclusion category: bugfix issue: #I6QVSD CVE: NA -------------------------------- Only skcipher_request_alloc but no skcipher_request_free. Fix it by use skcipher_request_free. Signed-off-by: Ke Liu Change-Id: If5c70275a95deab9d6e46dde0b7e0af70e1efde9 --- drivers/hyperhold/hp_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hyperhold/hp_device.c b/drivers/hyperhold/hp_device.c index 5b818265c26d..47c5dd390477 100644 --- a/drivers/hyperhold/hp_device.c +++ b/drivers/hyperhold/hp_device.c @@ -120,6 +120,8 @@ int soft_crypt_page(struct crypto_skcipher *ctfm, struct page *dst_page, else BUG(); + skcipher_request_free(req); + if (ret) pr_err("%scrypt failed!\n", op == HP_DEV_ENCRYPT ? 
"en" : "de"); -- Gitee From ed0841d513df071b8dc9304e6d01321520a356b6 Mon Sep 17 00:00:00 2001 From: zhangkaixiang Date: Thu, 6 Apr 2023 20:22:53 +0800 Subject: [PATCH 144/243] fixed e5b0171 from https://gitee.com/zkx48/kernel_linux_5.10/pulls/771 fix up the df bug Signed-off-by: zhangkaixiang --- fs/sharefs/inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/sharefs/inode.c b/fs/sharefs/inode.c index 985a04a643bc..1ffee295b2f3 100644 --- a/fs/sharefs/inode.c +++ b/fs/sharefs/inode.c @@ -67,6 +67,8 @@ static int sharefs_getattr(const struct path *path, struct kstat *stat, stat->uid = d_inode(path->dentry)->i_uid; stat->gid = d_inode(path->dentry)->i_gid; stat->mode = d_inode(path->dentry)->i_mode; + stat->dev = 0; + stat->rdev = 0; sharefs_put_lower_path(path->dentry, &lower_path); return ret; -- Gitee From 8b7816a83110e42ee03f838c0850ce3adff8f168 Mon Sep 17 00:00:00 2001 From: waterwin Date: Thu, 25 May 2023 04:02:34 +0000 Subject: [PATCH 145/243] fixed 8831780 from https://gitee.com/waterwin/kernel_linux_5.10/pulls/865 hmdfs:prepare_to_wait_event UAF when dentry info switch in merge view Signed-off-by: waterwin --- fs/hmdfs/dentry.c | 2 ++ fs/hmdfs/inode_merge.c | 20 ++------------------ 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/fs/hmdfs/dentry.c b/fs/hmdfs/dentry.c index fb7c4749bee0..df45b6b413cf 100644 --- a/fs/hmdfs/dentry.c +++ b/fs/hmdfs/dentry.c @@ -288,6 +288,7 @@ static int d_revalidate_merge(struct dentry *direntry, unsigned int flags) } parent_dentry = dget_parent(direntry); + mutex_lock(&dim->comrade_list_lock); list_for_each_entry(comrade, &(dim->comrade_list), list) { lower_cur_parent_dentry = dget_parent(comrade->lo_d); if ((comrade->lo_d->d_flags & DCACHE_OP_REVALIDATE)) { @@ -301,6 +302,7 @@ static int d_revalidate_merge(struct dentry *direntry, unsigned int flags) dput(lower_cur_parent_dentry); } out: + mutex_unlock(&dim->comrade_list_lock); dput(parent_dentry); return ret; } diff --git a/fs/hmdfs/inode_merge.c b/fs/hmdfs/inode_merge.c index ab6ea9660458..f3a53be91c72 100644 --- a/fs/hmdfs/inode_merge.c +++ b/fs/hmdfs/inode_merge.c @@ -694,23 +694,6 @@ int init_hmdfs_dentry_info_merge(struct hmdfs_sb_info *sbi, return 0; } -static void update_dm(struct dentry *dst, struct dentry *src) -{ - struct hmdfs_dentry_info_merge *dmi_dst = hmdfs_dm(dst); - struct hmdfs_dentry_info_merge *dmi_src = hmdfs_dm(src); - - trace_hmdfs_merge_update_dentry_info_enter(src, dst); - - spin_lock(&dst->d_lock); - spin_lock(&src->d_lock); - dst->d_fsdata = dmi_src; - src->d_fsdata = dmi_dst; - spin_unlock(&src->d_lock); - spin_unlock(&dst->d_lock); - - trace_hmdfs_merge_update_dentry_info_exit(src, dst); -} - // do this in a map-reduce manner struct dentry *hmdfs_lookup_merge(struct inode *parent_inode, struct dentry *child_dentry, @@ -763,7 +746,6 @@ struct dentry *hmdfs_lookup_merge(struct inode *parent_inode, goto out; } if (ret_dentry) { - update_dm(ret_dentry, child_dentry); child_dentry = ret_dentry; } info = hmdfs_i(child_inode); @@ -1198,10 +1180,12 @@ int do_unlink_merge(struct inode *dir, struct dentry *dentry) mutex_lock(&dim->comrade_list_lock); list_for_each_entry(comrade, &(dim->comrade_list), list) { lo_d = comrade->lo_d; + dget(lo_d); lo_d_dir = lock_parent(lo_d); lo_i_dir = d_inode(lo_d_dir); ret = vfs_unlink(lo_i_dir, lo_d, NULL); // lo_d GET unlock_dir(lo_d_dir); + dput(lo_d); if (ret) break; } -- Gitee From b841b51dbc52556be56e749f1339b66871f38e02 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Thu, 9 Feb 2023 11:37:39 -0300 Subject: [PATCH 
146/243] net/sched: tcindex: update imperfect hash filters respecting rcu stable inclusion from stable-5.10.169 commit eb8e9d8572d1d9df17272783ad8a84843ce559d4 category: bugfix issue: #I6QYWE CVE: CVE-2023-1281 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit ee059170b1f7e94e55fa6cadee544e176a6e59c2 upstream. The imperfect hash area can be updated while packets are traversing, which will cause a use-after-free when 'tcf_exts_exec()' is called with the destroyed tcf_ext. CPU 0: CPU 1: tcindex_set_parms tcindex_classify tcindex_lookup tcindex_lookup tcf_exts_change tcf_exts_exec [UAF] Stop operating on the shared area directly, by using a local copy, and update the filter with 'rcu_replace_pointer()'. Delete the old filter version only after a rcu grace period elapsed. Fixes: 9b0d4446b569 ("net: sched: avoid atomic swap in tcf_exts_change") Reported-by: valis Suggested-by: valis Signed-off-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Link: https://lore.kernel.org/r/20230209143739.279867-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- net/sched/cls_tcindex.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e9a8a2c86bbd..dc87feaa3cb3 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -338,6 +339,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e; + bool update_h = false; err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) @@ -455,10 +457,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } } - if (cp->perfect) + if (cp->perfect) { r = cp->perfect + handle; - else - r = tcindex_lookup(cp, handle) ? 
: &new_filter_result; + } else { + /* imperfect area is updated in-place using rcu */ + update_h = !!tcindex_lookup(cp, handle); + r = &new_filter_result; + } if (r == &new_filter_result) { f = kzalloc(sizeof(*f), GFP_KERNEL); @@ -492,7 +497,28 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, rcu_assign_pointer(tp->root, cp); - if (r == &new_filter_result) { + if (update_h) { + struct tcindex_filter __rcu **fp; + struct tcindex_filter *cf; + + f->result.res = r->res; + tcf_exts_change(&f->result.exts, &r->exts); + + /* imperfect area bucket */ + fp = cp->h + (handle % cp->hash); + + /* lookup the filter, guaranteed to exist */ + for (cf = rcu_dereference_bh_rtnl(*fp); cf; + fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp)) + if (cf->key == handle) + break; + + f->next = cf->next; + + cf = rcu_replace_pointer(*fp, f, 1); + tcf_exts_get_net(&cf->result.exts); + tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work); + } else if (r == &new_filter_result) { struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; -- Gitee From 91377398ee4e8262de0b87297d0e5346cf253012 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 17 Jan 2023 13:39:37 -0600 Subject: [PATCH 147/243] scsi: iscsi_tcp: Fix UAF during login when accessing the shost ipaddress stable inclusion from stable-v5.10.168~94 commit 9758ffe1c07b86aefd7ca8e40d9a461293427ca0 category: bugfix issue: #I6XC4Y CVE: CVE-2023-2162 Signed-off-by: Ywenrui44091 [ Upstream commit f484a794e4ee2a9ce61f52a78e810ac45f3fe3b3 ] If during iscsi_sw_tcp_session_create() iscsi_tcp_r2tpool_alloc() fails, userspace could be accessing the host's ipaddress attr. If we then free the session via iscsi_session_teardown() while userspace is still accessing the session we will hit a use after free bug. Set the tcp_sw_host->session after we have completed session creation and can no longer fail. Link: https://lore.kernel.org/r/20230117193937.21244-3-michael.christie@oracle.com Signed-off-by: Mike Christie Reviewed-by: Lee Duncan Acked-by: Ding Hui Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Ywenrui44091 --- drivers/scsi/iscsi_tcp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index d39f812d9b92..63aaa8305dae 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -775,7 +775,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, char *buf) { struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(shost); - struct iscsi_session *session = tcp_sw_host->session; + struct iscsi_session *session; struct iscsi_conn *conn; struct iscsi_tcp_conn *tcp_conn; struct iscsi_sw_tcp_conn *tcp_sw_conn; @@ -785,6 +785,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, switch (param) { case ISCSI_HOST_PARAM_IPADDRESS: + session = tcp_sw_host->session; if (!session) return -ENOTCONN; @@ -881,11 +882,13 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, if (!cls_session) goto remove_host; session = cls_session->dd_data; - tcp_sw_host = iscsi_host_priv(shost); - tcp_sw_host->session = session; if (iscsi_tcp_r2tpool_alloc(session)) goto remove_session; + + /* We are now fully setup so expose the session to sysfs. 
*/ + tcp_sw_host = iscsi_host_priv(shost); + tcp_sw_host->session = session; return cls_session; remove_session: -- Gitee From 6a302de7e1a8f32d045408fc2c6e2eb877055d2e Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 16 Jun 2022 10:13:55 +0800 Subject: [PATCH 148/243] ext4: add EXT4_INODE_HAS_XATTR_SPACE macro in xattr.h stable inclusion from stable-5.10.137 commit 2da44a2927a71bff2bc66cefa8cfbd2ace702536 category: bugfix issue: #I73C2J CVE: CVE-2023-2513 Signed-off-by: lizongfeng --------------------------------------- commit 179b14152dcb6a24c3415200603aebca70ff13af upstream. When adding an xattr to an inode, we must ensure that the inode_size is not less than EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad. Otherwise, the end position may be greater than the start position, resulting in UAF. Signed-off-by: Baokun Li Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani (IBM) Link: https://lore.kernel.org/r/20220616021358.2504451-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: lizongfeng --- fs/ext4/xattr.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 730b91fa0dd7..87e5863bb493 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -95,6 +95,19 @@ struct ext4_xattr_entry { #define EXT4_ZERO_XATTR_VALUE ((void *)-1) +/* + * If we want to add an xattr to the inode, we should make sure that + * i_extra_isize is not 0 and that the inode size is not less than + * EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad. + * EXT4_GOOD_OLD_INODE_SIZE extra_isize header entry pad data + * |--------------------------|------------|------|---------|---|-------| + */ +#define EXT4_INODE_HAS_XATTR_SPACE(inode) \ + ((EXT4_I(inode)->i_extra_isize != 0) && \ + (EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize + \ + sizeof(struct ext4_xattr_ibody_header) + EXT4_XATTR_PAD <= \ + EXT4_INODE_SIZE((inode)->i_sb))) + struct ext4_xattr_info { const char *name; const void *value; -- Gitee From bfa3d8f83075284d423f4313d6f2b41de1853900 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 6 Jul 2022 18:04:51 +0800 Subject: [PATCH 149/243] ext4: fix use-after-free in ext4_xattr_set_entry OLK-5.10 inclusion from OLK-5.10 commit dcdb8cea24f564106ac234471f5d00c22a254834 category: bugfix issue: #I73C2J CVE: CVE-2023-2513 Signed-off-by: gaochao --------------------------------------- hulk inclusion category: bugfix bugzilla: 186866, https://gitee.com/openeuler/kernel/issues/I5DTBL CVE: NA -------------------------------- Hulk Robot reported a issue: Reviewed-by: Zhang Yi ================================================================== BUG: KASAN: use-after-free in ext4_xattr_set_entry+0x18ab/0x3500 Write of size 4105 at addr ffff8881675ef5f4 by task syz-executor.0/7092 CPU: 1 PID: 7092 Comm: syz-executor.0 Not tainted 4.19.90-dirty #17 Call Trace: [...] 
memcpy+0x34/0x50 mm/kasan/kasan.c:303 ext4_xattr_set_entry+0x18ab/0x3500 fs/ext4/xattr.c:1747 ext4_xattr_ibody_inline_set+0x86/0x2a0 fs/ext4/xattr.c:2205 ext4_xattr_set_handle+0x940/0x1300 fs/ext4/xattr.c:2386 ext4_xattr_set+0x1da/0x300 fs/ext4/xattr.c:2498 __vfs_setxattr+0x112/0x170 fs/xattr.c:149 __vfs_setxattr_noperm+0x11b/0x2a0 fs/xattr.c:180 __vfs_setxattr_locked+0x17b/0x250 fs/xattr.c:238 vfs_setxattr+0xed/0x270 fs/xattr.c:255 setxattr+0x235/0x330 fs/xattr.c:520 path_setxattr+0x176/0x190 fs/xattr.c:539 __do_sys_lsetxattr fs/xattr.c:561 [inline] __se_sys_lsetxattr fs/xattr.c:557 [inline] __x64_sys_lsetxattr+0xc2/0x160 fs/xattr.c:557 do_syscall_64+0xdf/0x530 arch/x86/entry/common.c:298 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x459fe9 RSP: 002b:00007fa5e54b4c08 EFLAGS: 00000246 ORIG_RAX: 00000000000000bd RAX: ffffffffffffffda RBX: 000000000051bf60 RCX: 0000000000459fe9 RDX: 00000000200003c0 RSI: 0000000020000180 RDI: 0000000020000140 RBP: 000000000051bf60 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000001009 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc73c93fc0 R14: 000000000051bf60 R15: 00007fa5e54b4d80 [...] ================================================================== Above issue may happen as follows: ------------------------------------- ext4_xattr_set ext4_xattr_set_handle ext4_xattr_ibody_find >> s->end < s->base >> no EXT4_STATE_XATTR >> xattr_check_inode is not executed ext4_xattr_ibody_set ext4_xattr_set_entry >> size_t min_offs = s->end - s->base >> UAF in memcpy we can easily reproduce this problem with the following commands: mkfs.ext4 -F /dev/sda mount -o debug_want_extra_isize=128 /dev/sda /mnt touch /mnt/file setfattr -n user.cat -v `seq -s z 4096|tr -d '[:digit:]'` /mnt/file In ext4_xattr_ibody_find, we have the following assignment logic: header = IHDR(inode, raw_inode) = raw_inode + EXT4_GOOD_OLD_INODE_SIZE + i_extra_isize is->s.base = IFIRST(header) = header + sizeof(struct ext4_xattr_ibody_header) is->s.end = raw_inode + s_inode_size In ext4_xattr_set_entry min_offs = s->end - s->base = s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize - sizeof(struct ext4_xattr_ibody_header) last = s->first free = min_offs - ((void *)last - s->base) - sizeof(__u32) = s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize - sizeof(struct ext4_xattr_ibody_header) - sizeof(__u32) In the calculation formula, all values except s_inode_size and i_extra_size are fixed values. When i_extra_size is the maximum value s_inode_size - EXT4_GOOD_OLD_INODE_SIZE, min_offs is -4 and free is -8. The value overflows. As a result, the preceding issue is triggered when memcpy is executed. Therefore, when finding xattr or setting xattr, check whether there is space for storing xattr in the inode to resolve this issue. 
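The arithmetic above can be reproduced with a few lines of standalone C. The constants are assumptions taken from the reproducer and a typical layout (256-byte inode, 128-byte EXT4_GOOD_OLD_INODE_SIZE, 4-byte ibody header, i_extra_isize forced to its maximum of 128); variable names follow the commit text rather than the real ext4 structures:

```
#include <stdio.h>
#include <stddef.h>

int main(void)
{
	const long s_inode_size  = 256;	/* assumed on-disk inode size */
	const long good_old_size = 128;	/* EXT4_GOOD_OLD_INODE_SIZE */
	const long ibody_header  = 4;	/* sizeof(struct ext4_xattr_ibody_header) */
	const long i_extra_isize = 128;	/* maximum, as forced by the reproducer */

	long min_offs_signed = s_inode_size - good_old_size -
			       i_extra_isize - ibody_header;
	size_t min_offs = (size_t)min_offs_signed;	/* what the unsigned code sees */
	size_t free_bytes = min_offs - 0 /* last == s->first */ - 4 /* sizeof(__u32) */;

	printf("signed  min_offs = %ld\n", min_offs_signed);	/* -4 */
	printf("size_t  min_offs = %zu\n", min_offs);		/* wraps to a huge value */
	printf("size_t  free     = %zu\n", free_bytes);		/* huge -> bogus copy size */
	return 0;
}
```

With EXT4_INODE_HAS_XATTR_SPACE() checked up front, this configuration is rejected before the subtraction ever happens, so min_offs can no longer go negative and wrap.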
Reported-by: Hulk Robot Signed-off-by: Baokun Li Signed-off-by: Zheng Zengkai --- fs/ext4/xattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 2f93e8b90492..eab334636200 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2223,8 +2223,9 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, struct ext4_xattr_search *s = &is->s; int error; - if (EXT4_I(inode)->i_extra_isize == 0) + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) return -ENOSPC; + error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); if (error) return error; -- Gitee From 27551d02853cd7a114f10144afb016fbdd938d7f Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 16 Jun 2022 10:13:57 +0800 Subject: [PATCH 150/243] ext4: correct max_inline_xattr_value_size computing stable inclusion from stable-5.10.137 commit 603fb7bd744ae2710568cf18816d5b494277afaf category: bugfix issue: #I73C2J CVE: CVE-2023-2513 Signed-off-by: lizongfeng --------------------------------------- commit c9fd167d57133c5b748d16913c4eabc55e531c73 upstream. If the ext4 inode does not have xattr space, 0 is returned in the get_max_inline_xattr_value_size function. Otherwise, the function returns a negative value when the inode does not contain EXT4_STATE_XATTR. Cc: stable@kernel.org Signed-off-by: Baokun Li Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220616021358.2504451-4-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: lizongfeng --- fs/ext4/inline.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index cb42b2245c21..26f724a254e7 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -34,6 +34,9 @@ static int get_max_inline_xattr_value_size(struct inode *inode, struct ext4_inode *raw_inode; int free, min_offs; + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) + return 0; + min_offs = EXT4_SB(inode->i_sb)->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - EXT4_I(inode)->i_extra_isize - -- Gitee From b8e7f1d236f3c4ade31a70d791b85585588dbc4b Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 16 Jun 2022 10:13:58 +0800 Subject: [PATCH 151/243] ext4: correct the misjudgment in ext4_iget_extra_inode stable inclusion from stable-5.10.137 commit d0b495aa2692d50f72b9d18ea3897581b0bacc46 category: bugfix issue: #I73C2J CVE: CVE-2023-2513 Signed-off-by: lizongfeng --------------------------------------- commit fd7e672ea98b95b9d4c9dae316639f03c16a749d upstream. Use the EXT4_INODE_HAS_XATTR_SPACE macro to more accurately determine whether the inode have xattr space. 
Cc: stable@kernel.org Signed-off-by: Baokun Li Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220616021358.2504451-5-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: lizongfeng --- fs/ext4/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4cfee4cb2350..ec243bfa60ad 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4668,8 +4668,7 @@ static inline int ext4_iget_extra_inode(struct inode *inode, __le32 *magic = (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; - if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <= - EXT4_INODE_SIZE(inode->i_sb) && + if (EXT4_INODE_HAS_XATTR_SPACE(inode) && *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { ext4_set_inode_state(inode, EXT4_STATE_XATTR); return ext4_find_inline_data_nolock(inode); -- Gitee From f09893eed8728dfe4d412b64d2da7d4bca49900c Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Thu, 26 Jan 2023 18:32:50 -0800 Subject: [PATCH 152/243] netrom: Fix use-after-free caused by accept on already connected socket stable inclusion from stable-v5.10.168~112 commit dd6991251a1382a9b4984962a0c7a467e9d71812 category: bugfix issue: #I72GRK CVE: CVE-2023-32269 Signed-off-by: Ywenrui44091 --------------------------------------- [ Upstream commit 611792920925fb088ddccbe2783c7f92fdfb6b64 ] If you call listen() and accept() on an already connect()ed AF_NETROM socket, accept() can successfully connect. This is because when the peer socket sends data to sendmsg, the skb with its own sk stored in the connected socket's sk->sk_receive_queue is connected, and nr_accept() dequeues the skb waiting in the sk->sk_receive_queue. As a result, nr_accept() allocates and returns a sock with the sk of the parent AF_NETROM socket. And here use-after-free can happen through complex race conditions: ``` cpu0 cpu1 1. socket_2 = socket(AF_NETROM) . . listen(socket_2) accepted_socket = accept(socket_2) 2. socket_1 = socket(AF_NETROM) nr_create() // sk refcount : 1 connect(socket_1) 3. write(accepted_socket) nr_sendmsg() nr_output() nr_kick() nr_send_iframe() nr_transmit_buffer() nr_route_frame() nr_loopback_queue() nr_loopback_timer() nr_rx_frame() nr_process_rx_frame(sk, skb); // sk : socket_1's sk nr_state3_machine() nr_queue_rx_frame() sock_queue_rcv_skb() sock_queue_rcv_skb_reason() __sock_queue_rcv_skb() __skb_queue_tail(list, skb); // list : socket_1's sk->sk_receive_queue 4. listen(socket_1) nr_listen() uaf_socket = accept(socket_1) nr_accept() skb_dequeue(&sk->sk_receive_queue); 5. close(accepted_socket) nr_release() nr_write_internal(sk, NR_DISCREQ) nr_transmit_buffer() // NR_DISCREQ nr_route_frame() nr_loopback_queue() nr_loopback_timer() nr_rx_frame() // sk : socket_1's sk nr_process_rx_frame() // NR_STATE_3 nr_state3_machine() // NR_DISCREQ nr_disconnect() nr_sk(sk)->state = NR_STATE_0; 6. 
close(socket_1) // sk refcount : 3 nr_release() // NR_STATE_0 sock_put(sk); // sk refcount : 0 sk_free(sk); close(uaf_socket) nr_release() sock_hold(sk); // UAF ``` KASAN report by syzbot: ``` BUG: KASAN: use-after-free in nr_release+0x66/0x460 net/netrom/af_netrom.c:520 Write of size 4 at addr ffff8880235d8080 by task syz-executor564/5128 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd1/0x138 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:306 [inline] print_report+0x15e/0x461 mm/kasan/report.c:417 kasan_report+0xbf/0x1f0 mm/kasan/report.c:517 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x141/0x190 mm/kasan/generic.c:189 instrument_atomic_read_write include/linux/instrumented.h:102 [inline] atomic_fetch_add_relaxed include/linux/atomic/atomic-instrumented.h:116 [inline] __refcount_add include/linux/refcount.h:193 [inline] __refcount_inc include/linux/refcount.h:250 [inline] refcount_inc include/linux/refcount.h:267 [inline] sock_hold include/net/sock.h:775 [inline] nr_release+0x66/0x460 net/netrom/af_netrom.c:520 __sock_release+0xcd/0x280 net/socket.c:650 sock_close+0x1c/0x20 net/socket.c:1365 __fput+0x27c/0xa90 fs/file_table.c:320 task_work_run+0x16f/0x270 kernel/task_work.c:179 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xaa8/0x2950 kernel/exit.c:867 do_group_exit+0xd4/0x2a0 kernel/exit.c:1012 get_signal+0x21c3/0x2450 kernel/signal.c:2859 arch_do_signal_or_restart+0x79/0x5c0 arch/x86/kernel/signal.c:306 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x15f/0x250 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296 do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f6c19e3c9b9 Code: Unable to access opcode bytes at 0x7f6c19e3c98f. 
RSP: 002b:00007fffd4ba2ce8 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: 0000000000000116 RBX: 0000000000000003 RCX: 00007f6c19e3c9b9 RDX: 0000000000000318 RSI: 00000000200bd000 RDI: 0000000000000006 RBP: 0000000000000003 R08: 000000000000000d R09: 000000000000000d R10: 0000000000000000 R11: 0000000000000246 R12: 000055555566a2c0 R13: 0000000000000011 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 5128: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:371 [inline] ____kasan_kmalloc mm/kasan/common.c:330 [inline] __kasan_kmalloc+0xa3/0xb0 mm/kasan/common.c:380 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:968 [inline] __kmalloc+0x5a/0xd0 mm/slab_common.c:981 kmalloc include/linux/slab.h:584 [inline] sk_prot_alloc+0x140/0x290 net/core/sock.c:2038 sk_alloc+0x3a/0x7a0 net/core/sock.c:2091 nr_create+0xb6/0x5f0 net/netrom/af_netrom.c:433 __sock_create+0x359/0x790 net/socket.c:1515 sock_create net/socket.c:1566 [inline] __sys_socket_create net/socket.c:1603 [inline] __sys_socket_create net/socket.c:1588 [inline] __sys_socket+0x133/0x250 net/socket.c:1636 __do_sys_socket net/socket.c:1649 [inline] __se_sys_socket net/socket.c:1647 [inline] __x64_sys_socket+0x73/0xb0 net/socket.c:1647 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Freed by task 5128: kasan_save_stack+0x22/0x40 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2b/0x40 mm/kasan/generic.c:518 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free+0x13b/0x1a0 mm/kasan/common.c:200 kasan_slab_free include/linux/kasan.h:177 [inline] __cache_free mm/slab.c:3394 [inline] __do_kmem_cache_free mm/slab.c:3580 [inline] __kmem_cache_free+0xcd/0x3b0 mm/slab.c:3587 sk_prot_free net/core/sock.c:2074 [inline] __sk_destruct+0x5df/0x750 net/core/sock.c:2166 sk_destruct net/core/sock.c:2181 [inline] __sk_free+0x175/0x460 net/core/sock.c:2192 sk_free+0x7c/0xa0 net/core/sock.c:2203 sock_put include/net/sock.h:1991 [inline] nr_release+0x39e/0x460 net/netrom/af_netrom.c:554 __sock_release+0xcd/0x280 net/socket.c:650 sock_close+0x1c/0x20 net/socket.c:1365 __fput+0x27c/0xa90 fs/file_table.c:320 task_work_run+0x16f/0x270 kernel/task_work.c:179 exit_task_work include/linux/task_work.h:38 [inline] do_exit+0xaa8/0x2950 kernel/exit.c:867 do_group_exit+0xd4/0x2a0 kernel/exit.c:1012 get_signal+0x21c3/0x2450 kernel/signal.c:2859 arch_do_signal_or_restart+0x79/0x5c0 arch/x86/kernel/signal.c:306 exit_to_user_mode_loop kernel/entry/common.c:168 [inline] exit_to_user_mode_prepare+0x15f/0x250 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:296 do_syscall_64+0x46/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd ``` To fix this issue, nr_listen() returns -EINVAL for sockets that successfully nr_connect(). Reported-by: syzbot+caa188bdfc1eeafeb418@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- net/netrom/af_netrom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index e5c8a295e640..5c04da4cfbad 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -400,6 +400,11 @@ static int nr_listen(struct socket *sock, int backlog) struct sock *sk = sock->sk; lock_sock(sk); + if (sock->state != SS_UNCONNECTED) { + release_sock(sk); + return -EINVAL; + } + if (sk->sk_state != TCP_LISTEN) { memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN); sk->sk_max_ack_backlog = backlog; -- Gitee From b982a2e4a68852a1d8dfabdd2865ea5703ab9284 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 May 2023 10:25:24 +0200 Subject: [PATCH 153/243] netfilter: nf_tables: deactivate anonymous set from preparation phase stable inclusion from stable-v5.10.180 commit e044a24447189419c3a7ccc5fa6da7516036dc55 category: bugfix issue: #I71KHX CVE: CVE-2023-32233 Signed-off-by: Ywenrui44091 --------------------------------------- commit c1592a89942e9678f7d9c8030efa777c0d57edab upstream. Toggle deleted anonymous sets as inactive in the next generation, so users cannot perform any update on it. Clear the generation bitmask in case the transaction is aborted. The following KASAN splat shows a set element deletion for a bound anonymous set that has been already removed in the same transaction. [ 64.921510] ================================================================== [ 64.923123] BUG: KASAN: wild-memory-access in nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.924745] Write of size 8 at addr dead000000000122 by task test/890 [ 64.927903] CPU: 3 PID: 890 Comm: test Not tainted 6.3.0+ #253 [ 64.931120] Call Trace: [ 64.932699] [ 64.934292] dump_stack_lvl+0x33/0x50 [ 64.935908] ? nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.937551] kasan_report+0xda/0x120 [ 64.939186] ? nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.940814] nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.942452] ? __kasan_slab_alloc+0x2d/0x60 [ 64.944070] ? nf_tables_setelem_notify+0x190/0x190 [nf_tables] [ 64.945710] ? kasan_set_track+0x21/0x30 [ 64.947323] nfnetlink_rcv_batch+0x709/0xd90 [nfnetlink] [ 64.948898] ? 
nfnetlink_rcv_msg+0x480/0x480 [nfnetlink] Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 12 ++++++++++++ net/netfilter/nft_dynset.c | 2 +- net/netfilter/nft_lookup.c | 2 +- net/netfilter/nft_objref.c | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 76bfb6cd5815..45077056b61b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -502,6 +502,7 @@ struct nft_set_binding { }; enum nft_trans_phase; +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 25f18c93fa46..0973b66ee68b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4486,12 +4486,24 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } } +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) +{ + if (nft_set_is_anonymous(set)) + nft_clear(ctx->net, set); + + set->use++; +} +EXPORT_SYMBOL_GPL(nf_tables_activate_set); + void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { switch (phase) { case NFT_TRANS_PREPARE: + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + set->use--; return; case NFT_TRANS_ABORT: diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 58904bee1a0d..546c0d674590 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -233,7 +233,7 @@ static void nft_dynset_activate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_dynset_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index f1363b8aabba..f684b147e52d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -132,7 +132,7 @@ static void nft_lookup_activate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_lookup_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 5f9207a9f485..6a78c4979022 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -180,7 +180,7 @@ static void nft_objref_map_activate(const struct nft_ctx *ctx, { struct nft_objref_map *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, -- Gitee From 7240c38be4c4ae664d39772c0f61750173fce98b Mon Sep 17 00:00:00 2001 From: Gwangun Jung Date: Thu, 13 Apr 2023 19:35:54 +0900 Subject: [PATCH 154/243] net: sched: sch_qfq: prevent slab-out-of-bounds in qfq_activate_agg stable inclusion from stable-v5.10.179~62 commit ddcf35deb8f2a1d9addc74b586cf4c5a1f5d6020 category: bugfix issue: #I6ZJDC CVE: CVE-2023-31436 CVE-2023-2248 Signed-off-by: Ywenrui44091 --------------------------------------- [ Upstream commit 3037933448f60f9acb705997eae62013ecb81e0d ] If the TCA_QFQ_LMAX value is not offered through 
nlattr, lmax is determined by the MTU value of the network device. The MTU of the loopback device can be set up to 2^31-1. As a result, it is possible to have an lmax value that exceeds QFQ_MIN_LMAX. Due to the invalid lmax value, an index is generated that exceeds the QFQ_MAX_INDEX(=24) value, causing out-of-bounds read/write errors. The following reports a oob access: [ 84.582666] BUG: KASAN: slab-out-of-bounds in qfq_activate_agg.constprop.0 (net/sched/sch_qfq.c:1027 net/sched/sch_qfq.c:1060 net/sched/sch_qfq.c:1313) [ 84.583267] Read of size 4 at addr ffff88810f676948 by task ping/301 [ 84.583686] [ 84.583797] CPU: 3 PID: 301 Comm: ping Not tainted 6.3.0-rc5 #1 [ 84.584164] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 84.584644] Call Trace: [ 84.584787] [ 84.584906] dump_stack_lvl (lib/dump_stack.c:107 (discriminator 1)) [ 84.585108] print_report (mm/kasan/report.c:320 mm/kasan/report.c:430) [ 84.585570] kasan_report (mm/kasan/report.c:538) [ 84.585988] qfq_activate_agg.constprop.0 (net/sched/sch_qfq.c:1027 net/sched/sch_qfq.c:1060 net/sched/sch_qfq.c:1313) [ 84.586599] qfq_enqueue (net/sched/sch_qfq.c:1255) [ 84.587607] dev_qdisc_enqueue (net/core/dev.c:3776) [ 84.587749] __dev_queue_xmit (./include/net/sch_generic.h:186 net/core/dev.c:3865 net/core/dev.c:4212) [ 84.588763] ip_finish_output2 (./include/net/neighbour.h:546 net/ipv4/ip_output.c:228) [ 84.589460] ip_output (net/ipv4/ip_output.c:430) [ 84.590132] ip_push_pending_frames (./include/net/dst.h:444 net/ipv4/ip_output.c:126 net/ipv4/ip_output.c:1586 net/ipv4/ip_output.c:1606) [ 84.590285] raw_sendmsg (net/ipv4/raw.c:649) [ 84.591960] sock_sendmsg (net/socket.c:724 net/socket.c:747) [ 84.592084] __sys_sendto (net/socket.c:2142) [ 84.593306] __x64_sys_sendto (net/socket.c:2150) [ 84.593779] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) [ 84.593902] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) [ 84.594070] RIP: 0033:0x7fe568032066 [ 84.594192] Code: 0e 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 41 89 ca 64 8b 04 25 18 00 00 00 85 c09[ 84.594796] RSP: 002b:00007ffce388b4e8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c Code starting with the faulting instruction =========================================== [ 84.595047] RAX: ffffffffffffffda RBX: 00007ffce388cc70 RCX: 00007fe568032066 [ 84.595281] RDX: 0000000000000040 RSI: 00005605fdad6d10 RDI: 0000000000000003 [ 84.595515] RBP: 00005605fdad6d10 R08: 00007ffce388eeec R09: 0000000000000010 [ 84.595749] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 [ 84.595984] R13: 00007ffce388cc30 R14: 00007ffce388b4f0 R15: 0000001d00000001 [ 84.596218] [ 84.596295] [ 84.596351] Allocated by task 291: [ 84.596467] kasan_save_stack (mm/kasan/common.c:46) [ 84.596597] kasan_set_track (mm/kasan/common.c:52) [ 84.596725] __kasan_kmalloc (mm/kasan/common.c:384) [ 84.596852] __kmalloc_node (./include/linux/kasan.h:196 mm/slab_common.c:967 mm/slab_common.c:974) [ 84.596979] qdisc_alloc (./include/linux/slab.h:610 ./include/linux/slab.h:731 net/sched/sch_generic.c:938) [ 84.597100] qdisc_create (net/sched/sch_api.c:1244) [ 84.597222] tc_modify_qdisc (net/sched/sch_api.c:1680) [ 84.597357] rtnetlink_rcv_msg (net/core/rtnetlink.c:6174) [ 84.597495] netlink_rcv_skb (net/netlink/af_netlink.c:2574) [ 84.597627] netlink_unicast (net/netlink/af_netlink.c:1340 net/netlink/af_netlink.c:1365) [ 84.597759] netlink_sendmsg (net/netlink/af_netlink.c:1942) [ 84.597891] sock_sendmsg (net/socket.c:724 
net/socket.c:747) [ 84.598016] ____sys_sendmsg (net/socket.c:2501) [ 84.598147] ___sys_sendmsg (net/socket.c:2557) [ 84.598275] __sys_sendmsg (./include/linux/file.h:31 net/socket.c:2586) [ 84.598399] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) [ 84.598520] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) [ 84.598688] [ 84.598744] The buggy address belongs to the object at ffff88810f674000 [ 84.598744] which belongs to the cache kmalloc-8k of size 8192 [ 84.599135] The buggy address is located 2664 bytes to the right of [ 84.599135] allocated 7904-byte region [ffff88810f674000, ffff88810f675ee0) [ 84.599544] [ 84.599598] The buggy address belongs to the physical page: [ 84.599777] page:00000000e638567f refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10f670 [ 84.600074] head:00000000e638567f order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [ 84.600330] flags: 0x200000000010200(slab|head|node=0|zone=2) [ 84.600517] raw: 0200000000010200 ffff888100043180 dead000000000122 0000000000000000 [ 84.600764] raw: 0000000000000000 0000000080020002 00000001ffffffff 0000000000000000 [ 84.601009] page dumped because: kasan: bad access detected [ 84.601187] [ 84.601241] Memory state around the buggy address: [ 84.601396] ffff88810f676800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 84.601620] ffff88810f676880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 84.601845] >ffff88810f676900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 84.602069] ^ [ 84.602243] ffff88810f676980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 84.602468] ffff88810f676a00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 84.602693] ================================================================== [ 84.602924] Disabling lock debugging due to kernel taint Fixes: 3015f3d2a3cd ("pkt_sched: enable QFQ to support TSO/GSO") Reported-by: Gwangun Jung Signed-off-by: Gwangun Jung Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- net/sched/sch_qfq.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index af8c63a9ec18..b19e4cd08ed6 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -421,15 +421,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } else weight = 1; - if (tb[TCA_QFQ_LMAX]) { + if (tb[TCA_QFQ_LMAX]) lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); - if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { - pr_notice("qfq: invalid max length %u\n", lmax); - return -EINVAL; - } - } else + else lmax = psched_mtu(qdisc_dev(sch)); + if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { + pr_notice("qfq: invalid max length %u\n", lmax); + return -EINVAL; + } + inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; -- Gitee From ed8e31a1bc0fb9f92770ac158d00793a31b1d549 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sun, 12 Mar 2023 01:46:50 +0800 Subject: [PATCH 155/243] power: supply: da9150: Fix use after free bug in da9150_charger_remove due to race condition stable inclusion from stable-v5.10.177~152 commit 75e2144291e847009fbc0350e10ec588ff96e05a category: bugfix issue: #I6P4Y2 CVE: CVE-2023-30772 Signed-off-by: Ywenrui44091 --------------------------------------- [ Upstream commit 06615d11cc78162dfd5116efb71f29eb29502d37 ] In da9150_charger_probe, &charger->otg_work is bound with da9150_charger_otg_work. 
da9150_charger_otg_ncb may be called to start the work. If we remove the module which will call da9150_charger_remove to make cleanup, there may be a unfinished work. The possible sequence is as follows: Fix it by canceling the work before cleanup in the da9150_charger_remove CPU0 CPUc1 |da9150_charger_otg_work da9150_charger_remove | power_supply_unregister | device_unregister | power_supply_dev_release| kfree(psy) | | | power_supply_changed(charger->usb); | //use Fixes: c1a281e34dae ("power: Add support for DA9150 Charger") Signed-off-by: Zheng Wang Signed-off-by: Sebastian Reichel Signed-off-by: Sasha Levin Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- drivers/power/supply/da9150-charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/supply/da9150-charger.c b/drivers/power/supply/da9150-charger.c index f9314cc0cd75..6b987da58655 100644 --- a/drivers/power/supply/da9150-charger.c +++ b/drivers/power/supply/da9150-charger.c @@ -662,6 +662,7 @@ static int da9150_charger_remove(struct platform_device *pdev) if (!IS_ERR_OR_NULL(charger->usb_phy)) usb_unregister_notifier(charger->usb_phy, &charger->otg_nb); + cancel_work_sync(&charger->otg_work); power_supply_unregister(charger->battery); power_supply_unregister(charger->usb); -- Gitee From e0eb76f17f0d1db24f4df788a567a021866f0472 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 10 Mar 2023 11:10:56 -0500 Subject: [PATCH 156/243] KVM: nVMX: add missing consistency checks for CR0 and CR4 stable inclusion from stable-5.10.176 commit c54974ccaff73525462e278602dfe4069877cfaa category: bugfix issue: #I6U81V CVE: CVE-2023-30456 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 112e66017bff7f2837030f34c2bc19501e9212d5 upstream. The effective values of the guest CR0 and CR4 registers may differ from those included in the VMCS12. In particular, disabling EPT forces CR4.PAE=1 and disabling unrestricted guest mode forces CR0.PG=CR0.PE=1. Therefore, checks on these bits cannot be delegated to the processor and must be performed by KVM. 
Reported-by: Reima ISHII Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 Signed-off-by: Ywenrui44091 --- arch/x86/kvm/vmx/nested.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 6aaf1bf68028..8ae6c0e45ac7 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2982,7 +2982,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, enum vm_entry_failure_code *entry_failure_code) { - bool ia32e; + bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE); *entry_failure_code = ENTRY_FAIL_DEFAULT; @@ -3008,6 +3008,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, vmcs12->guest_ia32_perf_global_ctrl))) return -EINVAL; + if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG)) + return -EINVAL; + + if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) || + CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG))) + return -EINVAL; + /* * If the load IA32_EFER VM-entry control is 1, the following checks * are performed on the field for the IA32_EFER MSR: @@ -3019,7 +3026,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, */ if (to_vmx(vcpu)->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { - ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) || CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) || CC(((vmcs12->guest_cr0 & X86_CR0_PG) && -- Gitee From 2c06a0b11d9d66636b716450611db12819f29963 Mon Sep 17 00:00:00 2001 From: Baisong Zhong Date: Sun, 20 Nov 2022 06:59:18 +0000 Subject: [PATCH 157/243] media: dvb-usb: az6027: fix null-ptr-deref in az6027_i2c_xfer() stable inclusion from stable-5.10.163 commit 559891d430e3f3a178040c4371ed419edbfa7d65 category: bugfix issue: #I6OL7B CVE: CVE-2023-28328 Signed-off-by: Ywenrui --------------------------------------- [ Upstream commit 0ed554fd769a19ea8464bb83e9ac201002ef74ad ] Wei Chen reports a kernel bug as blew: general protection fault, probably for non-canonical address KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] ... Call Trace: __i2c_transfer+0x77e/0x1930 drivers/i2c/i2c-core-base.c:2109 i2c_transfer+0x1d5/0x3d0 drivers/i2c/i2c-core-base.c:2170 i2cdev_ioctl_rdwr+0x393/0x660 drivers/i2c/i2c-dev.c:297 i2cdev_ioctl+0x75d/0x9f0 drivers/i2c/i2c-dev.c:458 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl+0xfb/0x170 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fd834a8bded In az6027_i2c_xfer(), if msg[i].addr is 0x99, a null-ptr-deref will caused when accessing msg[i].buf. For msg[i].len is 0 and msg[i].buf is null. Fix this by checking msg[i].len in az6027_i2c_xfer(). 
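Purely for illustration (not part of the patch), a small userspace sketch of the zero-length guard; struct i2c_msg_model and handle_addr_0x99() are made-up stand-ins for the driver's use of struct i2c_msg:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct i2c_msg: address, length, payload pointer. */
struct i2c_msg_model {
    uint16_t addr;
    uint16_t len;
    uint8_t *buf;   /* may be NULL when len == 0 */
};

/* Mirrors the added guard: never read buf[0] unless at least one byte exists. */
static int handle_addr_0x99(const struct i2c_msg_model *msg, uint16_t *value)
{
    if (msg->len < 1)
        return -1;  /* the driver bails out with -EOPNOTSUPP here */

    *value = msg->buf[0] & 0x00ff;
    return 0;
}

int main(void)
{
    struct i2c_msg_model empty = { .addr = 0x99, .len = 0, .buf = NULL };
    uint16_t value;

    /* Without the length check this would dereference a NULL buffer. */
    printf("rc = %d\n", handle_addr_0x99(&empty, &value));
    return 0;
}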
Link: https://lore.kernel.org/lkml/CAO4mrfcPHB5aQJO=mpqV+p8mPLNg-Fok0gw8gZ=zemAfMGTzMg@mail.gmail.com/ Link: https://lore.kernel.org/linux-media/20221120065918.2160782-1-zhongbaisong@huawei.com Fixes: 76f9a820c867 ("V4L/DVB: AZ6027: Initial import of the driver") Reported-by: Wei Chen Signed-off-by: Baisong Zhong Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Ywenrui Signed-off-by: Ywenrui44091 --- drivers/media/usb/dvb-usb/az6027.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/usb/dvb-usb/az6027.c b/drivers/media/usb/dvb-usb/az6027.c index 86788771175b..32b4ee65c280 100644 --- a/drivers/media/usb/dvb-usb/az6027.c +++ b/drivers/media/usb/dvb-usb/az6027.c @@ -975,6 +975,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n if (msg[i].addr == 0x99) { req = 0xBE; index = 0; + if (msg[i].len < 1) { + i = -EOPNOTSUPP; + break; + } value = msg[i].buf[0] & 0x00ff; length = 1; az6027_usb_out_op(d, req, value, index, data, length); -- Gitee From 4f519142738e5ee07ef47c89c7d86f81465928ef Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 27 Nov 2022 10:24:11 +0900 Subject: [PATCH 158/243] af_unix: Get user_ns from in_skb in unix_diag_get_exact(). stable inclusion from stable-5.10.159 commit 575a6266f63dbb3b8eb1da03671451f0d81b8034 category: bugfix issue: #I70E0V CVE: NA Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit b3abe42e94900bdd045c472f9c9be620ba5ce553 ] Wei Chen reported a NULL deref in sk_user_ns() [0][1], and Paolo diagnosed the root cause: in unix_diag_get_exact(), the newly allocated skb does not have sk. [2] We must get the user_ns from the NETLINK_CB(in_skb).sk and pass it to sk_diag_fill(). [0]: BUG: kernel NULL pointer dereference, address: 0000000000000270 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 12bbce067 P4D 12bbce067 PUD 12bc40067 PMD 0 Oops: 0000 [#1] PREEMPT SMP CPU: 0 PID: 27942 Comm: syz-executor.0 Not tainted 6.1.0-rc5-next-20221118 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014 RIP: 0010:sk_user_ns include/net/sock.h:920 [inline] RIP: 0010:sk_diag_dump_uid net/unix/diag.c:119 [inline] RIP: 0010:sk_diag_fill+0x77d/0x890 net/unix/diag.c:170 Code: 89 ef e8 66 d4 2d fd c7 44 24 40 00 00 00 00 49 8d 7c 24 18 e8 54 d7 2d fd 49 8b 5c 24 18 48 8d bb 70 02 00 00 e8 43 d7 2d fd <48> 8b 9b 70 02 00 00 48 8d 7b 10 e8 33 d7 2d fd 48 8b 5b 10 48 8d RSP: 0018:ffffc90000d67968 EFLAGS: 00010246 RAX: ffff88812badaa48 RBX: 0000000000000000 RCX: ffffffff840d481d RDX: 0000000000000465 RSI: 0000000000000000 RDI: 0000000000000270 RBP: ffffc90000d679a8 R08: 0000000000000277 R09: 0000000000000000 R10: 0001ffffffffffff R11: 0001c90000d679a8 R12: ffff88812ac03800 R13: ffff88812c87c400 R14: ffff88812ae42210 R15: ffff888103026940 FS: 00007f08b4e6f700(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000270 CR3: 000000012c58b000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: unix_diag_get_exact net/unix/diag.c:285 [inline] unix_diag_handler_dump+0x3f9/0x500 net/unix/diag.c:317 __sock_diag_cmd net/core/sock_diag.c:235 [inline] sock_diag_rcv_msg+0x237/0x250 net/core/sock_diag.c:266 netlink_rcv_skb+0x13e/0x250 net/netlink/af_netlink.c:2564 sock_diag_rcv+0x24/0x40 
net/core/sock_diag.c:277 netlink_unicast_kernel net/netlink/af_netlink.c:1330 [inline] netlink_unicast+0x5e9/0x6b0 net/netlink/af_netlink.c:1356 netlink_sendmsg+0x739/0x860 net/netlink/af_netlink.c:1932 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x38f/0x500 net/socket.c:2476 ___sys_sendmsg net/socket.c:2530 [inline] __sys_sendmsg+0x197/0x230 net/socket.c:2559 __do_sys_sendmsg net/socket.c:2568 [inline] __se_sys_sendmsg net/socket.c:2566 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2566 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x4697f9 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f08b4e6ec48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000077bf80 RCX: 00000000004697f9 RDX: 0000000000000000 RSI: 00000000200001c0 RDI: 0000000000000003 RBP: 00000000004d29e9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000077bf80 R13: 0000000000000000 R14: 000000000077bf80 R15: 00007ffdb36bc6c0 Modules linked in: CR2: 0000000000000270 [1]: https://lore.kernel.org/netdev/CAO4mrfdvyjFpokhNsiwZiP-wpdSD0AStcJwfKcKQdAALQ9_2Qw@mail.gmail.com/ [2]: https://lore.kernel.org/netdev/e04315e7c90d9a75613f3993c2baf2d344eef7eb.camel@redhat.com/ Fixes: cae9910e7344 ("net: Add UNIX_DIAG_UID to Netlink UNIX socket diagnostics.") Reported-by: syzbot Reported-by: Wei Chen Diagnosed-by: Paolo Abeni Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing40281 Signed-off-by: Ywenrui44091 --- net/unix/diag.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/net/unix/diag.c b/net/unix/diag.c index 9ff64f9df1f3..951b33fa8f5c 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -113,14 +113,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql); } -static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb) +static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb, + struct user_namespace *user_ns) { - uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk)); + uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk)); return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid); } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 portid, u32 seq, u32 flags, int sk_ino) + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct unix_diag_msg *rep; @@ -166,7 +168,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_UID) && - sk_diag_dump_uid(sk, skb)) + sk_diag_dump_uid(sk, skb, user_ns)) goto out_nlmsg_trim; nlmsg_end(skb, nlh); @@ -178,7 +180,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 portid, u32 seq, u32 flags) + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags) { int sk_ino; @@ -189,7 +192,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if 
(!sk_ino) return 0; - return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); + return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino); } static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -217,7 +220,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) goto next; - if (sk_diag_dump(sk, skb, req, + if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) @@ -285,7 +288,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, if (!rep) goto out; - err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, + err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { nlmsg_free(rep); -- Gitee From adda220daee460918e1ef82f5fb4731d74edf56a Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sat, 18 Mar 2023 16:05:26 +0800 Subject: [PATCH 159/243] net: qcom/emac: Fix use after free bug in emac_remove due to race condition stable inclusion from stable-v5.10.177~136 commit cb5879efde4f9b4de4248b835890df7b6c49ffbc category: bugfix issue: #I70B17 CVE: CVE-2023-2483 Signed-off-by: Ywenrui44091 --------------------------------------- [ Upstream commit 6b6bc5b8bd2d4ca9e1efa9ae0f98a0b0687ace75 ] In emac_probe, &adpt->work_thread is bound with emac_work_thread. Then it will be started by timeout handler emac_tx_timeout or a IRQ handler emac_isr. If we remove the driver which will call emac_remove to make cleanup, there may be a unfinished work. The possible sequence is as follows: Fix it by finishing the work before cleanup in the emac_remove and disable timeout response. CPU0 CPU1 |emac_work_thread emac_remove | free_netdev | kfree(netdev); | |emac_reinit_locked |emac_mac_down |//use netdev Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver") Signed-off-by: Zheng Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- drivers/net/ethernet/qualcomm/emac/emac.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index ad655f0a4965..e1aa56be9cc0 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -728,9 +728,15 @@ static int emac_remove(struct platform_device *pdev) struct net_device *netdev = dev_get_drvdata(&pdev->dev); struct emac_adapter *adpt = netdev_priv(netdev); + netif_carrier_off(netdev); + netif_tx_disable(netdev); + unregister_netdev(netdev); netif_napi_del(&adpt->rx_q.napi); + free_irq(adpt->irq.irq, &adpt->irq); + cancel_work_sync(&adpt->work_thread); + emac_clks_teardown(adpt); put_device(&adpt->phydev->mdio.dev); -- Gitee From cce38b4966916cafd02edec411278372212fedc8 Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Tue, 14 Mar 2023 16:54:21 +0000 Subject: [PATCH 160/243] i2c: xgene-slimpro: Fix out-of-bounds bug in xgene_slimpro_i2c_xfer() off-bymainline inclusion from mainline-v6.3-rc4~27^2 commit 92fbb6d1296f81f41f65effd7f5f8c0f74943d15 category: bugfix issue: #I6Y8IE CVE: CVE-2023-2194 Signed-off-by: Ywenrui44091 --------------------------------------- The data->block[0] variable comes from user and is a number between 0-255. Without proper check, the variable may be very large to cause an out-of-bounds when performing memcpy in slimpro_i2c_blkwr. 
Fix this bug by checking the value of writelen. Fixes: f6505fbabc42 ("i2c: add SLIMpro I2C device driver on APM X-Gene platform") Signed-off-by: Wei Chen Cc: stable@vger.kernel.org Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- drivers/i2c/busses/i2c-xgene-slimpro.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index 63cbb9c7c1b0..76e9dcd63856 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -308,6 +308,9 @@ static int slimpro_i2c_blkwr(struct slimpro_i2c_dev *ctx, u32 chip, u32 msg[3]; int rc; + if (writelen > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + memcpy(ctx->dma_buffer, data, writelen); paddr = dma_map_single(ctx->dev, ctx->dma_buffer, writelen, DMA_TO_DEVICE); -- Gitee From 4afb8c8c1b251813f2af75c02ea716e995c51816 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 25 Jul 2022 18:11:06 -0400 Subject: [PATCH 161/243] sctp: leave the err path free in sctp_stream_init to sctp_stream_free stable inclusion from stable-5.10.135 commit 6f3505588d66b27220f07d0cab18da380fae2e2d category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: gaochao --------------------------------------- [ Upstream commit 181d8d2066c000ba0a0e6940a7ad80f1a0e68e9d ] A NULL pointer dereference was reported by Wei Chen: BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: 0010:__list_del_entry_valid+0x26/0x80 Call Trace: sctp_sched_dequeue_common+0x1c/0x90 sctp_sched_prio_dequeue+0x67/0x80 __sctp_outq_teardown+0x299/0x380 sctp_outq_free+0x15/0x20 sctp_association_free+0xc3/0x440 sctp_do_sm+0x1ca7/0x2210 sctp_assoc_bh_rcv+0x1f6/0x340 This happens when calling sctp_sendmsg without connecting to server first. In this case, a data chunk already queues up in send queue of client side when processing the INIT_ACK from server in sctp_process_init() where it calls sctp_stream_init() to alloc stream_in. If it fails to alloc stream_in all stream_out will be freed in sctp_stream_init's err path. Then in the asoc freeing it will crash when dequeuing this data chunk as stream_out is missing. As we can't free stream out before dequeuing all data from send queue, and this patch is to fix it by moving the err path stream_out/in freeing in sctp_stream_init() to sctp_stream_free() which is eventually called when freeing the asoc in sctp_association_free(). This fix also makes the code in sctp_process_init() more clear. Note that in sctp_association_init() when it fails in sctp_stream_init(), sctp_association_free() will not be called, and in that case it should go to 'stream_free' err path to free stream instead of 'fail_init'. 
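To make the ownership rule concrete (this is not part of the patch), a toy userspace model; all names below are made up and greatly simplified:

#include <stdio.h>
#include <stdlib.h>

/* Toy model of the ownership change: init no longer frees the already
 * allocated "out" side on failure; the common free path does, so queued
 * data that still references it can be drained first. */
struct stream_model {
    int *out;
    int *in;
    unsigned outcnt;
    unsigned incnt;
};

static int stream_init(struct stream_model *s, unsigned outcnt, unsigned incnt)
{
    s->out = calloc(outcnt, sizeof(*s->out));
    if (!s->out)
        return -1;
    s->outcnt = outcnt;

    if (!incnt)
        return 0;

    s->in = calloc(incnt, sizeof(*s->in));
    if (!s->in)
        return -1;  /* leave s->out alone; stream_free() owns it */
    s->incnt = incnt;
    return 0;
}

static void stream_free(struct stream_model *s)
{
    /* Single place that releases both sides, mirroring sctp_stream_free(). */
    free(s->in);
    free(s->out);
    s->in = s->out = NULL;
    s->incnt = s->outcnt = 0;
}

int main(void)
{
    struct stream_model s = { 0 };

    if (stream_init(&s, 4, 2))
        fprintf(stderr, "init failed\n");
    stream_free(&s);    /* safe on both the success and the error path */
    return 0;
}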
Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") Reported-by: Wei Chen Signed-off-by: Xin Long Link: https://lore.kernel.org/r/831a3dc100c4908ff76e5bcc363be97f2778bc0b.1658787066.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: gaochao Signed-off-by: Ywenrui44091 --- net/sctp/associola.c | 5 ++--- net/sctp/stream.c | 19 +++---------------- 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index fdb69d46276d..2d4ec6187755 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -226,9 +226,8 @@ static struct sctp_association *sctp_association_init( if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, - 0, gfp)) - goto fail_init; + if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) + goto stream_free; /* Initialize default path MTU. */ asoc->pathmtu = sp->pathmtu; diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 6dc95dcc0ff4..ef9fceadef8d 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, ret = sctp_stream_alloc_out(stream, outcnt, gfp); if (ret) - goto out_err; + return ret; for (i = 0; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; @@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, handle_in: sctp_stream_interleave_init(stream); if (!incnt) - goto out; - - ret = sctp_stream_alloc_in(stream, incnt, gfp); - if (ret) - goto in_err; - - goto out; + return 0; -in_err: - sched->free(stream); - genradix_free(&stream->in); -out_err: - genradix_free(&stream->out); - stream->outcnt = 0; -out: - return ret; + return sctp_stream_alloc_in(stream, incnt, gfp); } int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) -- Gitee From c66759e30053432974e0f677ea1c57811a57693a Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 6 Dec 2022 21:12:59 +0100 Subject: [PATCH 162/243] can: af_can: fix NULL pointer dereference in can_rcv_filter stable inclusion from stable-5.10.159 commit c42221efb1159d6a3c89e96685ee38acdce86b6f category: bugfix issue: #I70E0V CVE: NA Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 0acc442309a0a1b01bcdaa135e56e6398a49439c upstream. Analogue to commit 8aa59e355949 ("can: af_can: fix NULL pointer dereference in can_rx_register()") we need to check for a missing initialization of ml_priv in the receive path of CAN frames. Since commit 4e096a18867a ("net: introduce CAN specific pointer in the struct net_device") the check for dev->type to be ARPHRD_CAN is not sufficient anymore since bonding or tun netdevices claim to be CAN devices but do not initialize ml_priv accordingly. 
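A minimal userspace sketch of the strengthened receive-path check, for illustration only; struct net_device_model is a made-up stand-in for the two fields can_rcv() relies on:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define ARPHRD_CAN 280  /* same value as the kernel's ARPHRD_CAN */

/* Simplified stand-in for the fields the CAN receive path inspects. */
struct net_device_model {
    unsigned short type;
    void *ml_priv;  /* CAN middle-layer state; NULL on tun/bonding devices */
};

/* dev->type alone cannot be trusted: a non-CAN device can claim ARPHRD_CAN
 * without ever initializing ml_priv, so both conditions must hold. */
static bool looks_like_can_device(const struct net_device_model *dev)
{
    return dev->type == ARPHRD_CAN && dev->ml_priv != NULL;
}

int main(void)
{
    struct net_device_model fake = { .type = ARPHRD_CAN, .ml_priv = NULL };

    printf("accept frame: %d\n", looks_like_can_device(&fake));  /* prints 0 */
    return 0;
}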
Fixes: 4e096a18867a ("net: introduce CAN specific pointer in the struct net_device") Reported-by: syzbot+2d7f58292cb5b29eb5ad@syzkaller.appspotmail.com Reported-by: Wei Chen Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20221206201259.3028-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 Signed-off-by: Ywenrui44091 --- net/can/af_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 1c95ede2c9a6..704ffbd60306 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -680,7 +680,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || skb->len != CAN_MTU)) { pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", dev->type, skb->len); goto free_skb; @@ -706,7 +706,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || skb->len != CANFD_MTU)) { pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", dev->type, skb->len); goto free_skb; -- Gitee From cfc1f2616892e32b8b0a7c02ecdb0a6666167f91 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 12 Apr 2023 15:49:23 +1000 Subject: [PATCH 163/243] xfs: verify buffer contents when we skip log replay off-bymainline inclusion from mainline-v6.4-rc1~80^2~25 commit 22ed903eee23a5b174e240f1cdfa9acf393a5210 category: bugfix issue: #I708T3 CVE: CVE-2023-2124 Signed-off-by: Ywenrui44091 --------------------------------------- syzbot detected a crash during log recovery: XFS (loop0): Mounting V5 Filesystem bfdc47fc-10d8-4eed-a562-11a831b3f791 XFS (loop0): Torn write (CRC failure) detected at log block 0x180. Truncating head block from 0x200. 
XFS (loop0): Starting recovery (logdev: internal) ================================================================== BUG: KASAN: slab-out-of-bounds in xfs_btree_lookup_get_block+0x15c/0x6d0 fs/xfs/libxfs/xfs_btree.c:1813 Read of size 8 at addr ffff88807e89f258 by task syz-executor132/5074 CPU: 0 PID: 5074 Comm: syz-executor132 Not tainted 6.2.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1b1/0x290 lib/dump_stack.c:106 print_address_description+0x74/0x340 mm/kasan/report.c:306 print_report+0x107/0x1f0 mm/kasan/report.c:417 kasan_report+0xcd/0x100 mm/kasan/report.c:517 xfs_btree_lookup_get_block+0x15c/0x6d0 fs/xfs/libxfs/xfs_btree.c:1813 xfs_btree_lookup+0x346/0x12c0 fs/xfs/libxfs/xfs_btree.c:1913 xfs_btree_simple_query_range+0xde/0x6a0 fs/xfs/libxfs/xfs_btree.c:4713 xfs_btree_query_range+0x2db/0x380 fs/xfs/libxfs/xfs_btree.c:4953 xfs_refcount_recover_cow_leftovers+0x2d1/0xa60 fs/xfs/libxfs/xfs_refcount.c:1946 xfs_reflink_recover_cow+0xab/0x1b0 fs/xfs/xfs_reflink.c:930 xlog_recover_finish+0x824/0x920 fs/xfs/xfs_log_recover.c:3493 xfs_log_mount_finish+0x1ec/0x3d0 fs/xfs/xfs_log.c:829 xfs_mountfs+0x146a/0x1ef0 fs/xfs/xfs_mount.c:933 xfs_fs_fill_super+0xf95/0x11f0 fs/xfs/xfs_super.c:1666 get_tree_bdev+0x400/0x620 fs/super.c:1282 vfs_get_tree+0x88/0x270 fs/super.c:1489 do_new_mount+0x289/0xad0 fs/namespace.c:3145 do_mount fs/namespace.c:3488 [inline] __do_sys_mount fs/namespace.c:3697 [inline] __se_sys_mount+0x2d3/0x3c0 fs/namespace.c:3674 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f89fa3f4aca Code: 83 c4 08 5b 5d c3 66 2e 0f 1f 84 00 00 00 00 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fffd5fb5ef8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 00646975756f6e2c RCX: 00007f89fa3f4aca RDX: 0000000020000100 RSI: 0000000020009640 RDI: 00007fffd5fb5f10 RBP: 00007fffd5fb5f10 R08: 00007fffd5fb5f50 R09: 000000000000970d R10: 0000000000200800 R11: 0000000000000206 R12: 0000000000000004 R13: 0000555556c6b2c0 R14: 0000000000200800 R15: 00007fffd5fb5f50 The fuzzed image contains an AGF with an obviously garbage agf_refcount_level value of 32, and a dirty log with a buffer log item for that AGF. The ondisk AGF has a higher LSN than the recovered log item. xlog_recover_buf_commit_pass2 reads the buffer, compares the LSNs, and decides to skip replay because the ondisk buffer appears to be newer. Unfortunately, the ondisk buffer is corrupt, but recovery just read the buffer with no buffer ops specified: error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len, buf_flags, &bp, NULL); Skipping the buffer leaves its contents in memory unverified. This sets us up for a kernel crash because xfs_refcount_recover_cow_leftovers reads the buffer (which is still around in XBF_DONE state, so no read verification) and creates a refcountbt cursor of height 32. This is impossible so we run off the end of the cursor object and crash. Fix this by invoking the verifier on all skipped buffers and aborting log recovery if the ondisk buffer is corrupt. 
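As a rough illustration of the approach (not part of the patch), a toy userspace model in which the skip path consults the read verifier instead of trusting the buffer contents blindly; the structures below are made-up stand-ins for xfs_buf and xfs_buf_ops:

#include <stdbool.h>
#include <stdio.h>

struct buf_model;

struct buf_ops_model {
    void (*verify_read)(struct buf_model *bp);
};

struct buf_model {
    const struct buf_ops_model *b_ops;
    int b_error;            /* 0 = clean, negative errno-style otherwise */
    bool corrupt_on_disk;   /* stand-in for real verification work */
};

static void demo_verify_read(struct buf_model *bp)
{
    bp->b_error = bp->corrupt_on_disk ? -117 : 0;   /* roughly -EFSCORRUPTED */
}

static const struct buf_ops_model demo_ops = { .verify_read = demo_verify_read };

/* Called when replay of a buffer log item is being skipped. */
static int skip_replay(struct buf_model *bp)
{
    if (bp->b_ops) {
        bp->b_ops->verify_read(bp);
        if (bp->b_error)
            return bp->b_error;     /* abort recovery */
    }
    return 0;
}

int main(void)
{
    struct buf_model bp = { .b_ops = &demo_ops, .corrupt_on_disk = true };

    printf("skip_replay -> %d\n", skip_replay(&bp));
    return 0;
}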
It might be smarter to force replay the log item atop the buffer and then see if it'll pass the write verifier (like ext4 does) but for now let's go with the conservative option where we stop immediately. Link: https://syzkaller.appspot.com/bug?extid=7e9494b8b399902e994e Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- fs/xfs/xfs_buf_item_recover.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d44e8b4a3391..954d374be662 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -923,6 +923,16 @@ xlog_recover_buf_commit_pass2( if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { trace_xfs_log_recover_buf_skip(log, buf_f); xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); + + /* + * We're skipping replay of this buffer log item due to the log + * item LSN being behind the ondisk buffer. Verify the buffer + * contents since we aren't going to run the write verifier. + */ + if (bp->b_ops) { + bp->b_ops->verify_read(bp); + error = bp->b_error; + } goto out_release; } -- Gitee From c65f757713d785d94e4959ee726d081edad03fd1 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 20 Jun 2022 09:15:47 +0200 Subject: [PATCH 164/243] udmabuf: add back sanity check stable inclusion from stable-5.10.127 commit 20119c1e0fff89542ff3272ace87e04cf6ee6bea category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: gaochao --------------------------------------- [ Upstream commit 05b252cccb2e5c3f56119d25de684b4f810ba40a ] Check vm_fault->pgoff before using it. When we removed the warning, we also removed the check. Fixes: 7b26e4e2119d ("udmabuf: drop WARN_ON() check.") Reported-by: zdi-disclosures@trendmicro.com Suggested-by: Linus Torvalds Signed-off-by: Gerd Hoffmann Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: gaochao Signed-off-by: Ywenrui44091 --- drivers/dma-buf/udmabuf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index db732f71e59a..ddbd7f2f09af 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -26,8 +26,11 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct udmabuf *ubuf = vma->vm_private_data; + pgoff_t pgoff = vmf->pgoff; - vmf->page = ubuf->pages[vmf->pgoff]; + if (pgoff >= ubuf->pagecount) + return VM_FAULT_SIGBUS; + vmf->page = ubuf->pages[pgoff]; get_page(vmf->page); return 0; } -- Gitee From 98b32fc4ff423cde30278d1f7bc65516953b96fb Mon Sep 17 00:00:00 2001 From: KP Singh Date: Mon, 27 Feb 2023 07:05:40 +0100 Subject: [PATCH 165/243] x86/speculation: Allow enabling STIBP with legacy IBRS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.173 commit abfed855f05863d292de2d0ebab4656791bab9c8 category: bugfix issue: #I6X3GC CVE: CVE-2023-1998 Signed-off-by: Ywenrui44091 --------------------------------------- commit 6921ed9049bc7457f66c1596c5b78aec0dae4a9d upstream. When plain IBRS is enabled (not enhanced IBRS), the logic in spectre_v2_user_select_mitigation() determines that STIBP is not needed. The IBRS bit implicitly protects against cross-thread branch target injection. 
However, with legacy IBRS, the IBRS bit is cleared on returning to userspace for performance reasons which leaves userspace threads vulnerable to cross-thread branch target injection against which STIBP protects. Exclude IBRS from the spectre_v2_in_ibrs_mode() check to allow for enabling STIBP (through seccomp/prctl() by default or always-on, if selected by spectre_v2_user kernel cmdline parameter). [ bp: Massage. ] Fixes: 7c693f54c873 ("x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS") Reported-by: José Oliveira Reported-by: Rodrigo Branco Signed-off-by: KP Singh Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230220120127.1975241-1-kpsingh@kernel.org Link: https://lore.kernel.org/r/20230221184908.2349578-1-kpsingh@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- arch/x86/kernel/cpu/bugs.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 20611c1be29d..61f801bc1b2b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1047,14 +1047,18 @@ spectre_v2_parse_user_cmdline(void) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) +static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) { - return mode == SPECTRE_V2_IBRS || - mode == SPECTRE_V2_EIBRS || + return mode == SPECTRE_V2_EIBRS || mode == SPECTRE_V2_EIBRS_RETPOLINE || mode == SPECTRE_V2_EIBRS_LFENCE; } +static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) +{ + return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS; +} + static void __init spectre_v2_user_select_mitigation(void) { @@ -1117,12 +1121,19 @@ spectre_v2_user_select_mitigation(void) } /* - * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, - * STIBP is not required. + * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP + * is not required. + * + * Enhanced IBRS also protects against cross-thread branch target + * injection in user-mode as the IBRS bit remains always set which + * implicitly enables cross-thread protections. However, in legacy IBRS + * mode, the IBRS bit is set only on kernel entry and cleared on return + * to userspace. This disables the implicit cross-thread protection, + * so allow for STIBP to be selected in that case. */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return; /* @@ -2200,7 +2211,7 @@ static ssize_t mmio_stale_data_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { -- Gitee From 2892cbace9c59e3ee1d7b73974a4501e04a69c36 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Mon, 13 Mar 2023 00:08:37 +0800 Subject: [PATCH 166/243] nfc: st-nci: Fix use after free bug in ndlc_remove due to race condition stable inclusion from stable-5.10.176 commit 43aa468df246175207a7d5d7d6d31b231f15b49c category: bugfix issue: #I6VHDS CVE: CVE-2023-1990 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit 5000fe6c27827a61d8250a7e4a1d26c3298ef4f6 ] This bug influences both st_nci_i2c_remove and st_nci_spi_remove. Take st_nci_i2c_remove as an example. 
In st_nci_i2c_probe, it called ndlc_probe and bound &ndlc->sm_work with llt_ndlc_sm_work. When it calls ndlc_recv or timeout handler, it will finally call schedule_work to start the work. When we call st_nci_i2c_remove to remove the driver, there may be a sequence as follows: Fix it by finishing the work before cleanup in ndlc_remove CPU0 CPU1 |llt_ndlc_sm_work st_nci_i2c_remove | ndlc_remove | st_nci_remove | nci_free_device| kfree(ndev) | //free ndlc->ndev | |llt_ndlc_rcv_queue |nci_recv_frame |//use ndlc->ndev Fixes: 35630df68d60 ("NFC: st21nfcb: Add driver for STMicroelectronics ST21NFCB NFC chip") Signed-off-by: Zheng Wang Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20230312160837.2040857-1-zyytlz.wz@163.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing40281 Signed-off-by: Ywenrui44091 --- drivers/nfc/st-nci/ndlc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c index 5d74c674368a..8ccf5a86ad1b 100644 --- a/drivers/nfc/st-nci/ndlc.c +++ b/drivers/nfc/st-nci/ndlc.c @@ -286,13 +286,15 @@ EXPORT_SYMBOL(ndlc_probe); void ndlc_remove(struct llt_ndlc *ndlc) { - st_nci_remove(ndlc->ndev); - /* cancel timers */ del_timer_sync(&ndlc->t1_timer); del_timer_sync(&ndlc->t2_timer); ndlc->t2_active = false; ndlc->t1_active = false; + /* cancel work */ + cancel_work_sync(&ndlc->sm_work); + + st_nci_remove(ndlc->ndev); skb_queue_purge(&ndlc->rcv_q); skb_queue_purge(&ndlc->send_q); -- Gitee From f6f34b678442feea7612041b21af0f1b58f9a56c Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Thu, 9 Mar 2023 16:07:39 +0800 Subject: [PATCH 167/243] Bluetooth: btsdio: fix use after free bug in btsdio_remove due to unfinished work mainline inclusion from mainline-v6.3-rc4 commit 1e9ac114c4428fdb7ff4635b45d4f46017e8916f category: bugfix issue: #I6V3GY CVE: CVE-2023-1989 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1e9ac114c4428fdb7ff4635b45d4f46017e8916f Signed-off-by: wanxiaoqing40281 --------------------------------------- In btsdio_probe, &data->work was bound with btsdio_work.In btsdio_send_frame, it was started by schedule_work. If we call btsdio_remove with an unfinished job, there may be a race condition and cause UAF bug on hdev. 
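To show the general pattern outside the kernel (illustration only), a toy model in which pthread_join() stands in for cancel_work_sync(): the deferred work must be finished before the object it dereferences is freed.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy model of the remove-vs-deferred-work race. */
struct device_data {
    pthread_t worker;
    char *hdev;     /* resource the deferred work dereferences */
};

static void *deferred_work(void *arg)
{
    struct device_data *data = arg;

    /* Would be a use-after-free if remove() freed hdev first. */
    printf("work sees: %s\n", data->hdev);
    return NULL;
}

static void device_remove(struct device_data *data)
{
    /* Equivalent of cancel_work_sync(): wait for the work before freeing. */
    pthread_join(data->worker, NULL);

    free(data->hdev);
    free(data);
}

int main(void)
{
    struct device_data *data = malloc(sizeof(*data));

    if (!data)
        return 1;
    data->hdev = strdup("hci0");
    pthread_create(&data->worker, NULL, deferred_work, data);
    device_remove(data);
    return 0;
}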
Fixes: ddbaf13e3609 ("[Bluetooth] Add generic driver for Bluetooth SDIO devices") Signed-off-by: Zheng Wang Signed-off-by: Luiz Augusto von Dentz Signed-off-by: wanxiaoqing40281 Signed-off-by: Ywenrui44091 --- drivers/bluetooth/btsdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index 199e8f7d426d..7050a16e7efe 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -352,6 +352,7 @@ static void btsdio_remove(struct sdio_func *func) BT_DBG("func %p", func); + cancel_work_sync(&data->work); if (!data) return; -- Gitee From 784d20ea74d3fd4f99cffdc192492d462c4471f3 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Mon, 10 Apr 2023 17:22:03 +0800 Subject: [PATCH 168/243] 9p/xen : Fix use after free bug in xen_9pfs_front_remove due to race condition maillist inclusion category: bugfix bugzilla: 188655, https://gitee.com/src-openeuler/kernel/issues/I6T36H CVE: CVE-2023-1859 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=ea4f1009408efb4989a0f139b70fb338e7f687d0 Signed-off-by: wanxiaoqing40281 --------------------------------------- In xen_9pfs_front_probe, it calls xen_9pfs_front_alloc_dataring to init priv->rings and bound &ring->work with p9_xen_response. When it calls xen_9pfs_front_event_handler to handle IRQ requests, it will finally call schedule_work to start the work. When we call xen_9pfs_front_remove to remove the driver, there may be a sequence as follows: Fix it by finishing the work before cleanup in xen_9pfs_front_free. Note that, this bug is found by static analysis, which might be false positive. CPU0 CPU1 |p9_xen_response xen_9pfs_front_remove| xen_9pfs_front_free| kfree(priv) | //free priv | |p9_tag_lookup |//use priv->client Fixes: 71ebd71921e4 ("xen/9pfs: connect to the backend") Signed-off-by: Zheng Wang Reviewed-by: Michal Swiatkowski Signed-off-by: Eric Van Hensbergen Signed-off-by: Lu Wei Reviewed-by: Yue Haibing Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu Signed-off-by: wanxiaoqing40281 Signed-off-by: Ywenrui44091 --- net/9p/trans_xen.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 3ec1a51a6944..9551cafe24d7 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -291,6 +291,10 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) write_unlock(&xen_9pfs_lock); for (i = 0; i < priv->num_rings; i++) { + struct xen_9pfs_dataring *ring = &priv->rings[i]; + + cancel_work_sync(&ring->work); + if (!priv->rings[i].intf) break; if (priv->rings[i].irq > 0) -- Gitee From 379d1c077610641312379c6b7b7e92e1d03a3a87 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Fri, 10 Mar 2023 16:40:07 +0800 Subject: [PATCH 169/243] hwmon: (xgene) Fix use after free bug in xgene_hwmon_remove due to race condition stable inclusion from stable-5.10.176 commit 0a73c8b3cc99d214dff83c51805c844240c4f749 category: bugfix issue: #I6TCVI CVE: CVE-2023-1855 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit cb090e64cf25602b9adaf32d5dfc9c8bec493cd1 ] In xgene_hwmon_probe, &ctx->workq is bound with xgene_hwmon_evt_work. Then it will be started. If we remove the driver which will call xgene_hwmon_remove to clean up, there may be unfinished work. The possible sequence is as follows: Fix it by finishing the work before cleanup in xgene_hwmon_remove. 
CPU0 CPU1 |xgene_hwmon_evt_work xgene_hwmon_remove | kfifo_free(&ctx->async_msg_fifo);| | |kfifo_out_spinlocked |//use &ctx->async_msg_fifo Fixes: 2ca492e22cb7 ("hwmon: (xgene) Fix crash when alarm occurs before driver probe") Signed-off-by: Zheng Wang Link: https://lore.kernel.org/r/20230310084007.1403388-1-zyytlz.wz@163.com Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing40281 Signed-off-by: Ywenrui44091 --- drivers/hwmon/xgene-hwmon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index f2a5af239c95..f5d3cf86753f 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -768,6 +768,7 @@ static int xgene_hwmon_remove(struct platform_device *pdev) { struct xgene_hwmon_dev *ctx = platform_get_drvdata(pdev); + cancel_work_sync(&ctx->workq); hwmon_device_unregister(ctx->hwmon_dev); kfifo_free(&ctx->async_msg_fifo); if (acpi_disabled) -- Gitee From 38c25d84a5c8ad837afd31b9de158d53d33db768 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 16 May 2022 16:42:13 +0800 Subject: [PATCH 170/243] Fix double fget() in vhost_net_set_backend() stable inclusion from stable-5.10.118 commit ec0d801d1a44d9259377142c6218885ecd685e41 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: gaochao --------------------------------------- commit fb4554c2232e44d595920f4d5c66cf8f7d13f9bc upstream. Descriptor table is a shared resource; two fget() on the same descriptor may return different struct file references. get_tap_ptr_ring() is called after we'd found (and pinned) the socket we'll be using and it tries to find the private tun/tap data structures associated with it. Redoing the lookup by the same file descriptor we'd used to get the socket is racy - we need to same struct file. Thanks to Jason for spotting a braino in the original variant of patch - I'd missed the use of fd == -1 for disabling backend, and in that case we can end up with sock == NULL and sock != oldsock. Cc: stable@kernel.org Acked-by: Michael S. 
Tsirkin Signed-off-by: Jason Wang Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman Signed-off-by: gc1202 Signed-off-by: Ywenrui44091 --- drivers/vhost/net.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 3552b2303f74..8a4f08e36fd3 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1447,13 +1447,9 @@ static struct socket *get_raw_socket(int fd) return ERR_PTR(r); } -static struct ptr_ring *get_tap_ptr_ring(int fd) +static struct ptr_ring *get_tap_ptr_ring(struct file *file) { struct ptr_ring *ring; - struct file *file = fget(fd); - - if (!file) - return NULL; ring = tun_get_tx_ring(file); if (!IS_ERR(ring)) goto out; @@ -1462,7 +1458,6 @@ static struct ptr_ring *get_tap_ptr_ring(int fd) goto out; ring = NULL; out: - fput(file); return ring; } @@ -1549,8 +1544,12 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) r = vhost_net_enable_vq(n, vq); if (r) goto err_used; - if (index == VHOST_NET_VQ_RX) - nvq->rx_ring = get_tap_ptr_ring(fd); + if (index == VHOST_NET_VQ_RX) { + if (sock) + nvq->rx_ring = get_tap_ptr_ring(sock->file); + else + nvq->rx_ring = NULL; + } oldubufs = nvq->ubufs; nvq->ubufs = ubufs; -- Gitee From 72e88ae20df09f495db5ff77fd566d49f8eb2b5f Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Fri, 11 Feb 2022 16:32:26 -0800 Subject: [PATCH 171/243] fs/proc: task_mmu.c: don't read mapcount for migration entry stable inclusion from stable-5.10.102 commit db3f3636e4aed2cba3e4e7897a053323f7a62249 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: gaochao --------------------------------------- commit 24d7275ce2791829953ed4e72f68277ceb2571c6 upstream. The syzbot reported the below BUG: kernel BUG at include/linux/page-flags.h:785! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 4392 Comm: syz-executor560 Not tainted 5.16.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:PageDoubleMap include/linux/page-flags.h:785 [inline] RIP: 0010:__page_mapcount+0x2d2/0x350 mm/util.c:744 Call Trace: page_mapcount include/linux/mm.h:837 [inline] smaps_account+0x470/0xb10 fs/proc/task_mmu.c:466 smaps_pte_entry fs/proc/task_mmu.c:538 [inline] smaps_pte_range+0x611/0x1250 fs/proc/task_mmu.c:601 walk_pmd_range mm/pagewalk.c:128 [inline] walk_pud_range mm/pagewalk.c:205 [inline] walk_p4d_range mm/pagewalk.c:240 [inline] walk_pgd_range mm/pagewalk.c:277 [inline] __walk_page_range+0xe23/0x1ea0 mm/pagewalk.c:379 walk_page_vma+0x277/0x350 mm/pagewalk.c:530 smap_gather_stats.part.0+0x148/0x260 fs/proc/task_mmu.c:768 smap_gather_stats fs/proc/task_mmu.c:741 [inline] show_smap+0xc6/0x440 fs/proc/task_mmu.c:822 seq_read_iter+0xbb0/0x1240 fs/seq_file.c:272 seq_read+0x3e0/0x5b0 fs/seq_file.c:162 vfs_read+0x1b5/0x600 fs/read_write.c:479 ksys_read+0x12d/0x250 fs/read_write.c:619 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae The reproducer was trying to read /proc/$PID/smaps when calling MADV_FREE at the mean time. MADV_FREE may split THPs if it is called for partial THP. It may trigger the below race: CPU A CPU B ----- ----- smaps walk: MADV_FREE: page_mapcount() PageCompound() split_huge_page() page = compound_head(page) PageDoubleMap(page) When calling PageDoubleMap() this page is not a tail page of THP anymore so the BUG is triggered. 
This could be fixed by elevated refcount of the page before calling mapcount, but that would prevent it from counting migration entries, and it seems overkilling because the race just could happen when PMD is split so all PTE entries of tail pages are actually migration entries, and smaps_account() does treat migration entries as mapcount == 1 as Kirill pointed out. Add a new parameter for smaps_account() to tell this entry is migration entry then skip calling page_mapcount(). Don't skip getting mapcount for device private entries since they do track references with mapcount. Pagemap also has the similar issue although it was not reported. Fixed it as well. [shy828301@gmail.com: v4] Link: https://lkml.kernel.org/r/20220203182641.824731-1-shy828301@gmail.com [nathan@kernel.org: avoid unused variable warning in pagemap_pmd_range()] Link: https://lkml.kernel.org/r/20220207171049.1102239-1-nathan@kernel.org Link: https://lkml.kernel.org/r/20220120202805.3369-1-shy828301@gmail.com Fixes: e9b61f19858a ("thp: reintroduce split_huge_page()") Signed-off-by: Yang Shi Signed-off-by: Nathan Chancellor Reported-by: syzbot+1f52b3a18d5633fa7f82@syzkaller.appspotmail.com Acked-by: David Hildenbrand Cc: "Kirill A. Shutemov" Cc: Jann Horn Cc: Matthew Wilcox Cc: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: gc1202 Signed-off-by: Ywenrui44091 --- fs/proc/task_mmu.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 14617d886df8..b8fd8fe12177 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -453,7 +453,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss, } static void smaps_account(struct mem_size_stats *mss, struct page *page, - bool compound, bool young, bool dirty, bool locked) + bool compound, bool young, bool dirty, bool locked, + bool migration) { int i, nr = compound ? compound_nr(page) : 1; unsigned long size = nr * PAGE_SIZE; @@ -480,8 +481,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, * page_count(page) == 1 guarantees the page is mapped exactly once. * If any subpage of the compound page mapped with PTE it would elevate * page_count(). + * + * The page_mapcount() is called to get a snapshot of the mapcount. + * Without holding the page lock this snapshot can be slightly wrong as + * we cannot always read the mapcount atomically. It is not safe to + * call page_mapcount() even with PTL held if the page is not mapped, + * especially for migration entries. Treat regular migration entries + * as mapcount == 1. 
*/ - if (page_count(page) == 1) { + if ((page_count(page) == 1) || migration) { smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty, locked, true); return; @@ -518,6 +526,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool migration = false; if (pte_present(*pte)) { page = vm_normal_page(vma, addr, *pte); @@ -537,9 +546,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } else { mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } - } else if (is_migration_entry(swpent)) + } else if (is_migration_entry(swpent)) { + migration = true; page = migration_entry_to_page(swpent); - else if (is_device_private_entry(swpent)) + } else if (is_device_private_entry(swpent)) page = device_private_entry_to_page(swpent); } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap && pte_none(*pte))) { @@ -553,7 +563,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); + smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), + locked, migration); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -564,6 +575,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool migration = false; if (pmd_present(*pmd)) { /* FOLL_DUMP will return -EFAULT on huge zero page */ @@ -571,8 +583,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { swp_entry_t entry = pmd_to_swp_entry(*pmd); - if (is_migration_entry(entry)) + if (is_migration_entry(entry)) { + migration = true; page = migration_entry_to_page(entry); + } } if (IS_ERR_OR_NULL(page)) return; @@ -584,7 +598,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, /* pass */; else mss->file_thp += HPAGE_PMD_SIZE; - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); + + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), + locked, migration); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -1389,6 +1405,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, { u64 frame = 0, flags = 0; struct page *page = NULL; + bool migration = false; if (pte_present(pte)) { if (pm->show_pfn) @@ -1406,8 +1423,10 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, frame = swp_type(entry) | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; - if (is_migration_entry(entry)) + if (is_migration_entry(entry)) { + migration = true; page = migration_entry_to_page(entry); + } if (is_device_private_entry(entry)) page = device_private_entry_to_page(entry); @@ -1415,7 +1434,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (page && !PageAnon(page)) flags |= PM_FILE; - if (page && page_mapcount(page) == 1) + if (page && !migration && page_mapcount(page) == 1) flags |= PM_MMAP_EXCLUSIVE; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1431,8 +1450,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, spinlock_t *ptl; pte_t *pte, *orig_pte; int err = 0; - #ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool migration = false; + ptl = pmd_trans_huge_lock(pmdp, vma); if (ptl) { u64 flags = 0, frame = 0; @@ -1467,11 +1487,12 @@ static int 
pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; VM_BUG_ON(!is_pmd_migration_entry(pmd)); + migration = is_migration_entry(entry); page = migration_entry_to_page(entry); } #endif - if (page && page_mapcount(page) == 1) + if (page && !migration && page_mapcount(page) == 1) flags |= PM_MMAP_EXCLUSIVE; for (; addr != end; addr += PAGE_SIZE) { -- Gitee From a678cd548aafd485f7176fbacac7655072b75151 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Jan 2023 11:03:20 +0100 Subject: [PATCH 172/243] prlimit: do_prlimit needs to have a speculation check stable inclusion from stable-v5.10.165~57 commit 9f8e45720e0e7edb661d0082422f662ed243d8d8 category: bugfix issue: #I6ZH69 CVE: CVE-2023-0458 Signed-off-by: Ywenrui44091 --------------------------------------- commit 739790605705ddcf18f21782b9c99ad7d53a8c11 upstream. do_prlimit() adds the user-controlled resource value to a pointer that will subsequently be dereferenced. In order to help prevent this codepath from being used as a spectre "gadget" a barrier needs to be added after checking the range. Reported-by: Jordy Zomer Tested-by: Jordy Zomer Suggested-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- kernel/sys.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sys.c b/kernel/sys.c index c63de71889bf..ee71621340dc 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1549,6 +1549,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, if (resource >= RLIM_NLIMITS) return -EINVAL; + resource = array_index_nospec(resource, RLIM_NLIMITS); + if (new_rlim) { if (new_rlim->rlim_cur > new_rlim->rlim_max) return -EINVAL; -- Gitee From f2b3594a555e71ec026b9af4a8b529087bfda0f2 Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Thu, 16 Dec 2021 13:25:32 -0500 Subject: [PATCH 173/243] tun: avoid double free in tun_free_netdev stable inclusion from stable-5.10.136 commit a01a4e9f5dc93335c716fa4023b1901956e8c904 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: lizongfeng --------------------------------------- commit 158b515f703e75e7d68289bf4d98c664e1d632df upstream. Avoid double free in tun_free_netdev() by moving the dev->tstats and tun->security allocs to a new ndo_init routine (tun_net_init()) that will be called by register_netdevice(). ndo_init is paired with the desctructor (tun_free_netdev()), so if there's an error in register_netdevice() the destructor will handle the frees. 
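A toy userspace model of the init/destructor pairing described above, for illustration only; all names are made up:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Once register_device() has been handed the object, its paired destructor
 * is the only place that releases the per-device resources, so the caller
 * never frees them a second time. */
struct device_model {
    int *stats;             /* stands in for dev->tstats / tun->security */
    bool fail_register;     /* force the error path for the demo */
};

static int device_init(struct device_model *dev)
{
    dev->stats = calloc(1, sizeof(*dev->stats));
    return dev->stats ? 0 : -1;
}

static void device_destructor(struct device_model *dev)
{
    free(dev->stats);       /* single point of release */
    dev->stats = NULL;
}

static int register_device(struct device_model *dev)
{
    if (device_init(dev))
        return -1;
    if (dev->fail_register) {
        device_destructor(dev); /* registration cleans up what init did */
        return -1;
    }
    return 0;
}

int main(void)
{
    struct device_model dev = { .fail_register = true };

    if (register_device(&dev))
        fprintf(stderr, "register failed, nothing left to free\n");
    /* No second free here: the destructor already ran exactly once. */
    return 0;
}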
BUG: KASAN: double-free or invalid-free in selinux_tun_dev_free_security+0x1a/0x20 security/selinux/hooks.c:5605 CPU: 0 PID: 25750 Comm: syz-executor416 Not tainted 5.16.0-rc2-syzk #1 Hardware name: Red Hat KVM, BIOS Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x89/0xb5 lib/dump_stack.c:106 print_address_description.constprop.9+0x28/0x160 mm/kasan/report.c:247 kasan_report_invalid_free+0x55/0x80 mm/kasan/report.c:372 ____kasan_slab_free mm/kasan/common.c:346 [inline] __kasan_slab_free+0x107/0x120 mm/kasan/common.c:374 kasan_slab_free include/linux/kasan.h:235 [inline] slab_free_hook mm/slub.c:1723 [inline] slab_free_freelist_hook mm/slub.c:1749 [inline] slab_free mm/slub.c:3513 [inline] kfree+0xac/0x2d0 mm/slub.c:4561 selinux_tun_dev_free_security+0x1a/0x20 security/selinux/hooks.c:5605 security_tun_dev_free_security+0x4f/0x90 security/security.c:2342 tun_free_netdev+0xe6/0x150 drivers/net/tun.c:2215 netdev_run_todo+0x4df/0x840 net/core/dev.c:10627 rtnl_unlock+0x13/0x20 net/core/rtnetlink.c:112 __tun_chr_ioctl+0x80c/0x2870 drivers/net/tun.c:3302 tun_chr_ioctl+0x2f/0x40 drivers/net/tun.c:3311 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0x19d/0x220 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3a/0x80 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Reported-by: syzkaller Signed-off-by: George Kennedy Suggested-by: Jakub Kicinski Link: https://lore.kernel.org/r/1639679132-19884-1-git-send-email-george.kennedy@oracle.com Signed-off-by: Jakub Kicinski Signed-off-by: Fedor Pchelkin Signed-off-by: Greg Kroah-Hartman Signed-off-by: lizongfeng Signed-off-by: Ywenrui44091 --- drivers/net/tun.c | 114 ++++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 55 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index fe230c554e3c..3a292dc5b3a4 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -220,6 +220,9 @@ struct tun_struct { struct tun_prog __rcu *steering_prog; struct tun_prog __rcu *filter_prog; struct ethtool_link_ksettings link_ksettings; + /* init args */ + struct file *file; + struct ifreq *ifr; }; struct veth { @@ -227,6 +230,9 @@ struct veth { __be16 h_vlan_TCI; }; +static void tun_flow_init(struct tun_struct *tun); +static void tun_flow_uninit(struct tun_struct *tun); + static int tun_napi_receive(struct napi_struct *napi, int budget) { struct tun_file *tfile = container_of(napi, struct tun_file, napi); @@ -964,6 +970,49 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) static const struct ethtool_ops tun_ethtool_ops; +static int tun_net_init(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + struct ifreq *ifr = tun->ifr; + int err; + + tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); + if (!tun->pcpu_stats) + return -ENOMEM; + + spin_lock_init(&tun->lock); + + err = security_tun_dev_alloc_security(&tun->security); + if (err < 0) { + free_percpu(tun->pcpu_stats); + return err; + } + + tun_flow_init(tun); + + dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | + TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + dev->features = dev->hw_features | NETIF_F_LLTX; + dev->vlan_features = dev->features & + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); + + tun->flags = (tun->flags & ~TUN_FEATURES) | + (ifr->ifr_flags & TUN_FEATURES); + + INIT_LIST_HEAD(&tun->disabled); + err = 
tun_attach(tun, tun->file, false, ifr->ifr_flags & IFF_NAPI, + ifr->ifr_flags & IFF_NAPI_FRAGS, false); + if (err < 0) { + tun_flow_uninit(tun); + security_tun_dev_free_security(tun->security); + free_percpu(tun->pcpu_stats); + return err; + } + return 0; +} + /* Net device detach from fd. */ static void tun_net_uninit(struct net_device *dev) { @@ -1205,6 +1254,7 @@ static int tun_net_change_carrier(struct net_device *dev, bool new_carrier) } static const struct net_device_ops tun_netdev_ops = { + .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, @@ -1285,6 +1335,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) } static const struct net_device_ops tap_netdev_ops = { + .ndo_init = tun_net_init, .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, @@ -1325,7 +1376,7 @@ static void tun_flow_uninit(struct tun_struct *tun) #define MAX_MTU 65535 /* Initialize net device. */ -static void tun_net_init(struct net_device *dev) +static void tun_net_initialize(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@ -2257,10 +2308,6 @@ static void tun_free_netdev(struct net_device *dev) BUG_ON(!(list_empty(&tun->disabled))); free_percpu(tun->pcpu_stats); - /* We clear pcpu_stats so that tun_set_iff() can tell if - * tun_free_netdev() has been called from register_netdevice(). - */ - tun->pcpu_stats = NULL; tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); @@ -2772,41 +2819,16 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->rx_batched = 0; RCU_INIT_POINTER(tun->steering_prog, NULL); - tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); - if (!tun->pcpu_stats) { - err = -ENOMEM; - goto err_free_dev; - } + tun->ifr = ifr; + tun->file = file; - spin_lock_init(&tun->lock); - - err = security_tun_dev_alloc_security(&tun->security); - if (err < 0) - goto err_free_stat; - - tun_net_init(dev); - tun_flow_init(tun); - - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | - TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX; - dev->features = dev->hw_features | NETIF_F_LLTX; - dev->vlan_features = dev->features & - ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); - - tun->flags = (tun->flags & ~TUN_FEATURES) | - (ifr->ifr_flags & TUN_FEATURES); - - INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI, - ifr->ifr_flags & IFF_NAPI_FRAGS, false); - if (err < 0) - goto err_free_flow; + tun_net_initialize(dev); err = register_netdevice(tun->dev); - if (err < 0) - goto err_detach; + if (err < 0) { + free_netdev(dev); + return err; + } /* free_netdev() won't check refcnt, to aovid race * with dev_put() we need publish tun after registration. */ @@ -2823,24 +2845,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) strcpy(ifr->ifr_name, tun->dev->name); return 0; - -err_detach: - tun_detach_all(dev); - /* We are here because register_netdevice() has failed. - * If register_netdevice() already called tun_free_netdev() - * while dealing with the error, tun->pcpu_stats has been cleared. 
- */ - if (!tun->pcpu_stats) - goto err_free_dev; - -err_free_flow: - tun_flow_uninit(tun); - security_tun_dev_free_security(tun->security); -err_free_stat: - free_percpu(tun->pcpu_stats); -err_free_dev: - free_netdev(dev); - return err; } static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr) -- Gitee From b82882e2a5ded5241a4f627446af015eac8b183b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Aug 2022 17:55:07 +0300 Subject: [PATCH 174/243] staging: rtl8712: fix use after free bugs stable inclusion from stable-5.10.142 commit 19e3f69d19801940abc2ac37c169882769ed9770 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: lizongfeng --------------------------------------- commit e230a4455ac3e9b112f0367d1b8e255e141afae0 upstream. _Read/Write_MACREG callbacks are NULL so the read/write_macreg_hdl() functions don't do anything except free the "pcmd" pointer. It results in a use after free. Delete them. Fixes: 2865d42c78a9 ("staging: r8712u: Add the new driver to the mainline kernel") Cc: stable Reported-by: Zheng Wang Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/Yw4ASqkYcUhUfoY2@kili Signed-off-by: Greg Kroah-Hartman Signed-off-by: lizongfeng Signed-off-by: Ywenrui44091 --- drivers/staging/rtl8712/rtl8712_cmd.c | 36 --------------------------- 1 file changed, 36 deletions(-) diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index ff3cb09c57a6..30e965c410ff 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -117,34 +117,6 @@ static void r871x_internal_cmd_hdl(struct _adapter *padapter, u8 *pbuf) kfree(pdrvcmd->pbuf); } -static u8 read_macreg_hdl(struct _adapter *padapter, u8 *pbuf) -{ - void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd); - struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; - - /* invoke cmd->callback function */ - pcmd_callback = cmd_callback[pcmd->cmdcode].callback; - if (!pcmd_callback) - r8712_free_cmd_obj(pcmd); - else - pcmd_callback(padapter, pcmd); - return H2C_SUCCESS; -} - -static u8 write_macreg_hdl(struct _adapter *padapter, u8 *pbuf) -{ - void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd); - struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; - - /* invoke cmd->callback function */ - pcmd_callback = cmd_callback[pcmd->cmdcode].callback; - if (!pcmd_callback) - r8712_free_cmd_obj(pcmd); - else - pcmd_callback(padapter, pcmd); - return H2C_SUCCESS; -} - static u8 read_bbreg_hdl(struct _adapter *padapter, u8 *pbuf) { struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; @@ -213,14 +185,6 @@ static struct cmd_obj *cmd_hdl_filter(struct _adapter *padapter, pcmd_r = NULL; switch (pcmd->cmdcode) { - case GEN_CMD_CODE(_Read_MACREG): - read_macreg_hdl(padapter, (u8 *)pcmd); - pcmd_r = pcmd; - break; - case GEN_CMD_CODE(_Write_MACREG): - write_macreg_hdl(padapter, (u8 *)pcmd); - pcmd_r = pcmd; - break; case GEN_CMD_CODE(_Read_BBREG): read_bbreg_hdl(padapter, (u8 *)pcmd); break; -- Gitee From 175d70ad0c82090d31ff83f925742f0b6c8fc62e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Mar 2022 12:56:24 -0400 Subject: [PATCH 175/243] KVM: x86/mmu: do compare-and-exchange of gPTE via the user address stable inclusion from stable-5.10.110 commit e90518d10c7dd59d5ebbe25b0f0083a7dbffa42f category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: gaochao --------------------------------------- commit 2a8859f373b0a86f0ece8ec8312607eacf12485d upstream. FNAME(cmpxchg_gpte) is an inefficient mess. 
It is at least decent if it can go through get_user_pages_fast(), but if it cannot then it tries to use memremap(); that is not just terribly slow, it is also wrong because it assumes that the VM_PFNMAP VMA is contiguous. The right way to do it would be to do the same thing as hva_to_pfn_remapped() does since commit add6a0cd1c5b ("KVM: MMU: try to fix up page faults before giving up", 2016-07-05), using follow_pte() and fixup_user_fault() to determine the correct address to use for memremap(). To do this, one could for example extract hva_to_pfn() for use outside virt/kvm/kvm_main.c. But really there is no reason to do that either, because there is already a perfectly valid address to do the cmpxchg() on, only it is a userspace address. That means doing user_access_begin()/user_access_end() and writing the code in assembly to handle exceptions correctly. Worse, the guest PTE can be 8-byte even on i686 so there is the extra complication of using cmpxchg8b to account for. But at least it is an efficient mess. (Thanks to Linus for suggesting improvement on the inline assembly). Reported-by: Qiuhao Li Reported-by: Gaoning Pan Reported-by: Yongkang Jia Reported-by: syzbot+6cde2282daa792c49ab8@syzkaller.appspotmail.com Debugged-by: Tadeusz Struk Tested-by: Maxim Levitsky Cc: stable@vger.kernel.org Fixes: bd53cb35a3e9 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs") Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman Signed-off-by: gc1202 Signed-off-by: Ywenrui44091 --- arch/x86/kvm/mmu/paging_tmpl.h | 77 ++++++++++++++++------------------ 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index f8829134bf34..c6daeeff1d9c 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -34,9 +34,8 @@ #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgq" #else - #define CMPXCHG cmpxchg64 #define PT_MAX_FULL_LEVELS 2 #endif #elif PTTYPE == 32 @@ -52,7 +51,7 @@ #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true - #define CMPXCHG cmpxchg + #define CMPXCHG "cmpxchgl" #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 #define guest_walker guest_walkerEPT @@ -65,7 +64,9 @@ #define PT_GUEST_DIRTY_SHIFT 9 #define PT_GUEST_ACCESSED_SHIFT 8 #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) - #define CMPXCHG cmpxchg64 + #ifdef CONFIG_X86_64 + #define CMPXCHG "cmpxchgq" + #endif #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #else #error Invalid PTTYPE value @@ -147,43 +148,39 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t __user *ptep_user, unsigned index, pt_element_t orig_pte, pt_element_t new_pte) { - int npages; - pt_element_t ret; - pt_element_t *table; - struct page *page; - - npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); - if (likely(npages == 1)) { - table = kmap_atomic(page); - ret = CMPXCHG(&table[index], orig_pte, new_pte); - kunmap_atomic(table); - - kvm_release_page_dirty(page); - } else { - struct vm_area_struct *vma; - unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK; - unsigned long pfn; - unsigned long paddr; - - mmap_read_lock(current->mm); - vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE); - if (!vma || !(vma->vm_flags & VM_PFNMAP)) { - mmap_read_unlock(current->mm); - 
return -EFAULT; - } - pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - paddr = pfn << PAGE_SHIFT; - table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB); - if (!table) { - mmap_read_unlock(current->mm); - return -EFAULT; - } - ret = CMPXCHG(&table[index], orig_pte, new_pte); - memunmap(table); - mmap_read_unlock(current->mm); - } + int r = -EFAULT; + + if (!user_access_begin(ptep_user, sizeof(pt_element_t))) + return -EFAULT; + +#ifdef CMPXCHG + asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" + "mov $0, %[r]\n" + "setnz %b[r]\n" + "2:" + _ASM_EXTABLE_UA(1b, 2b) + : [ptr] "+m" (*ptep_user), + [old] "+a" (orig_pte), + [r] "+q" (r) + : [new] "r" (new_pte) + : "memory"); +#else + asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" + "movl $0, %[r]\n" + "jz 2f\n" + "incl %[r]\n" + "2:" + _ASM_EXTABLE_UA(1b, 2b) + : [ptr] "+m" (*ptep_user), + [old] "+A" (orig_pte), + [r] "+rm" (r) + : [new_lo] "b" ((u32)new_pte), + [new_hi] "c" ((u32)(new_pte >> 32)) + : "memory"); +#endif - return (ret != orig_pte); + user_access_end(); + return r; } static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, -- Gitee From 5d201eff36ef143ecd0c3bdfa93f9bccf8201812 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 21 Feb 2023 12:30:15 -0800 Subject: [PATCH 176/243] uaccess: Add speculation barrier to copy_from_user() stable inclusion from stable-v5.10.170~10 commit 3b6ce54cfa2c04f0636fd0c985913af8703b408d category: bugfix issue: #I6ZJ2L CVE: CVE-2023-0459 Signed-off-by: Ywenrui44091 --------------------------------------- commit 74e19ef0ff8061ef55957c3abd71614ef0f42f47 upstream. The results of "access_ok()" can be mis-speculated. The result is that you can end speculatively: if (access_ok(from, size)) // Right here even for bad from/size combinations. On first glance, it would be ideal to just add a speculation barrier to "access_ok()" so that its results can never be mis-speculated. But there are lots of system calls just doing access_ok() via "copy_to_user()" and friends (example: fstat() and friends). Those are generally not problematic because they do not _consume_ data from userspace other than the pointer. They are also very quick and common system calls that should not be needlessly slowed down. "copy_from_user()" on the other hand uses a user-controller pointer and is frequently followed up with code that might affect caches. Take something like this: if (!copy_from_user(&kernelvar, uptr, size)) do_something_with(kernelvar); If userspace passes in an evil 'uptr' that *actually* points to a kernel addresses, and then do_something_with() has cache (or other) side-effects, it could allow userspace to infer kernel data values. Add a barrier to the common copy_from_user() code to prevent mis-speculated values which happen after the copy. Also add a stub for architectures that do not define barrier_nospec(). This makes the macro usable in generic code. Since the barrier is now usable in generic code, the x86 #ifdef in the BPF code can also go away. 
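For illustration only, a condensed sketch of the check -> barrier -> copy ordering described above (the helper name is made up; the real change is the small lib/usercopy.c hunk below):

    #include <linux/uaccess.h>
    #include <linux/nospec.h>
    #include <linux/string.h>

    static unsigned long demo_copy_from_user(void *to, const void __user *from,
                                             unsigned long n)
    {
        unsigned long res = n;

        if (likely(access_ok(from, n))) {
            /*
             * access_ok() may have been mis-predicted; stop the CPU from
             * speculatively consuming 'from' before the check has
             * architecturally resolved.
             */
            barrier_nospec();
            res = raw_copy_from_user(to, from, n);
        }
        if (unlikely(res))
            memset(to + (n - res), 0, res); /* zero the uncopied tail */
        return res;
    }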
Reported-by: Jordy Zomer Suggested-by: Linus Torvalds Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Acked-by: Daniel Borkmann # BPF bits Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- include/linux/nospec.h | 4 ++++ kernel/bpf/core.c | 2 -- lib/usercopy.c | 7 +++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index c1e79f72cd89..9f0af4f116d9 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -11,6 +11,10 @@ struct task_struct; +#ifndef barrier_nospec +# define barrier_nospec() do { } while (0) +#endif + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d3a1f25f8ec2..d314c4c686c4 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1640,9 +1640,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack) * reuse preexisting logic from Spectre v1 mitigation that * happens to produce the required code on x86 for v4 as well. */ -#ifdef CONFIG_X86 barrier_nospec(); -#endif CONT; #define LDST(SIZEOP, SIZE) \ STX_MEM_##SIZEOP: \ diff --git a/lib/usercopy.c b/lib/usercopy.c index 7413dd300516..7ee63df042d7 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -3,6 +3,7 @@ #include #include #include +#include /* out-of-line parts */ @@ -12,6 +13,12 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { + /* + * Ensure that bad access_ok() speculation will not + * lead to nasty side effects *after* the copy is + * finished: + */ + barrier_nospec(); instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } -- Gitee From dfba71b1a47da52bf54a7c9ed5e3c9cd21e44850 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 5 Apr 2022 21:22:06 +0800 Subject: [PATCH 177/243] drivers: net: slip: fix NPD bug in sl_tx_timeout() stable inclusion from stable-5.10.112 commit ca24c5e8f0ac3d43ec0cff29e1c861be73aff165 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: gaochao --------------------------------------- [ Upstream commit ec4eb8a86ade4d22633e1da2a7d85a846b7d1798 ] When a slip driver is detaching, the slip_close() will act to cleanup necessary resources and sl->tty is set to NULL in slip_close(). Meanwhile, the packet we transmit is blocked, sl_tx_timeout() will be called. Although slip_close() and sl_tx_timeout() use sl->lock to synchronize, we don`t judge whether sl->tty equals to NULL in sl_tx_timeout() and the null pointer dereference bug will happen. (Thread 1) | (Thread 2) | slip_close() | spin_lock_bh(&sl->lock) | ... ... | sl->tty = NULL //(1) sl_tx_timeout() | spin_unlock_bh(&sl->lock) spin_lock(&sl->lock); | ... | ... tty_chars_in_buffer(sl->tty)| if (tty->ops->..) //(2) | ... | synchronize_rcu() We set NULL to sl->tty in position (1) and dereference sl->tty in position (2). This patch adds check in sl_tx_timeout(). If sl->tty equals to NULL, sl_tx_timeout() will goto out. 
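An abridged sketch of the timeout path after the fix (kernel-style, compiles only inside the driver since struct slip comes from the driver-local slip.h; the full one-line change is in the hunk below): sl->tty is dereferenced only under sl->lock and only if it is still non-NULL, because slip_close() clears it under the same lock.

    #include <linux/netdevice.h>
    #include "slip.h"    /* struct slip (driver-local) */

    static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue)
    {
        struct slip *sl = netdev_priv(dev);

        spin_lock(&sl->lock);
        if (netif_queue_stopped(dev)) {
            if (!netif_running(dev) || !sl->tty)
                goto out;    /* line discipline already detached */
            /* ... original timeout handling using sl->tty ... */
        }
    out:
        spin_unlock(&sl->lock);
    }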
Signed-off-by: Duoming Zhou Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20220405132206.55291-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: gc1202 Signed-off-by: Ywenrui44091 --- drivers/net/slip/slip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index f81fb0b13a94..369bd30fed35 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -468,7 +468,7 @@ static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue) spin_lock(&sl->lock); if (netif_queue_stopped(dev)) { - if (!netif_running(dev)) + if (!netif_running(dev) || !sl->tty) goto out; /* May be we must check transmitter timeout here ? -- Gitee From ed78c575d3981d537bb70260a6f0d3379c407ada Mon Sep 17 00:00:00 2001 From: Zhang Zhengming Date: Tue, 20 Jun 2023 19:55:52 +0800 Subject: [PATCH 178/243] relayfs: fix out-of-bounds access in relay_file_read stable inclusion from stable-v5.10.180 commit 1b0df44753bf9e45eaf5cee34f87597193f862e8 category: bugfix issue: #I7FM5F bugzilla: https://gitee.com/src-openeuler/kernel/issues/I7E5C1 CVE: CVE-2023-3268 Signed-off-by: Zhang jialin Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1b0df44753bf9e45eaf5cee34f87597193f862e8 ---------------------------------------- commit 43ec16f1450f4936025a9bdf1a273affdb9732c1 upstream. There is a crash in relay_file_read, as the var from point to the end of last subbuf. The oops looks something like: pc : __arch_copy_to_user+0x180/0x310 lr : relay_file_read+0x20c/0x2c8 Call trace: __arch_copy_to_user+0x180/0x310 full_proxy_read+0x68/0x98 vfs_read+0xb0/0x1d0 ksys_read+0x6c/0xf0 __arm64_sys_read+0x20/0x28 el0_svc_common.constprop.3+0x84/0x108 do_el0_svc+0x74/0x90 el0_svc+0x1c/0x28 el0_sync_handler+0x88/0xb0 el0_sync+0x148/0x180 We get the condition by analyzing the vmcore: 1). The last produced byte and last consumed byte both at the end of the last subbuf 2). A softirq calls function(e.g __blk_add_trace) to write relay buffer occurs when an program is calling relay_file_read_avail(). relay_file_read relay_file_read_avail relay_file_read_consume(buf, 0, 0); //interrupted by softirq who will write subbuf .... 
return 1; //read_start point to the end of the last subbuf read_start = relay_file_read_start_pos //avail is equal to subsize avail = relay_file_read_subbuf_avail //from points to an invalid memory address from = buf->start + read_start //system is crashed copy_to_user(buffer, from, avail) Link: https://lkml.kernel.org/r/20230419040203.37676-1-zhang.zhengming@h3c.com Fixes: 8d62fdebdaf9 ("relay file read: start-pos fix") Signed-off-by: Zhang Zhengming Reviewed-by: Zhao Lei Reviewed-by: Zhou Kete Reviewed-by: Pengcheng Yang Cc: Jens Axboe Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: GONG, Ruiqi (cherry picked from commit 6b2322dbd44d1e023b3f0743d65f4a9dcf5a64f6) Signed-off-by: Ywenrui44091 --- kernel/relay.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/relay.c b/kernel/relay.c index b08d936d5fa7..9cae6bf2e66a 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1077,7 +1077,8 @@ static size_t relay_file_read_start_pos(struct rchan_buf *buf) size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; size_t consumed = buf->subbufs_consumed % n_subbufs; - size_t read_pos = consumed * subbuf_size + buf->bytes_consumed; + size_t read_pos = (consumed * subbuf_size + buf->bytes_consumed) + % (n_subbufs * subbuf_size); read_subbuf = read_pos / subbuf_size; padding = buf->padding[read_subbuf]; -- Gitee From 986b8295b51a2491d068883f692cfb96f933a431 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Wed, 31 May 2023 18:28:04 +0800 Subject: [PATCH 179/243] net/sched: flower: fix possible OOB write in fl_set_geneve_opt() stable inclusion from stable-v5.10.183 commit 7c5c67aa294444b53f697dc3ddce61b33ff8badd category: bugfix issue:#I7FM6A CVE: CVE-2023-35788 Signed-off-by: wangjingjin --------------------------------------- [ Upstream commit 4d56304e5827c8cc8cc18c75343d283af7c4825c ] If we send two TCA_FLOWER_KEY_ENC_OPTS_GENEVE packets and their total size is 252 bytes(key->enc_opts.len = 252) then key->enc_opts.len = opt->length = data_len / 4 = 0 when the third TCA_FLOWER_KEY_ENC_OPTS_GENEVE packet enters fl_set_geneve_opt. This bypasses the next bounds check and results in an out-of-bounds. 
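To make the arithmetic concrete, a standalone illustration in plain userspace C (assuming the usual definitions FLOW_DIS_TUN_OPTS_MAX == 255 and a 4-byte struct geneve_opt header): once the accumulated length exceeds 251 bytes there is no room left for even another option header, so a further option must be rejected instead of written past the array.

    #include <stdio.h>

    #define FLOW_DIS_TUN_OPTS_MAX 255  /* size of enc_opts.data[] */
    #define GENEVE_OPT_HDR_LEN      4  /* sizeof(struct geneve_opt) header */

    int main(void)
    {
        int enc_opts_len = 252;        /* value reached in the report above */

        if (enc_opts_len > FLOW_DIS_TUN_OPTS_MAX - GENEVE_OPT_HDR_LEN)
            printf("len %d: reject with -ERANGE, only %d byte(s) left\n",
                   enc_opts_len, FLOW_DIS_TUN_OPTS_MAX - enc_opts_len);
        return 0;
    }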
Fixes: 0a6e77784f49 ("net/sched: allow flower to match tunnel options") Signed-off-by: Hangyu Hua Reviewed-by: Simon Horman Reviewed-by: Pieter Jansen van Vuuren Link: https://lore.kernel.org/r/20230531102805.27090-1-hbh25y@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: wangjingjin Signed-off-by: Ywenrui44091 --- net/sched/cls_flower.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 8ff6945b9f8f..275122cc0b6d 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1080,6 +1080,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key, if (option_len > sizeof(struct geneve_opt)) data_len = option_len - sizeof(struct geneve_opt); + if (key->enc_opts.len > FLOW_DIS_TUN_OPTS_MAX - 4) + return -ERANGE; + opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len]; memset(opt, 0xff, option_len); opt->length = data_len / 4; -- Gitee From 511557ddc94c112ecdf7542be55b6db7176d9483 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 29 Jan 2023 16:17:40 +0100 Subject: [PATCH 180/243] fbcon: Check font dimension limits stable inclusion from stable-v5.10.168 commit 28d190882ba55cbcee1db8e4ae90c149178dcf64 category: bugfix issue:#I7FW72 CVE: CVE-2023-3161 Signed-off-by: wangjingjin --------------------------------------- commit 2b09d5d364986f724f17001ccfe4126b9b43a0be upstream. blit_x and blit_y are u32, so fbcon currently cannot support fonts larger than 32x32. The 32x32 case also needs shifting an unsigned int, to properly set bit 31, otherwise we get "UBSAN: shift-out-of-bounds in fbcon_set_font", as reported on: http://lore.kernel.org/all/IA1PR07MB98308653E259A6F2CE94A4AFABCE9@IA1PR07MB9830.namprd07.prod.outlook.com Kernel Branch: 6.2.0-rc5-next-20230124 Kernel config: https://drive.google.com/file/d/1F-LszDAizEEH0ZX0HcSR06v5q8FPl2Uv/view?usp=sharing Reproducer: https://drive.google.com/file/d/1mP1jcLBY7vWCNM60OMf-ogw-urQRjNrm/view?usp=sharing Reported-by: Sanan Hasanov Signed-off-by: Samuel Thibault Fixes: 2d2699d98492 ("fbcon: font setting should check limitation of driver") Cc: stable@vger.kernel.org Tested-by: Miko Larsson Reviewed-by: Greg Kroah-Hartman Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wangjingjin Signed-off-by: Ywenrui44091 --- drivers/video/fbdev/core/fbcon.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index a876b2a95343..da087eda79b2 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2469,9 +2469,12 @@ static int fbcon_set_font(struct vc_data *vc, struct console_font *font, h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres)) return -EINVAL; + if (font->width > 32 || font->height > 32) + return -EINVAL; + /* Make sure drawing engine can handle the font */ - if (!(info->pixmap.blit_x & (1 << (font->width - 1))) || - !(info->pixmap.blit_y & (1 << (font->height - 1)))) + if (!(info->pixmap.blit_x & BIT(font->width - 1)) || + !(info->pixmap.blit_y & BIT(font->height - 1))) return -EINVAL; /* Make sure driver can handle the font length */ -- Gitee From ecff6d5a254306a27678bbc6bbf91dd68edf6800 Mon Sep 17 00:00:00 2001 From: Xia Fukun Date: Wed, 28 Jun 2023 14:46:14 +0800 Subject: [PATCH 181/243] drm/msm/dpu: Add check for pstates stable inclusion from stable-v5.10.173 commit e9743b3052e125c44b555f07f2876a4bdccfd983 category: bugfix bugzilla: 
https://gitee.com/src-openeuler/kernel/issues/I7F2UT CVE: CVE-2023-3220 Signed-off-by: Lin shengwang Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=e9743b3052e125c44b555f07f2876a4bdccfd983 -------------------------------- [ Upstream commit 93340e10b9c5fc86730d149636e0aa8b47bb5a34 ] As kzalloc may fail and return NULL pointer, it should be better to check pstates in order to avoid the NULL pointer dereference. Fixes: 25fdd5933e4c ("drm/msm: Add SDM845 DPU support") Signed-off-by: Jiasheng Jiang Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/514160/ Link: https://lore.kernel.org/r/20221206080236.43687-1-jiasheng@iscas.ac.cn Signed-off-by: Dmitry Baryshkov Signed-off-by: Sasha Levin Signed-off-by: Xia Fukun Signed-off-by: Ywenrui44091 --- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index f56414a06ec4..9bc4a1cd9ac6 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -831,6 +831,8 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc, struct drm_rect crtc_rect = { 0 }; pstates = kzalloc(sizeof(*pstates) * DPU_STAGE_MAX * 4, GFP_KERNEL); + if (!pstates) + return -ENOMEM; if (!state->enable || !state->active) { DPU_DEBUG("crtc%d -> enable %d, active %d, skip atomic_check\n", -- Gitee From fcdcdcdf4f9f0b528a177bd1f30dd6bdbf4fb5a9 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Sat, 9 Apr 2022 13:12:41 +0900 Subject: [PATCH 182/243] firewire: fix potential uaf in outbound_phy_packet_callback() stable inclusion from stable-5.10.115 commit e757ff4bbc893bc030c2d10143091094da73b9ff category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: gaochao --------------------------------------- commit b7c81f80246fac44077166f3e07103affe6db8ff upstream. &e->event and e point to the same address, and &e->event could be freed in queue_event. So there is a potential uaf issue if we dereference e after calling queue_event(). Fix this by adding a temporary variable to maintain e->client in advance, this can avoid the potential uaf issue. 
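The fix is an instance of a general rule: if a callee may free the object, copy out any field you still need before the call. A standalone analogue in plain userspace C (generic names, not the firewire-cdev types):

    #include <stdlib.h>

    struct client { int refcount; };

    struct event {
        struct client *client;
        int payload;
    };

    /* stand-in for queue_event(): consumes and frees the container */
    static void queue_event(struct client *c, struct event *e)
    {
        (void)c;
        free(e);
    }

    static void client_put(struct client *c)
    {
        c->refcount--;
    }

    static void complete_event(struct event *e)
    {
        struct client *client = e->client;  /* copy before the call */

        queue_event(client, e);             /* 'e' may be gone now */
        client_put(client);                 /* safe: uses the local copy */
    }

    int main(void)
    {
        static struct client c = { .refcount = 1 };
        struct event *e = malloc(sizeof(*e));

        if (!e)
            return 1;
        e->client = &c;
        e->payload = 0;
        complete_event(e);
        return 0;
    }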
Cc: Signed-off-by: Chengfeng Ye Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20220409041243.603210-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman Signed-off-by: gc1202 Signed-off-by: Ywenrui44091 --- drivers/firewire/core-cdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index fb6c651214f3..b0cc3f1e9bb0 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1480,6 +1480,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, { struct outbound_phy_packet_event *e = container_of(packet, struct outbound_phy_packet_event, p); + struct client *e_client; switch (status) { /* expected: */ @@ -1496,9 +1497,10 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, } e->phy_packet.data[0] = packet->timestamp; + e_client = e->client; queue_event(e->client, &e->event, &e->phy_packet, sizeof(e->phy_packet) + e->phy_packet.length, NULL, 0); - client_put(e->client); + client_put(e_client); } static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg) -- Gitee From 06d25e28f3c82a09120abb2f8461ea3ba7195c41 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sat, 18 Mar 2023 16:50:23 +0800 Subject: [PATCH 183/243] media: saa7134: fix use after free bug in saa7134_finidev due to race condition stable inclusion from stable-v5.10.180 commit 7dac96e9cc985328ec1fae92f0c245f559dc0e11 category: bugfix issue:#I7G1FE CVE: CVE-2023-35823 Signed-off-by: wangjingjin --------------------------------------- [ Upstream commit 30cf57da176cca80f11df0d9b7f71581fe601389 ] In saa7134_initdev, it will call saa7134_hwinit1. There are three function invoking here: saa7134_video_init1, saa7134_ts_init1 and saa7134_vbi_init1. All of them will init a timer with same function. Take saa7134_video_init1 as an example. It'll bound &dev->video_q.timeout with saa7134_buffer_timeout. In buffer_activate, the timer funtcion is started. If we remove the module or device which will call saa7134_finidev to make cleanup, there may be a unfinished work. The possible sequence is as follows, which will cause a typical UAF bug. Fix it by canceling the timer works accordingly before cleanup in saa7134_finidev. 
CPU0 CPU1 |saa7134_buffer_timeout saa7134_finidev | kfree(dev); | | | saa7134_buffer_next | //use dev Fixes: 1e7126b4a86a ("media: saa7134: Convert timers to use timer_setup()") Signed-off-by: Zheng Wang Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin Signed-off-by: wangjingjin Signed-off-by: Ywenrui44091 --- drivers/media/pci/saa7134/saa7134-ts.c | 1 + drivers/media/pci/saa7134/saa7134-vbi.c | 1 + drivers/media/pci/saa7134/saa7134-video.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/media/pci/saa7134/saa7134-ts.c b/drivers/media/pci/saa7134/saa7134-ts.c index 6a5053126237..437dbe5e75e2 100644 --- a/drivers/media/pci/saa7134/saa7134-ts.c +++ b/drivers/media/pci/saa7134/saa7134-ts.c @@ -300,6 +300,7 @@ int saa7134_ts_start(struct saa7134_dev *dev) int saa7134_ts_fini(struct saa7134_dev *dev) { + del_timer_sync(&dev->ts_q.timeout); saa7134_pgtable_free(dev->pci, &dev->ts_q.pt); return 0; } diff --git a/drivers/media/pci/saa7134/saa7134-vbi.c b/drivers/media/pci/saa7134/saa7134-vbi.c index 3f0b0933eed6..3e773690468b 100644 --- a/drivers/media/pci/saa7134/saa7134-vbi.c +++ b/drivers/media/pci/saa7134/saa7134-vbi.c @@ -185,6 +185,7 @@ int saa7134_vbi_init1(struct saa7134_dev *dev) int saa7134_vbi_fini(struct saa7134_dev *dev) { /* nothing */ + del_timer_sync(&dev->vbi_q.timeout); return 0; } diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c index 85d082baaadc..df9e3293015a 100644 --- a/drivers/media/pci/saa7134/saa7134-video.c +++ b/drivers/media/pci/saa7134/saa7134-video.c @@ -2153,6 +2153,7 @@ int saa7134_video_init1(struct saa7134_dev *dev) void saa7134_video_fini(struct saa7134_dev *dev) { + del_timer_sync(&dev->video_q.timeout); /* free stuff */ saa7134_pgtable_free(dev->pci, &dev->video_q.pt); saa7134_pgtable_free(dev->pci, &dev->vbi_q.pt); -- Gitee From fa5f2747ae3360e7bb5b3f14470d764802bb5304 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Wed, 28 Jun 2023 14:19:08 +0800 Subject: [PATCH 184/243] usb: gadget: udc: renesas_usb3: Fix use after free bug in renesas_usb3_remove due to race condition mainline inclusion from mainline-v6.4-rc1 commit 2b947f8769be8b8181dc795fd292d3e7120f5204 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I7EDYS CVE: CVE-2023-35828 Signed-off-by: Jialin Zhang Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2b947f8769be8b8181dc795fd292d3e7120f5204 -------------------------------- In renesas_usb3_probe, role_work is bound with renesas_usb3_role_work. renesas_usb3_start will be called to start the work. If we remove the driver which will call usbhs_remove, there may be an unfinished work. The possible sequence is as follows: CPU0 CPU1 renesas_usb3_role_work renesas_usb3_remove usb_role_switch_unregister device_unregister kfree(sw) //free usb3->role_sw usb_role_switch_set_role //use usb3->role_sw The usb3->role_sw could be freed under such circumstance and then used in usb_role_switch_set_role. This bug was found by static analysis. And note that removing a driver is a root-only operation, and should never happen in normal case. But the root user may directly remove the device which will also trigger the remove function. Fix it by canceling the work before cleanup in the renesas_usb3_remove. 
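The ordering rule the patch applies, as an abridged sketch of the remove path (struct renesas_usb3 is the driver's private structure, so this only compiles inside renesas_usb3.c; the full function is in the hunk below): the work item that dereferences usb3->role_sw is flushed before the role switch is unregistered and freed.

    #include <linux/platform_device.h>
    #include <linux/workqueue.h>
    #include <linux/usb/role.h>

    static int renesas_usb3_remove_sketch(struct platform_device *pdev)
    {
        struct renesas_usb3 *usb3 = platform_get_drvdata(pdev);

        cancel_work_sync(&usb3->role_work);        /* no role_work in flight ... */
        usb_role_switch_unregister(usb3->role_sw); /* ... when role_sw goes away */
        /* remaining teardown unchanged */
        return 0;
    }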
Fixes: 39facfa01c9f ("usb: gadget: udc: renesas_usb3: Add register of usb role switch") Signed-off-by: Zheng Wang Reviewed-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20230320062931.505170-1-zyytlz.wz@163.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jialin Zhang Signed-off-by: Ywenrui44091 --- drivers/usb/gadget/udc/renesas_usb3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 57d417a7c3e0..ba8b0f10a1d2 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2566,6 +2566,7 @@ static int renesas_usb3_remove(struct platform_device *pdev) debugfs_remove_recursive(usb3->dentry); device_remove_file(&pdev->dev, &dev_attr_role); + cancel_work_sync(&usb3->role_work); usb_role_switch_unregister(usb3->role_sw); usb_del_gadget_udc(&usb3->gadget); -- Gitee From 74901906c69d814857bc278dcad1390701034ab9 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sat, 18 Mar 2023 16:15:06 +0800 Subject: [PATCH 185/243] media: dm1105: Fix use after free bug in dm1105_remove due to race condition stable inclusion from stable-v5.10.180 commit e9d64e90a0ada4d00ac6562e351ef10ae7d9b911 category: bugfix issue:#I7G6D8 CVE: CVE-2023-35824 Signed-off-by: wangjingjin --------------------------------------- [ Upstream commit 5abda7a16698d4d1f47af1168d8fa2c640116b4a ] In dm1105_probe, it called dm1105_ir_init and bound &dm1105->ir.work with dm1105_emit_key. When it handles IRQ request with dm1105_irq, it may call schedule_work to start the work. When we call dm1105_remove to remove the driver, there may be a sequence as follows: Fix it by finishing the work before cleanup in dm1105_remove CPU0 CPU1 |dm1105_emit_key dm1105_remove | dm1105_ir_exit | rc_unregister_device | rc_free_device | rc_dev_release | kfree(dev); | | | rc_keydown | //use Fixes: 34d2f9bf189c ("V4L/DVB: dm1105: use dm1105_dev & dev instead of dm1105dvb") Signed-off-by: Zheng Wang Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin Signed-off-by: wangjingjin Signed-off-by: Ywenrui44091 --- drivers/media/pci/dm1105/dm1105.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/pci/dm1105/dm1105.c b/drivers/media/pci/dm1105/dm1105.c index 9dce31d2b525..d2e194a24e7e 100644 --- a/drivers/media/pci/dm1105/dm1105.c +++ b/drivers/media/pci/dm1105/dm1105.c @@ -1178,6 +1178,7 @@ static void dm1105_remove(struct pci_dev *pdev) struct dvb_demux *dvbdemux = &dev->demux; struct dmx_demux *dmx = &dvbdemux->dmx; + cancel_work_sync(&dev->ir.work); dm1105_ir_exit(dev); dmx->close(dmx); dvb_net_release(&dev->dvbnet); -- Gitee From 732df51f3d07561b6788189dff95a7d22d6bbaf8 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Sun, 25 Jun 2023 12:15:52 +0800 Subject: [PATCH 186/243] media: rkvdec: fix use after free bug in rkvdec_remove stable inclusion from stable-v5.10.180 commit de19d02d734ef29f5dbd2c12fe810fa960ecd83f category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I7EDZ3 CVE: CVE-2023-35829 Signed-off-by: Zhang Xiaoxu Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=de19d02d734ef29f5dbd2c12fe810fa960ecd83f -------------------------------- [ Upstream commit 3228cec23b8b29215e18090c6ba635840190993d ] In rkvdec_probe, rkvdec->watchdog_work is bound with rkvdec_watchdog_func. Then rkvdec_vp9_run may be called to start the work. If we remove the module which will call rkvdec_remove to make cleanup, there may be a unfinished work. 
The possible sequence is as follows, which will cause a typical UAF bug. Fix it by canceling the work before cleanup in rkvdec_remove. CPU0 CPU1 |rkvdec_watchdog_func rkvdec_remove | rkvdec_v4l2_cleanup| v4l2_m2m_release | kfree(m2m_dev); | | | v4l2_m2m_get_curr_priv | m2m_dev->curr_ctx //use Fixes: cd33c830448b ("media: rkvdec: Add the rkvdec driver") Signed-off-by: Zheng Wang Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: Zhang Xiaoxu (cherry picked from commit df1542ddcdaeaf49815717f02398f22c965c46d6) Signed-off-by: Yaowenrui Signed-off-by: Ywenrui44091 --- drivers/staging/media/rkvdec/rkvdec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c index a7788e7a9542..0a6a6ea23e5e 100644 --- a/drivers/staging/media/rkvdec/rkvdec.c +++ b/drivers/staging/media/rkvdec/rkvdec.c @@ -1079,6 +1079,8 @@ static int rkvdec_remove(struct platform_device *pdev) { struct rkvdec_dev *rkvdec = platform_get_drvdata(pdev); + cancel_delayed_work_sync(&rkvdec->watchdog_work); + rkvdec_v4l2_cleanup(rkvdec); pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); -- Gitee From 35d6e1873d772a2c9e2a39e924c5f334e561f0c4 Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Wed, 8 Mar 2023 00:43:38 +0800 Subject: [PATCH 187/243] memstick: r592: Fix UAF bug in r592_remove due to race condition stable inclusion from stable-v5.10.181~176 commit 5c23f6da62f71ebfeda6ea3960982ccd926ebb09 category: bugfix issue: #I7D510 CVE:CVE-2023-3141 Signed-off-by: Ywenrui44091 --------------------------------------- [ Upstream commit 63264422785021704c39b38f65a78ab9e4a186d7 ] In r592_probe, dev->detect_timer was bound with r592_detect_timer. In r592_irq function, the timer function will be invoked by mod_timer. If we remove the module which will call hantro_release to make cleanup, there may be a unfinished work. The possible sequence is as follows, which will cause a typical UAF bug. Fix it by canceling the work before cleanup in r592_remove. CPU0 CPU1 |r592_detect_timer r592_remove | memstick_free_host| put_device; | kfree(host); | | | queue_work | &host->media_checker //use Signed-off-by: Zheng Wang Link: https://lore.kernel.org/r/20230307164338.1246287-1-zyytlz.wz@163.com Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- drivers/memstick/host/r592.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index eaa2a94d18be..dd06c18495eb 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -828,7 +828,7 @@ static void r592_remove(struct pci_dev *pdev) /* Stop the processing thread. 
That ensures that we won't take any more requests */ kthread_stop(dev->io_thread); - + del_timer_sync(&dev->detect_timer); r592_enable_device(dev, false); while (!error && dev->req) { -- Gitee From 6dbbb970f4590bc83c7d4ae4c688e9c7fb9b04a9 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Fri, 28 Apr 2023 12:07:46 -0400 Subject: [PATCH 188/243] gfs2: Don't deref jdesc in evict stable inclusion from stable-v5.10.183 commit d03d31d3a206093b9b8759dddf0ba9bd843606ba category: bugfix issue:# I7FM2U CVE: CVE-2023-3212 Signed-off-by: wangjingjin --------------------------------------- [ Upstream commit 504a10d9e46bc37b23d0a1ae2f28973c8516e636 ] On corrupt gfs2 file systems the evict code can try to reference the journal descriptor structure, jdesc, after it has been freed and set to NULL. The sequence of events is: init_journal() ... fail_jindex: gfs2_jindex_free(sdp); <------frees journals, sets jdesc = NULL if (gfs2_holder_initialized(&ji_gh)) gfs2_glock_dq_uninit(&ji_gh); fail: iput(sdp->sd_jindex); <--references jdesc in evict_linked_inode evict() gfs2_evict_inode() evict_linked_inode() ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); <------references the now freed/zeroed sd_jdesc pointer. The call to gfs2_trans_begin is done because the truncate_inode_pages call can cause gfs2 events that require a transaction, such as removing journaled data (jdata) blocks from the journal. This patch fixes the problem by adding a check for sdp->sd_jdesc to function gfs2_evict_inode. In theory, this should only happen to corrupt gfs2 file systems, when gfs2 detects the problem, reports it, then tries to evict all the system inodes it has read in up to that point. Reported-by: Yang Lan Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher Signed-off-by: Sasha Levin Signed-off-by: wangjingjin Signed-off-by: Ywenrui44091 --- fs/gfs2/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index d2b7ecbd1b15..52fc4401bed9 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1412,6 +1412,14 @@ static void gfs2_evict_inode(struct inode *inode) if (inode->i_nlink || sb_rdonly(sb)) goto out; + /* + * In case of an incomplete mount, gfs2_evict_inode() may be called for + * system files without having an active journal to write to. In that + * case, skip the filesystem evict. + */ + if (!sdp->sd_jdesc) + goto out; + gfs2_holder_mark_uninitialized(&gh); ret = evict_should_delete(inode, &gh); if (ret == SHOULD_DEFER_EVICTION) -- Gitee From 9a526279f20ff44a3155be7566a422f74b43ad85 Mon Sep 17 00:00:00 2001 From: Ruihan Li Date: Sun, 16 Apr 2023 16:14:04 +0800 Subject: [PATCH 189/243] bluetooth: Perform careful capability checks in hci_sock_ioctl() mainline inclusion from mainline-v6.4-rc1~77^2~46^2~8 commit 25c150ac103a4ebeed0319994c742a90634ddf18 category: bugfix issue: #I6X3GE CVE: CVE-2023-2002 Signed-off-by: Ywenrui44091 --------------------------------------- Previously, capability was checked using capable(), which verified that the caller of the ioctl system call had the required capability. In addition, the result of the check would be stored in the HCI_SOCK_TRUSTED flag, making it persistent for the socket. However, malicious programs can abuse this approach by deliberately sharing an HCI socket with a privileged task. The HCI socket will be marked as trusted when the privileged task occasionally makes an ioctl call. 
This problem can be solved by using sk_capable() to check capability, which ensures that not only the current task but also the socket opener has the specified capability, thus reducing the risk of privilege escalation through the previously identified vulnerability. Cc: stable@vger.kernel.org Fixes: f81f5b2db869 ("Bluetooth: Send control open and close messages for HCI raw sockets") Signed-off-by: Ruihan Li Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- net/bluetooth/hci_sock.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 53f85d7c5f9e..e3318678870f 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1000,7 +1000,14 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, if (hci_sock_gen_cookie(sk)) { struct sk_buff *skb; - if (capable(CAP_NET_ADMIN)) + /* Perform careful checks before setting the HCI_SOCK_TRUSTED + * flag. Make sure that not only the current task but also + * the socket opener has the required capability, since + * privileged programs can be tricked into making ioctl calls + * on HCI sockets, and the socket should not be marked as + * trusted simply because the ioctl caller is privileged. + */ + if (sk_capable(sk, CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); /* Send event to monitor */ -- Gitee From f82f6f6641ad1f53576b5f5445db0a19a3b9b16e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 22 Feb 2023 09:52:32 -0800 Subject: [PATCH 190/243] bpf: add missing header file include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.170~10 commit 3b6ce54cfa2c04f0636fd0c985913af8703b408d category: bugfix issue: #I6ZJ2L CVE: CVE-2023-0459 Signed-off-by: Ywenrui44091 --------------------------------------- commit f3dd0c53370e70c0f9b7e931bbec12916f3bb8cc upstream. Commit 74e19ef0ff80 ("uaccess: Add speculation barrier to copy_from_user()") built fine on x86-64 and arm64, and that's the extent of my local build testing. It turns out those got the include incidentally through other header files ( in particular), but that was not true of other architectures, resulting in build errors kernel/bpf/core.c: In function ‘___bpf_prog_run’: kernel/bpf/core.c:1913:3: error: implicit declaration of function ‘barrier_nospec’ so just make sure to explicitly include the proper header file to make everybody see it. 
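A minimal sketch of the rule being enforced (illustrative file, not part of the patch): any translation unit that calls barrier_nospec() includes <linux/nospec.h> itself instead of relying on another header dragging it in on one architecture.

    #include <linux/nospec.h>   /* provides barrier_nospec() or its no-op stub */

    static inline void demo_consume_checked_index(void)
    {
        /* ... bounds check on an attacker-controlled index ... */
        barrier_nospec();       /* resolves on every architecture now */
        /* ... dependent load ... */
    }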
Fixes: 74e19ef0ff80 ("uaccess: Add speculation barrier to copy_from_user()") Reported-by: kernel test robot Reported-by: Viresh Kumar Reported-by: Huacai Chen Tested-by: Geert Uytterhoeven Tested-by: Dave Hansen Acked-by: Alexei Starovoitov Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- kernel/bpf/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d314c4c686c4..49a4052872dc 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include -- Gitee From 2ef62e1ef6856162adada4482032ec5192c25181 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 4 May 2023 12:15:25 +0000 Subject: [PATCH 191/243] ext4: avoid a potential slab-out-of-bounds in ext4_group_desc_csum stable inclusion from stable-v5.10.180~23 commit 0dde3141c527b09b96bef1e7eeb18b8127810ce9 category: bugfix issue: #I79YE0 CVE:CVE-2023-34256 Signed-off-by: Ywenrui44091 --------------------------------------- commit 4f04351888a83e595571de672e0a4a8b74f4fb31 upstream. When modifying the block device while it is mounted by the filesystem, syzbot reported the following: BUG: KASAN: slab-out-of-bounds in crc16+0x206/0x280 lib/crc16.c:58 Read of size 1 at addr ffff888075f5c0a8 by task syz-executor.2/15586 CPU: 1 PID: 15586 Comm: syz-executor.2 Not tainted 6.2.0-rc5-syzkaller-00205-gc96618275234 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/12/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1b1/0x290 lib/dump_stack.c:106 print_address_description+0x74/0x340 mm/kasan/report.c:306 print_report+0x107/0x1f0 mm/kasan/report.c:417 kasan_report+0xcd/0x100 mm/kasan/report.c:517 crc16+0x206/0x280 lib/crc16.c:58 ext4_group_desc_csum+0x81b/0xb20 fs/ext4/super.c:3187 ext4_group_desc_csum_set+0x195/0x230 fs/ext4/super.c:3210 ext4_mb_clear_bb fs/ext4/mballoc.c:6027 [inline] ext4_free_blocks+0x191a/0x2810 fs/ext4/mballoc.c:6173 ext4_remove_blocks fs/ext4/extents.c:2527 [inline] ext4_ext_rm_leaf fs/ext4/extents.c:2710 [inline] ext4_ext_remove_space+0x24ef/0x46a0 fs/ext4/extents.c:2958 ext4_ext_truncate+0x177/0x220 fs/ext4/extents.c:4416 ext4_truncate+0xa6a/0xea0 fs/ext4/inode.c:4342 ext4_setattr+0x10c8/0x1930 fs/ext4/inode.c:5622 notify_change+0xe50/0x1100 fs/attr.c:482 do_truncate+0x200/0x2f0 fs/open.c:65 handle_truncate fs/namei.c:3216 [inline] do_open fs/namei.c:3561 [inline] path_openat+0x272b/0x2dd0 fs/namei.c:3714 do_filp_open+0x264/0x4f0 fs/namei.c:3741 do_sys_openat2+0x124/0x4e0 fs/open.c:1310 do_sys_open fs/open.c:1326 [inline] __do_sys_creat fs/open.c:1402 [inline] __se_sys_creat fs/open.c:1396 [inline] __x64_sys_creat+0x11f/0x160 fs/open.c:1396 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f72f8a8c0c9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 f1 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f72f97e3168 EFLAGS: 00000246 ORIG_RAX: 0000000000000055 RAX: ffffffffffffffda RBX: 00007f72f8bac050 RCX: 00007f72f8a8c0c9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000280 RBP: 00007f72f8ae7ae9 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffd165348bf R14: 00007f72f97e3300 R15: 
0000000000022000 Replace le16_to_cpu(sbi->s_es->s_desc_size) with sbi->s_desc_size It reduces ext4's compiled text size, and makes the code more efficient (we remove an extra indirect reference and a potential byte swap on big endian systems), and there is no downside. It also avoids the potential KASAN / syzkaller failure, as a bonus. Reported-by: syzbot+fc51227e7100c9294894@syzkaller.appspotmail.com Reported-by: syzbot+8785e41224a3afd04321@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=70d28d11ab14bd7938f3e088365252aa923cff42 Link: https://syzkaller.appspot.com/bug?id=b85721b38583ecc6b5e72ff524c67302abbc30f3 Link: https://lore.kernel.org/all/000000000000ece18705f3b20934@google.com/ Fixes: 717d50e4971b ("Ext4: Uninitialized Block Groups") Cc: stable@vger.kernel.org Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20230504121525.3275886-1-tudor.ambarus@linaro.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ywenrui44091 Signed-off-by: Ywenrui44091 --- fs/ext4/super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 56bd2ea08090..7094f387c8da 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2808,11 +2808,9 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group, crc = crc16(crc, (__u8 *)gdp, offset); offset += sizeof(gdp->bg_checksum); /* skip checksum */ /* for checksum of struct ext4_group_desc do the rest...*/ - if (ext4_has_feature_64bit(sb) && - offset < le16_to_cpu(sbi->s_es->s_desc_size)) + if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size) crc = crc16(crc, (__u8 *)gdp + offset, - le16_to_cpu(sbi->s_es->s_desc_size) - - offset); + sbi->s_desc_size - offset); out: return cpu_to_le16(crc); -- Gitee From ad4a2e04da74979d08eca95ae9992c2ba3632722 Mon Sep 17 00:00:00 2001 From: George Kennedy Date: Tue, 24 Jan 2023 11:16:54 -0500 Subject: [PATCH 192/243] vc_screen: move load of struct vc_data pointer in vcs_read() to avoid UAF stable inclusion from stable-v5.10.168~86 commit 55515d7d8743b71b80bfe68e89eb9d92630626ab category: bugfix issue:#I7NZ47 CVE: CVE-2023-3567 Signed-off-by: Ywenrui --------------------------- [ Upstream commit 226fae124b2dac217ea5436060d623ff3385bc34 ] After a call to console_unlock() in vcs_read() the vc_data struct can be freed by vc_deallocate(). Because of that, the struct vc_data pointer load must be done at the top of while loop in vcs_read() to avoid a UAF when vcs_size() is called. Syzkaller reported a UAF in vcs_size(). BUG: KASAN: use-after-free in vcs_size (drivers/tty/vt/vc_screen.c:215) Read of size 4 at addr ffff8881137479a8 by task 4a005ed81e27e65/1537 CPU: 0 PID: 1537 Comm: 4a005ed81e27e65 Not tainted 6.2.0-rc5 #1 Hardware name: Red Hat KVM, BIOS 1.15.0-2.module Call Trace: __asan_report_load4_noabort (mm/kasan/report_generic.c:350) vcs_size (drivers/tty/vt/vc_screen.c:215) vcs_read (drivers/tty/vt/vc_screen.c:415) vfs_read (fs/read_write.c:468 fs/read_write.c:450) ... Allocated by task 1191: ... kmalloc_trace (mm/slab_common.c:1069) vc_allocate (./include/linux/slab.h:580 ./include/linux/slab.h:720 drivers/tty/vt/vt.c:1128 drivers/tty/vt/vt.c:1108) con_install (drivers/tty/vt/vt.c:3383) tty_init_dev (drivers/tty/tty_io.c:1301 drivers/tty/tty_io.c:1413 drivers/tty/tty_io.c:1390) tty_open (drivers/tty/tty_io.c:2080 drivers/tty/tty_io.c:2126) chrdev_open (fs/char_dev.c:415) do_dentry_open (fs/open.c:883) vfs_open (fs/open.c:1014) ... Freed by task 1548: ... 
kfree (mm/slab_common.c:1021) vc_port_destruct (drivers/tty/vt/vt.c:1094) tty_port_destructor (drivers/tty/tty_port.c:296) tty_port_put (drivers/tty/tty_port.c:312) vt_disallocate_all (drivers/tty/vt/vt_ioctl.c:662 (discriminator 2)) vt_ioctl (drivers/tty/vt/vt_ioctl.c:903) tty_ioctl (drivers/tty/tty_io.c:2776) ... The buggy address belongs to the object at ffff888113747800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 424 bytes inside of 1024-byte region [ffff888113747800, ffff888113747c00) The buggy address belongs to the physical page: page:00000000b3fe6c7c refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x113740 head:00000000b3fe6c7c order:3 compound_mapcount:0 subpages_mapcount:0 compound_pincount:0 anon flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff) raw: 0017ffffc0010200 ffff888100042dc0 0000000000000000 dead000000000001 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888113747880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888113747900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb > ffff888113747980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888113747a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888113747a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Disabling lock debugging due to kernel taint Fixes: ac751efa6a0d ("console: rename acquire/release_console_sem() to console_lock/unlock()") Reported-by: syzkaller Suggested-by: Jiri Slaby Signed-off-by: George Kennedy Link: https://lore.kernel.org/r/1674577014-12374-1-git-send-email-george.kennedy@oracle.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Ywenrui44091 Signed-off-by: wanxiaoqing --- drivers/tty/vt/vc_screen.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index 1850bacdb5b0..f566eb1839dc 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -386,10 +386,6 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) uni_mode = use_unicode(inode); attr = use_attributes(inode); - ret = -ENXIO; - vc = vcs_vc(inode, &viewed); - if (!vc) - goto unlock_out; ret = -EINVAL; if (pos < 0) @@ -407,6 +403,11 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) unsigned int this_round, skip = 0; int size; + ret = -ENXIO; + vc = vcs_vc(inode, &viewed); + if (!vc) + goto unlock_out; + /* Check whether we are above size each round, * as copy_to_user at the end of this loop * could sleep. -- Gitee From 654998a8914ab1629c37950352b50906a140fdfd Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sat, 29 Oct 2022 17:41:01 +0800 Subject: [PATCH 193/243] net: tun: fix bugs for oversize packet when napi frags enabled stable inclusion from stable-5.10.154 commit 3583826b443a63681deaa855048d3f2b742af47e category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 363a5328f4b0517e59572118ccfb7c626d81dca9 ] Recently, we got two syzkaller problems because of oversize packet when napi frags enabled. One of the problems is because the first seg size of the iov_iter from user space is very big, it is 2147479538 which is bigger than the threshold value for bail out early in __alloc_pages(). 
And skb->pfmemalloc is true, __kmalloc_reserve() would use pfmemalloc reserves without __GFP_NOWARN flag. Thus we got a warning as following: ======================================================== WARNING: CPU: 1 PID: 17965 at mm/page_alloc.c:5295 __alloc_pages+0x1308/0x16c4 mm/page_alloc.c:5295 ... Call trace: __alloc_pages+0x1308/0x16c4 mm/page_alloc.c:5295 __alloc_pages_node include/linux/gfp.h:550 [inline] alloc_pages_node include/linux/gfp.h:564 [inline] kmalloc_large_node+0x94/0x350 mm/slub.c:4038 __kmalloc_node_track_caller+0x620/0x8e4 mm/slub.c:4545 __kmalloc_reserve.constprop.0+0x1e4/0x2b0 net/core/skbuff.c:151 pskb_expand_head+0x130/0x8b0 net/core/skbuff.c:1654 __skb_grow include/linux/skbuff.h:2779 [inline] tun_napi_alloc_frags+0x144/0x610 drivers/net/tun.c:1477 tun_get_user+0x31c/0x2010 drivers/net/tun.c:1835 tun_chr_write_iter+0x98/0x100 drivers/net/tun.c:2036 The other problem is because odd IPv6 packets without NEXTHDR_NONE extension header and have big packet length, it is 2127925 which is bigger than ETH_MAX_MTU(65535). After ipv6_gso_pull_exthdrs() in ipv6_gro_receive(), network_header offset and transport_header offset are all bigger than U16_MAX. That would trigger skb->network_header and skb->transport_header overflow error, because they are all '__u16' type. Eventually, it would affect the value for __skb_push(skb, value), and make it be a big value. After __skb_push() in ipv6_gro_receive(), skb->data would less than skb->head, an out of bounds memory bug occurred. That would trigger the problem as following: ================================================================== BUG: KASAN: use-after-free in eth_type_trans+0x100/0x260 ... Call trace: dump_backtrace+0xd8/0x130 show_stack+0x1c/0x50 dump_stack_lvl+0x64/0x7c print_address_description.constprop.0+0xbc/0x2e8 print_report+0x100/0x1e4 kasan_report+0x80/0x120 __asan_load8+0x78/0xa0 eth_type_trans+0x100/0x260 napi_gro_frags+0x164/0x550 tun_get_user+0xda4/0x1270 tun_chr_write_iter+0x74/0x130 do_iter_readv_writev+0x130/0x1ec do_iter_write+0xbc/0x1e0 vfs_writev+0x13c/0x26c To fix the problems, restrict the packet size less than (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN) which has considered reserved skb space in napi_alloc_skb() because transport_header is an offset from skb->head. Add len check in tun_napi_alloc_frags() simply. 
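The second failure mode comes down to 16-bit header offsets wrapping. A standalone illustration in plain C (value taken from the report; skb->network_header and skb->transport_header are __u16 offsets from skb->head):

    #include <stdio.h>

    int main(void)
    {
        unsigned int   pkt_off = 2127925;                 /* offset from the report */
        unsigned short stored  = (unsigned short)pkt_off; /* __u16 field in sk_buff */

        printf("offset %u is stored as %u after wrapping\n", pkt_off, stored);
        return 0;
    }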
Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Ziyang Xuan Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221029094101.1653855-1-william.xuanziyang@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3a292dc5b3a4..e44c3c24828c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1464,7 +1464,8 @@ static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, int err; int i; - if (it->nr_segs > MAX_SKB_FRAGS + 1) + if (it->nr_segs > MAX_SKB_FRAGS + 1 || + len > (ETH_MAX_MTU - NET_SKB_PAD - NET_IP_ALIGN)) return ERR_PTR(-EMSGSIZE); local_bh_disable(); -- Gitee From eccfb6e50207b5830b468809983d1625553937ec Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 13 Jul 2023 21:59:37 +0900 Subject: [PATCH 194/243] exfat: check if filename entries exceeds max filename length mainline inclusion from mainline-v6.5-rc5 commit d42334578eba1390859012ebb91e1e556d51db49 category: bugfix issue:#I7P7V8 CVE: CVE-2023-4273 Signed-off-by: Yao wenrui -------------------------------- exfat_extract_uni_name copies characters from a given file name entry into the 'uniname' variable. This variable is actually defined on the stack of the exfat_readdir() function. According to the definition of the 'exfat_uni_name' type, the file name should be limited 255 characters (+ null teminator space), but the exfat_get_uniname_from_ext_entry() function can write more characters because there is no check if filename entries exceeds max filename length. This patch add the check not to copy filename characters when exceeding max filename length. Cc: stable@vger.kernel.org Cc: Yuezhang Mo Reported-by: Maxim Suhanov Reviewed-by: Sungjong Seo Signed-off-by: Namjae Jeon Signed-off-by: wanxiaoqing --- fs/exfat/dir.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c index dedbc55cd48f..0d736bf97146 100644 --- a/fs/exfat/dir.c +++ b/fs/exfat/dir.c @@ -33,6 +33,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, { int i; struct exfat_entry_set_cache *es; + unsigned int uni_len = 0, len; es = exfat_get_dentry_set(sb, p_dir, entry, ES_ALL_ENTRIES); if (!es) @@ -51,7 +52,10 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb, if (exfat_get_entry_type(ep) != TYPE_EXTEND) break; - exfat_extract_uni_name(ep, uniname); + len = exfat_extract_uni_name(ep, uniname); + uni_len += len; + if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH) + break; uniname += EXFAT_FILE_NAME_LEN; } @@ -1026,7 +1030,8 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei, if (entry_type == TYPE_EXTEND) { unsigned short entry_uniname[16], unichar; - if (step != DIRENT_STEP_NAME) { + if (step != DIRENT_STEP_NAME || + name_len >= MAX_NAME_LENGTH) { step = DIRENT_STEP_FILE; continue; } -- Gitee From a90c296763570462f26b8e35f419a8855e8c627e Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 31 Jul 2023 18:42:36 +0200 Subject: [PATCH 195/243] net: tun_chr_open(): set sk_uid from current_fsuid() mainline inclusion from mainline-v6.5-rc5 commit 9bc3047374d5bec163e83e743709e23753376f0c category: bugfix issue:#I7P7V8 CVE: CVE-2023-4194 Signed-off-by: Yao wenrui -------------------------------- Commit a096ccca6e50 initializes the "sk_uid" field in the protocol socket (struct sock) from the "/dev/net/tun" 
device node's owner UID. Per original commit 86741ec25462 ("net: core: Add a UID field to struct sock.", 2016-11-04), that's wrong: the idea is to cache the UID of the userspace process that creates the socket. Commit 86741ec25462 mentions socket() and accept(); with "tun", the action that creates the socket is open("/dev/net/tun"). Therefore the device node's owner UID is irrelevant. In most cases, "/dev/net/tun" will be owned by root, so in practice, commit a096ccca6e50 has no observable effect: - before, "sk_uid" would be zero, due to undefined behavior (CVE-2023-1076), - after, "sk_uid" would be zero, due to "/dev/net/tun" being owned by root. What matters is the (fs)UID of the process performing the open(), so cache that in "sk_uid". Cc: Eric Dumazet Cc: Lorenzo Colitti Cc: Paolo Abeni Cc: Pietro Borrello Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Fixes: a096ccca6e50 ("tun: tun_chr_open(): correctly initialize socket uid") Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173435 Signed-off-by: Laszlo Ersek Signed-off-by: David S. Miller Signed-off-by: wanxiaoqing --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e44c3c24828c..118fea8f1715 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3435,7 +3435,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile->socket.file = file; tfile->socket.ops = &tun_socket_ops; - sock_init_data_uid(&tfile->socket, &tfile->sk, inode->i_uid); + sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid()); tfile->sk.sk_write_space = tun_sock_write_space; tfile->sk.sk_sndbuf = INT_MAX; -- Gitee From 3fc9fb8fc47e67e256c79d540429dd91acf4d8ea Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Mon, 31 Jul 2023 18:42:37 +0200 Subject: [PATCH 196/243] net: tap_open(): set sk_uid from current_fsuid() mainline inclusion from mainline-v6.5-rc5 commit 5c9241f3ceab3257abe2923a59950db0dc8bb737 category: bugfix issue:#I7P7V8 CVE: CVE-2023-4194 Signed-off-by: Yao wenrui -------------------------------- Commit 66b2c338adce initializes the "sk_uid" field in the protocol socket (struct sock) from the "/dev/tapX" device node's owner UID. Per original commit 86741ec25462 ("net: core: Add a UID field to struct sock.", 2016-11-04), that's wrong: the idea is to cache the UID of the userspace process that creates the socket. Commit 86741ec25462 mentions socket() and accept(); with "tap", the action that creates the socket is open("/dev/tapX"). Therefore the device node's owner UID is irrelevant. In most cases, "/dev/tapX" will be owned by root, so in practice, commit 66b2c338adce has no observable effect: - before, "sk_uid" would be zero, due to undefined behavior (CVE-2023-1076), - after, "sk_uid" would be zero, due to "/dev/tapX" being owned by root. What matters is the (fs)UID of the process performing the open(), so cache that in "sk_uid". Cc: Eric Dumazet Cc: Lorenzo Colitti Cc: Paolo Abeni Cc: Pietro Borrello Cc: netdev@vger.kernel.org Cc: stable@vger.kernel.org Fixes: 66b2c338adce ("tap: tap_open(): correctly initialize socket uid") Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173435 Signed-off-by: Laszlo Ersek Signed-off-by: David S. 
Miller Signed-off-by: wanxiaoqing --- drivers/net/tap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index e8a7d56befb2..7fc872659b62 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -523,7 +523,7 @@ static int tap_open(struct inode *inode, struct file *file) q->sock.state = SS_CONNECTED; q->sock.file = file; q->sock.ops = &tap_socket_ops; - sock_init_data_uid(&q->sock, &q->sk, inode->i_uid); + sock_init_data_uid(&q->sock, &q->sk, current_fsuid()); q->sk.sk_write_space = tap_sock_write_space; q->sk.sk_destruct = tap_sock_destruct; q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; -- Gitee From e4fbbde45005678128429425d6f0791d7c5bb927 Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Sat, 14 May 2022 13:07:11 +0800 Subject: [PATCH 197/243] net: vmxnet3: fix possible NULL pointer dereference in vmxnet3_rq_cleanup() stable inclusion from stable-5.10.118 commit 6e2caee5cddc3d9e0ad0484c9c21b9f10676c044 category: bugfix issue: #I7VS48 CVE: CVE-2023-4459 Signed-off-by: gaochao --------------------------------------- [ Upstream commit edf410cb74dc612fd47ef5be319c5a0bcd6e6ccd ] In vmxnet3_rq_create(), when dma_alloc_coherent() fails, vmxnet3_rq_destroy() is called. It sets rq->rx_ring[i].base to NULL. Then vmxnet3_rq_create() returns an error to its callers vmxnet3_rq_create_all() -> vmxnet3_change_mtu(). Then vmxnet3_change_mtu() calls vmxnet3_force_close() -> dev_close() in error handling code. And the driver calls vmxnet3_close() -> vmxnet3_quiesce_dev() -> vmxnet3_rq_cleanup_all() -> vmxnet3_rq_cleanup(). In vmxnet3_rq_cleanup(), rq->rx_ring[ring_idx].base is accessed, but this variable is NULL, causing a NULL pointer dereference. To fix this possible bug, an if statement is added to check whether rq->rx_ring[0].base is NULL in vmxnet3_rq_cleanup() and exit early if so. The error log in our fault-injection testing is shown as follows: [ 65.220135] BUG: kernel NULL pointer dereference, address: 0000000000000008 ... [ 65.222633] RIP: 0010:vmxnet3_rq_cleanup_all+0x396/0x4e0 [vmxnet3] ... [ 65.227977] Call Trace: ... [ 65.228262] vmxnet3_quiesce_dev+0x80f/0x8a0 [vmxnet3] [ 65.228580] vmxnet3_close+0x2c4/0x3f0 [vmxnet3] [ 65.228866] __dev_close_many+0x288/0x350 [ 65.229607] dev_close_many+0xa4/0x480 [ 65.231124] dev_close+0x138/0x230 [ 65.231933] vmxnet3_force_close+0x1f0/0x240 [vmxnet3] [ 65.232248] vmxnet3_change_mtu+0x75d/0x920 [vmxnet3] ...
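
The shape of this fix is a plain "already torn down" guard. The stand-alone sketch below uses invented types (rx_queue, rx_ring) rather than the driver's real structures, but it shows why a cleanup routine that can be reached again from dev_close() must tolerate a ring whose base pointer was already cleared by the destroy path:

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  struct rx_ring  { void *base; size_t size; };
  struct rx_queue { struct rx_ring rx_ring[2]; };

  static void rq_cleanup(struct rx_queue *rq)
  {
          /* ring has already been cleaned up */
          if (!rq->rx_ring[0].base)
                  return;

          /* the real cleanup walks descriptors through ...base,
           * so a NULL base here would be dereferenced */
          for (int i = 0; i < 2; i++)
                  memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size);
  }

  static void rq_destroy(struct rx_queue *rq)
  {
          for (int i = 0; i < 2; i++) {
                  free(rq->rx_ring[i].base);
                  rq->rx_ring[i].base = NULL;   /* what vmxnet3_rq_destroy() does */
          }
  }

  int main(void)
  {
          struct rx_queue rq = { 0 };

          rq.rx_ring[0].base = calloc(1, 64);
          rq.rx_ring[1].base = calloc(1, 64);
          rq.rx_ring[0].size = rq.rx_ring[1].size = 64;

          rq_destroy(&rq);   /* failed allocation path destroys the rings ... */
          rq_cleanup(&rq);   /* ... and the later close path must cope with that */

          puts("cleanup after destroy is safe");
          return 0;
  }

Without the two-line guard, the second call would write through the cleared base pointer, which mirrors the NULL dereference reported in the log above.
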
Fixes: d1a890fa37f27 ("net: VMware virtual Ethernet NIC driver: vmxnet3") Reported-by: TOTE Robot Signed-off-by: Zixuan Fu Link: https://lore.kernel.org/r/20220514050711.2636709-1-r33s3n6@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: gc1202 Signed-off-by: wanxiaoqing Signed-off-by: Yu Liao --- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 932a39945cc6..7b1d483299ba 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1654,6 +1654,10 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq, u32 i, ring_idx; struct Vmxnet3_RxDesc *rxd; + /* ring has already been cleaned up */ + if (!rq->rx_ring[0].base) + return; + for (ring_idx = 0; ring_idx < 2; ring_idx++) { for (i = 0; i < rq->rx_ring[ring_idx].size; i++) { #ifdef __BIG_ENDIAN_BITFIELD -- Gitee From 73a5d15e87c184befd7e87537cb3b2078ca95c4a Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Sat, 14 May 2022 13:06:56 +0800 Subject: [PATCH 198/243] net: vmxnet3: fix possible use-after-free bugs in vmxnet3_rq_alloc_rx_buf() stable inclusion from stable-5.10.118 commit a54d86cf418427584e0a3cd1e89f757c92df5e89 category: bugfix issue: #I7V74X CVE: CVE-2023-4387 Signed-off-by: gaochao --------------------------------------- [ Upstream commit 9e7fef9521e73ca8afd7da9e58c14654b02dfad8 ] In vmxnet3_rq_alloc_rx_buf(), when dma_map_single() fails, rbi->skb is freed immediately. Similarly, in another branch, when dma_map_page() fails, rbi->page is also freed. In the two cases, vmxnet3_rq_alloc_rx_buf() returns an error to its callers vmxnet3_rq_init() -> vmxnet3_rq_init_all() -> vmxnet3_activate_dev(). Then vmxnet3_activate_dev() calls vmxnet3_rq_cleanup_all() in error handling code, and rbi->skb or rbi->page are freed again in vmxnet3_rq_cleanup_all(), causing use-after-free bugs. To fix these possible bugs, rbi->skb and rbi->page should be cleared after they are freed. The error log in our fault-injection testing is shown as follows: [ 14.319016] BUG: KASAN: use-after-free in consume_skb+0x2f/0x150 ... [ 14.321586] Call Trace: ... [ 14.325357] consume_skb+0x2f/0x150 [ 14.325671] vmxnet3_rq_cleanup_all+0x33a/0x4e0 [vmxnet3] [ 14.326150] vmxnet3_activate_dev+0xb9d/0x2ca0 [vmxnet3] [ 14.326616] vmxnet3_open+0x387/0x470 [vmxnet3] ... [ 14.361675] Allocated by task 351: ... [ 14.362688] __netdev_alloc_skb+0x1b3/0x6f0 [ 14.362960] vmxnet3_rq_alloc_rx_buf+0x1b0/0x8d0 [vmxnet3] [ 14.363317] vmxnet3_activate_dev+0x3e3/0x2ca0 [vmxnet3] [ 14.363661] vmxnet3_open+0x387/0x470 [vmxnet3] ... [ 14.367309] [ 14.367412] Freed by task 351: ... [ 14.368932] __dev_kfree_skb_any+0xd2/0xe0 [ 14.369193] vmxnet3_rq_alloc_rx_buf+0x71e/0x8d0 [vmxnet3] [ 14.369544] vmxnet3_activate_dev+0x3e3/0x2ca0 [vmxnet3] [ 14.369883] vmxnet3_open+0x387/0x470 [vmxnet3] [ 14.370174] __dev_open+0x28a/0x420 [ 14.370399] __dev_change_flags+0x192/0x590 [ 14.370667] dev_change_flags+0x7a/0x180 [ 14.370919] do_setlink+0xb28/0x3570 [ 14.371150] rtnl_newlink+0x1160/0x1740 [ 14.371399] rtnetlink_rcv_msg+0x5bf/0xa50 [ 14.371661] netlink_rcv_skb+0x1cd/0x3e0 [ 14.371913] netlink_unicast+0x5dc/0x840 [ 14.372169] netlink_sendmsg+0x856/0xc40 [ 14.372420] ____sys_sendmsg+0x8a7/0x8d0 [ 14.372673] __sys_sendmsg+0x1c2/0x270 [ 14.372914] do_syscall_64+0x41/0x90 [ 14.373145] entry_SYSCALL_64_after_hwframe+0x44/0xae ... 
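
This patch is the same lesson in the opposite direction: a buffer freed on a mapping failure must also be forgotten, or a later cleanup pass will free it a second time. Below is a minimal userspace model (the rx_buf type and the simulated mapping failure are invented, this is not the vmxnet3 code) of why the added rbi->skb = NULL and rbi->page = NULL lines matter:

  #include <stdio.h>
  #include <stdlib.h>

  struct rx_buf { void *skb; };

  /* Fill path: on a (simulated) DMA mapping failure, free the buffer
   * and clear the stale pointer, as the patch does for rbi->skb/rbi->page. */
  static int alloc_rx_bufs(struct rx_buf *bufs, int n, int fail_at)
  {
          for (int i = 0; i < n; i++) {
                  bufs[i].skb = malloc(32);
                  if (i == fail_at) {              /* pretend dma_map_single() failed */
                          free(bufs[i].skb);
                          bufs[i].skb = NULL;      /* the one-line fix */
                          return -1;
                  }
          }
          return 0;
  }

  /* Error-path cleanup: frees whatever is still recorded as allocated. */
  static void cleanup_rx_bufs(struct rx_buf *bufs, int n)
  {
          for (int i = 0; i < n; i++) {
                  free(bufs[i].skb);               /* free(NULL) is a no-op */
                  bufs[i].skb = NULL;
          }
  }

  int main(void)
  {
          struct rx_buf bufs[4] = { 0 };

          if (alloc_rx_bufs(bufs, 4, 2))
                  cleanup_rx_bufs(bufs, 4);        /* no double free: slot 2 is NULL */
          puts("ok");
          return 0;
  }

Leaving the stale pointer in place is what turns an ordinary error path into the use-after-free shown in the KASAN report above.
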
Fixes: 5738a09d58d5a ("vmxnet3: fix checks for dma mapping errors") Reported-by: TOTE Robot Signed-off-by: Zixuan Fu Link: https://lore.kernel.org/r/20220514050656.2636588-1-r33s3n6@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: gc1202 Signed-off-by: wanxiaoqing Signed-off-by: Yu Liao --- drivers/net/vmxnet3/vmxnet3_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 7b1d483299ba..6678a734cc4d 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -595,6 +595,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { dev_kfree_skb_any(rbi->skb); + rbi->skb = NULL; rq->stats.rx_buf_alloc_failure++; break; } @@ -619,6 +620,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, if (dma_mapping_error(&adapter->pdev->dev, rbi->dma_addr)) { put_page(rbi->page); + rbi->page = NULL; rq->stats.rx_buf_alloc_failure++; break; } -- Gitee From 4afc38955d0d9a21a3af3c8bc1eabde8fa85afeb Mon Sep 17 00:00:00 2001 From: Zixuan Fu Date: Mon, 11 Apr 2022 18:45:34 +0800 Subject: [PATCH 199/243] fs: jfs: fix possible NULL pointer dereference in dbFree() stable inclusion from stable-5.10.121 commit 9dfa8d087bb854f613fcdbf1af4fb02c0b2d1e4f category: bugfix issue: #I7V73B CVE: CVE-2023-4385 Signed-off-by: gaochao --------------------------------------- [ Upstream commit 0d4837fdb796f99369cf7691d33de1b856bcaf1f ] In our fault-injection testing, the variable "nblocks" in dbFree() can be zero when kmalloc_array() fails in dtSearch(). In this case, the variable "mp" in dbFree() would be NULL and then it is dereferenced in "write_metapage(mp)". The failure log is listed as follows: [ 13.824137] BUG: kernel NULL pointer dereference, address: 0000000000000020 ... [ 13.827416] RIP: 0010:dbFree+0x5f7/0x910 [jfs] [ 13.834341] Call Trace: [ 13.834540] [ 13.834713] txFreeMap+0x7b4/0xb10 [jfs] [ 13.835038] txUpdateMap+0x311/0x650 [jfs] [ 13.835375] jfs_lazycommit+0x5f2/0xc70 [jfs] [ 13.835726] ? sched_dynamic_update+0x1b0/0x1b0 [ 13.836092] kthread+0x3c2/0x4a0 [ 13.836355] ? txLockFree+0x160/0x160 [jfs] [ 13.836763] ? kthread_unuse_mm+0x160/0x160 [ 13.837106] ret_from_fork+0x1f/0x30 [ 13.837402] ... This patch adds a NULL check of "mp" before "write_metapage(mp)" is called. Reported-by: TOTE Robot Signed-off-by: Zixuan Fu Signed-off-by: Dave Kleikamp Signed-off-by: Sasha Levin Signed-off-by: gc1202 Signed-off-by: wanxiaoqing Signed-off-by: Wang Hai --- fs/jfs/jfs_dmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index aedad59f8a45..9b44be5b4f2a 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -378,7 +378,8 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) } /* write the last buffer. */ - write_metapage(mp); + if (mp) + write_metapage(mp); IREAD_UNLOCK(ipbmap); -- Gitee From 0f385cfb3cafb6ec47ab86201610d5a3a1eb5537 Mon Sep 17 00:00:00 2001 From: Sungwoo Kim Date: Wed, 31 May 2023 01:39:56 -0400 Subject: [PATCH 200/243] Bluetooth: L2CAP: Fix use-after-free in l2cap_sock_ready_cb stable inclusion from stable-v5.10.189 commit 06f87c96216bc5cd1094c23492274f77f1d5dd3b category: bugfix issue: #I7UZ5X CVE: CVE-2023-40283 Signed-off-by: yaowenrui --------------------------------------- commit 1728137b33c00d5a2b5110ed7aafb42e7c32e4a1 upstream. l2cap_sock_release(sk) frees sk. 
However, sk's children are still alive and point to the already free'd sk's address. To fix this, l2cap_sock_release(sk) also cleans sk's children. ================================================================== BUG: KASAN: use-after-free in l2cap_sock_ready_cb+0xb7/0x100 net/bluetooth/l2cap_sock.c:1650 Read of size 8 at addr ffff888104617aa8 by task kworker/u3:0/276 CPU: 0 PID: 276 Comm: kworker/u3:0 Not tainted 6.2.0-00001-gef397bd4d5fb-dirty #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 Workqueue: hci2 hci_rx_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x72/0x95 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:306 [inline] print_report+0x175/0x478 mm/kasan/report.c:417 kasan_report+0xb1/0x130 mm/kasan/report.c:517 l2cap_sock_ready_cb+0xb7/0x100 net/bluetooth/l2cap_sock.c:1650 l2cap_chan_ready+0x10e/0x1e0 net/bluetooth/l2cap_core.c:1386 l2cap_config_req+0x753/0x9f0 net/bluetooth/l2cap_core.c:4480 l2cap_bredr_sig_cmd net/bluetooth/l2cap_core.c:5739 [inline] l2cap_sig_channel net/bluetooth/l2cap_core.c:6509 [inline] l2cap_recv_frame+0xe2e/0x43c0 net/bluetooth/l2cap_core.c:7788 l2cap_recv_acldata+0x6ed/0x7e0 net/bluetooth/l2cap_core.c:8506 hci_acldata_packet net/bluetooth/hci_core.c:3813 [inline] hci_rx_work+0x66e/0xbc0 net/bluetooth/hci_core.c:4048 process_one_work+0x4ea/0x8e0 kernel/workqueue.c:2289 worker_thread+0x364/0x8e0 kernel/workqueue.c:2436 kthread+0x1b9/0x200 kernel/kthread.c:376 ret_from_fork+0x2c/0x50 arch/x86/entry/entry_64.S:308 Allocated by task 288: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:374 [inline] __kasan_kmalloc+0x82/0x90 mm/kasan/common.c:383 kasan_kmalloc include/linux/kasan.h:211 [inline] __do_kmalloc_node mm/slab_common.c:968 [inline] __kmalloc+0x5a/0x140 mm/slab_common.c:981 kmalloc include/linux/slab.h:584 [inline] sk_prot_alloc+0x113/0x1f0 net/core/sock.c:2040 sk_alloc+0x36/0x3c0 net/core/sock.c:2093 l2cap_sock_alloc.constprop.0+0x39/0x1c0 net/bluetooth/l2cap_sock.c:1852 l2cap_sock_create+0x10d/0x220 net/bluetooth/l2cap_sock.c:1898 bt_sock_create+0x183/0x290 net/bluetooth/af_bluetooth.c:132 __sock_create+0x226/0x380 net/socket.c:1518 sock_create net/socket.c:1569 [inline] __sys_socket_create net/socket.c:1606 [inline] __sys_socket_create net/socket.c:1591 [inline] __sys_socket+0x112/0x200 net/socket.c:1639 __do_sys_socket net/socket.c:1652 [inline] __se_sys_socket net/socket.c:1650 [inline] __x64_sys_socket+0x40/0x50 net/socket.c:1650 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3f/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x72/0xdc Freed by task 288: kasan_save_stack+0x22/0x50 mm/kasan/common.c:45 kasan_set_track+0x25/0x30 mm/kasan/common.c:52 kasan_save_free_info+0x2e/0x50 mm/kasan/generic.c:523 ____kasan_slab_free mm/kasan/common.c:236 [inline] ____kasan_slab_free mm/kasan/common.c:200 [inline] __kasan_slab_free+0x10a/0x190 mm/kasan/common.c:244 kasan_slab_free include/linux/kasan.h:177 [inline] slab_free_hook mm/slub.c:1781 [inline] slab_free_freelist_hook mm/slub.c:1807 [inline] slab_free mm/slub.c:3787 [inline] __kmem_cache_free+0x88/0x1f0 mm/slub.c:3800 sk_prot_free net/core/sock.c:2076 [inline] __sk_destruct+0x347/0x430 net/core/sock.c:2168 sk_destruct+0x9c/0xb0 net/core/sock.c:2183 __sk_free+0x82/0x220 net/core/sock.c:2194 sk_free+0x7c/0xa0 net/core/sock.c:2205 sock_put include/net/sock.h:1991 [inline] l2cap_sock_kill+0x256/0x2b0 
net/bluetooth/l2cap_sock.c:1257 l2cap_sock_release+0x1a7/0x220 net/bluetooth/l2cap_sock.c:1428 __sock_release+0x80/0x150 net/socket.c:650 sock_close+0x19/0x30 net/socket.c:1368 __fput+0x17a/0x5c0 fs/file_table.c:320 task_work_run+0x132/0x1c0 kernel/task_work.c:179 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x113/0x120 kernel/entry/common.c:203 __syscall_exit_to_user_mode_work kernel/entry/common.c:285 [inline] syscall_exit_to_user_mode+0x21/0x50 kernel/entry/common.c:296 do_syscall_64+0x4c/0x90 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x72/0xdc The buggy address belongs to the object at ffff888104617800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 680 bytes inside of 1024-byte region [ffff888104617800, ffff888104617c00) The buggy address belongs to the physical page: page:00000000dbca6a80 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888104614000 pfn:0x104614 head:00000000dbca6a80 order:2 compound_mapcount:0 subpages_mapcount:0 compound_pincount:0 flags: 0x200000000010200(slab|head|node=0|zone=2) raw: 0200000000010200 ffff888100041dc0 ffffea0004212c10 ffffea0004234b10 raw: ffff888104614000 0000000000080002 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff888104617980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888104617a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff888104617a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff888104617b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff888104617b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Ack: This bug is found by FuzzBT with a modified Syzkaller. Other contributors are Ruoyu Wu and Hui Peng. 
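
The crux is that a listening socket can be released while sockets on its accept queue still point back at it. The toy model below only illustrates that dangling back-pointer; the types are invented, and the real l2cap_sock_cleanup_listen() goes further and kills each pending child channel rather than merely detaching it:

  #include <stdio.h>
  #include <stdlib.h>

  /* Stand-ins for a listening socket and the connections it accepted
   * but that user space never picked up. */
  struct child {
          struct child *next;
          struct toy_sock *parent;       /* back-pointer that must not dangle */
  };

  struct toy_sock {
          struct child *accept_q;
  };

  /* Idea behind the added l2cap_sock_cleanup_listen() call: walk the
   * accept queue and detach every child before the parent goes away. */
  static void cleanup_listen(struct toy_sock *parent)
  {
          for (struct child *c = parent->accept_q; c; c = c->next)
                  c->parent = NULL;
          parent->accept_q = NULL;
  }

  static void sock_release(struct toy_sock *parent)
  {
          cleanup_listen(parent);        /* the call this patch adds */
          free(parent);
  }

  int main(void)
  {
          struct toy_sock *parent = calloc(1, sizeof(*parent));

          if (!parent)
                  return 1;

          struct child c1 = { .next = NULL, .parent = parent };

          parent->accept_q = &c1;
          sock_release(parent);

          /* A later "channel ready" callback now sees no parent instead of
           * chasing a pointer into freed memory, as in the KASAN report. */
          printf("child's parent after release: %p\n", (void *)c1.parent);
          return 0;
  }
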
Signed-off-by: Sungwoo Kim Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Dong Chenchen Signed-off-by: yaowenrui --- net/bluetooth/l2cap_sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index d2c678520599..63bfbb5918fe 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -45,6 +45,7 @@ static const struct proto_ops l2cap_sock_ops; static void l2cap_sock_init(struct sock *sk, struct sock *parent); static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern); +static void l2cap_sock_cleanup_listen(struct sock *parent); bool l2cap_is_socket(struct socket *sock) { @@ -1414,6 +1415,7 @@ static int l2cap_sock_release(struct socket *sock) if (!sk) return 0; + l2cap_sock_cleanup_listen(sk); bt_sock_unlink(&l2cap_sk_list, sk); err = l2cap_sock_shutdown(sock, SHUT_RDWR); -- Gitee From 47082be005889f9fd067ab37cfeb68a0329d270f Mon Sep 17 00:00:00 2001 From: D Scott Phillips Date: Mon, 10 Oct 2022 19:21:40 -0700 Subject: [PATCH 201/243] arm64: Add AMPERE1 to the Spectre-BHB affected list stable inclusion from stable-5.10.153 commit 52a43b82006dc88f996bd06da5a3fcfef85220c8 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Ywenrui --------------------------------------- [ Upstream commit 0e5d5ae837c8ce04d2ddb874ec5f920118bd9d31 ] Per AmpereOne erratum AC03_CPU_12, "Branch history may allow control of speculative execution across software contexts," the AMPERE1 core needs the bhb clearing loop to mitigate Spectre-BHB, with a loop iteration count of 11. Signed-off-by: D Scott Phillips Link: https://lore.kernel.org/r/20221011022140.432370-1-scott@os.amperecomputing.com Reviewed-by: James Morse Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin Signed-off-by: Ywenrui Signed-off-by: Lin Yujun Signed-off-by: yaowenrui --- arch/arm64/include/asm/cputype.h | 4 ++++ arch/arm64/kernel/proton-pack.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7a58d54eec0f..cb297abe9c83 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -59,6 +59,7 @@ #define ARM_CPU_IMP_NVIDIA 0x4E #define ARM_CPU_IMP_FUJITSU 0x46 #define ARM_CPU_IMP_HISI 0x48 +#define ARM_CPU_IMP_AMPERE 0xC0 #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -103,6 +104,8 @@ #define HISI_CPU_PART_TSV110 0xD01 +#define AMPERE_CPU_PART_AMPERE1 0xAC3 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -135,6 +138,7 @@ #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 5d5f2fa19c93..dd27a38fdaa1 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -872,6 +872,10 @@ u8 spectre_bhb_loop_affected(int 
scope) MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), {}, }; + static const struct midr_range spectre_bhb_k11_list[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1), + {}, + }; static const struct midr_range spectre_bhb_k8_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), @@ -882,6 +886,8 @@ u8 spectre_bhb_loop_affected(int scope) k = 32; else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k11_list)) + k = 11; else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) k = 8; -- Gitee From ee2e5d2cdac9eab7b20e1e9e470be8b83a2181e2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 21 Aug 2023 10:55:05 -0700 Subject: [PATCH 202/243] af_unix: Fix null-ptr-deref in unix_stream_sendpage(). stable inclusion from stable-v5.10.192 commit c080cee930303124624fe64fc504f66c815ee6b9 category: bugfix issue: #I7ZBXU CVE:CVE-2023-4622 Signed-off-by: Jin Zhengyang --------------------------------------- Bing-Jhong Billy Jheng reported null-ptr-deref in unix_stream_sendpage() with detailed analysis and a nice repro. unix_stream_sendpage() tries to add data to the last skb in the peer's recv queue without locking the queue. If the peer's FD is passed to another socket and the socket's FD is passed to the peer, there is a loop between them. If we close both sockets without receiving FD, the sockets will be cleaned up by garbage collection. The garbage collection iterates such sockets and unlinks skb with FD from the socket's receive queue under the queue's lock. So, there is a race where unix_stream_sendpage() could access an skb locklessly that is being released by garbage collection, resulting in use-after-free. To avoid the issue, unix_stream_sendpage() must lock the peer's recv queue. Note the issue does not exist in 6.5+ thanks to the recent sendpage() refactoring. This patch is originally written by Linus Torvalds. BUG: unable to handle page fault for address: ffff988004dd6870 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 0 P4D 0 PREEMPT SMP PTI CPU: 4 PID: 297 Comm: garbage_uaf Not tainted 6.1.46 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:kmem_cache_alloc_node+0xa2/0x1e0 Code: c0 0f 84 32 01 00 00 41 83 fd ff 74 10 48 8b 00 48 c1 e8 3a 41 39 c5 0f 85 1c 01 00 00 41 8b 44 24 28 49 8b 3c 24 48 8d 4a 40 <49> 8b 1c 06 4c 89 f0 65 48 0f c7 0f 0f 94 c0 84 c0 74 a1 41 8b 44 RSP: 0018:ffffc9000079fac0 EFLAGS: 00000246 RAX: 0000000000000070 RBX: 0000000000000005 RCX: 000000000001a284 RDX: 000000000001a244 RSI: 0000000000400cc0 RDI: 000000000002eee0 RBP: 0000000000400cc0 R08: 0000000000400cc0 R09: 0000000000000003 R10: 0000000000000001 R11: 0000000000000000 R12: ffff888003970f00 R13: 00000000ffffffff R14: ffff988004dd6800 R15: 00000000000000e8 FS: 00007f174d6f3600(0000) GS:ffff88807db00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff988004dd6870 CR3: 00000000092be000 CR4: 00000000007506e0 PKRU: 55555554 Call Trace: ? __die_body.cold+0x1a/0x1f ? page_fault_oops+0xa9/0x1e0 ? fixup_exception+0x1d/0x310 ? exc_page_fault+0xa8/0x150 ? asm_exc_page_fault+0x22/0x30 ? kmem_cache_alloc_node+0xa2/0x1e0 ? __alloc_skb+0x16c/0x1e0 __alloc_skb+0x16c/0x1e0 alloc_skb_with_frags+0x48/0x1e0 sock_alloc_send_pskb+0x234/0x270 unix_stream_sendmsg+0x1f5/0x690 sock_sendmsg+0x5d/0x60 ____sys_sendmsg+0x210/0x260 ___sys_sendmsg+0x83/0xd0 ? 
kmem_cache_alloc+0xc6/0x1c0 ? avc_disable+0x20/0x20 ? percpu_counter_add_batch+0x53/0xc0 ? alloc_empty_file+0x5d/0xb0 ? alloc_file+0x91/0x170 ? alloc_file_pseudo+0x94/0x100 ? __fget_light+0x9f/0x120 __sys_sendmsg+0x54/0xa0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x69/0xd3 RIP: 0033:0x7f174d639a7d Code: 28 89 54 24 1c 48 89 74 24 10 89 7c 24 08 e8 8a c1 f4 ff 8b 54 24 1c 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 44 24 08 e8 de c1 f4 ff 48 RSP: 002b:00007ffcb563ea50 EFLAGS: 00000293 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f174d639a7d RDX: 0000000000000000 RSI: 00007ffcb563eab0 RDI: 0000000000000007 RBP: 00007ffcb563eb10 R08: 0000000000000000 R09: 00000000ffffffff R10: 00000000004040a0 R11: 0000000000000293 R12: 00007ffcb563ec28 R13: 0000000000401398 R14: 0000000000403e00 R15: 00007f174d72c000 Fixes: 869e7c62486e ("net: af_unix: implement stream sendpage support") Reported-by: Bing-Jhong Billy Jheng Reviewed-by: Bing-Jhong Billy Jheng Co-developed-by: Linus Torvalds Signed-off-by: Linus Torvalds Signed-off-by: Kuniyuki Iwashima Signed-off-by: Greg Kroah-Hartman Signed-off-by: Liu Jian Signed-off-by: Ywenrui --- net/unix/af_unix.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b7edca89e0ba..f38616da504d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2008,6 +2008,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, if (false) { alloc_skb: + spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); mutex_unlock(&unix_sk(other)->iolock); newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, @@ -2047,6 +2048,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, init_scm = false; } + spin_lock(&other->sk_receive_queue.lock); skb = skb_peek_tail(&other->sk_receive_queue); if (tail && tail == skb) { skb = newskb; @@ -2077,14 +2079,11 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, refcount_add(size, &sk->sk_wmem_alloc); if (newskb) { - err = unix_scm_to_skb(&scm, skb, false); - if (err) - goto err_state_unlock; - spin_lock(&other->sk_receive_queue.lock); + unix_scm_to_skb(&scm, skb, false); __skb_queue_tail(&other->sk_receive_queue, newskb); - spin_unlock(&other->sk_receive_queue.lock); } + spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); mutex_unlock(&unix_sk(other)->iolock); -- Gitee From b4dc753cf831cba8885e4de5a951b41d79d877c4 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Sep 2023 23:13:56 +0200 Subject: [PATCH 203/243] netfilter: nftables: exthdr: fix 4-byte stack OOB write mainline inclusion from mainline-v6.6-rc1 commit fd94d9dadee58e09b49075240fe83423eb1dcd36 category: bugfix issue: #I818G7 CVE:CVE-2023-4881 Signed-off-by: Lin Shengwang --------------------------------------- If priv->len is a multiple of 4, then dst[len / 4] can write past the destination array which leads to stack corruption. This construct is necessary to clean the remainder of the register in case ->len is NOT a multiple of the register size, so make it conditional just like nft_payload.c does. The bug was added in 4.1 cycle and then copied/inherited when tcp/sctp and ip option support was added. Bug reported by Zero Day Initiative project (ZDI-CAN-21950, ZDI-CAN-21951, ZDI-CAN-21961). 
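
The arithmetic is easiest to see in isolation. The sketch below is a userspace stand-in (REG32_SIZE and copy_to_reg() are invented names; the kernel helper is nft_skb_copy_to_reg() working on skb data): zero-padding the tail register word is only needed, and only safe, when the copy length is not already a whole number of 4-byte words.

  #include <stdio.h>
  #include <string.h>

  #define REG32_SIZE 4

  /* 'dest' has room for len bytes rounded up to 4-byte register words. */
  static void copy_to_reg(unsigned int *dest, const void *src, unsigned int len)
  {
          /* Pad the partial tail word only when there is one.  For a
           * multiple of 4, dest[len / 4] is the word *after* the space
           * reserved for this expression, i.e. the 4-byte overwrite. */
          if (len % REG32_SIZE)
                  dest[len / REG32_SIZE] = 0;
          memcpy(dest, src, len);
  }

  int main(void)
  {
          unsigned int regs[4];            /* 16 bytes reserved for the result */
          unsigned char opt[16] = { 0 };

          copy_to_reg(regs, opt, 16);      /* len % 4 == 0: no padding write */
          copy_to_reg(regs, opt, 6);       /* len == 6: tail of regs[1] zeroed */
          puts("no word written past the reserved registers");
          return 0;
  }

The unconditional dest[priv->len / NFT_REG32_SIZE] = 0 removed by this patch performed exactly that out-of-bounds word write whenever a rule matched a whole number of register words.
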
Fixes: 49499c3e6e18 ("netfilter: nf_tables: switch registers to 32 bit addressing") Fixes: 935b7f643018 ("netfilter: nft_exthdr: add TCP option matching") Fixes: 133dc203d77d ("netfilter: nft_exthdr: Support SCTP chunks") Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options") Signed-off-by: Florian Westphal Signed-off-by: Zhengchao Shao Signed-off-by: Ywenrui --- net/netfilter/nft_exthdr.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index faa0844c01fb..d158ca8b7b2c 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -33,6 +33,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset) return opt[offset + 1]; } +static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len) +{ + if (len % NFT_REG32_SIZE) + dest[len / NFT_REG32_SIZE] = 0; + + return skb_copy_bits(skb, offset, dest, len); +} + static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -54,8 +62,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, } offset += priv->offset; - dest[priv->len / NFT_REG32_SIZE] = 0; - if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0) + if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0) goto err; return; err: @@ -151,8 +158,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr, } offset += priv->offset; - dest[priv->len / NFT_REG32_SIZE] = 0; - if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0) + if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0) goto err; return; err: @@ -208,7 +214,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr, if (priv->flags & NFT_EXTHDR_F_PRESENT) { *dest = 1; } else { - dest[priv->len / NFT_REG32_SIZE] = 0; + if (priv->len % NFT_REG32_SIZE) + dest[priv->len / NFT_REG32_SIZE] = 0; memcpy(dest, opt + offset, priv->len); } -- Gitee From f55dd775cbac5ada0a283d7e13266b4957664019 Mon Sep 17 00:00:00 2001 From: Budimir Markovic Date: Thu, 24 Aug 2023 01:49:05 -0700 Subject: [PATCH 204/243] net/sched: sch_hfsc: Ensure inner classes have fsc curve mainline inclusion from mainline-v6.6-rc1 commit b3d26c5702c7d6c45456326e56d2ccf3f103e60f category: bugfix issue: #I7ZBXR CVE: CVE-2023-4623 Signed-off-by: Ywenrui44091 --------------------------------------- HFSC assumes that inner classes have an fsc curve, but it is currently possible for classes without an fsc curve to become parents. This leads to bugs including a use-after-free. Don't allow non-root classes without HFSC_FSC to become parents. 
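
A self-contained illustration of the added validation (toy_class and check_parent() are invented for the example; the real check sits in hfsc_change_class() and also exempts the root class): refuse at configuration time any would-be parent that was never given a fair-service curve, so the dequeue path never has to assume one exists.

  #include <stdio.h>

  #define HFSC_RSC 0x1   /* real-time service curve */
  #define HFSC_FSC 0x2   /* fair/link-sharing service curve */
  #define HFSC_USC 0x4   /* upper-limit service curve */

  struct toy_class {
          unsigned int flags;
          int is_root;
  };

  /* Mirrors the shape of the new check: a non-root class may only become
   * a parent if it carries an FSC. */
  static int check_parent(const struct toy_class *parent)
  {
          if (!(parent->flags & HFSC_FSC) && !parent->is_root) {
                  fprintf(stderr, "Invalid parent - parent class must have FSC\n");
                  return -1;
          }
          return 0;
  }

  int main(void)
  {
          struct toy_class rt_only  = { .flags = HFSC_RSC, .is_root = 0 };
          struct toy_class with_fsc = { .flags = HFSC_RSC | HFSC_FSC, .is_root = 0 };

          printf("rt-only parent accepted?  %s\n", check_parent(&rt_only)  ? "no" : "yes");
          printf("fsc parent accepted?      %s\n", check_parent(&with_fsc) ? "no" : "yes");
          return 0;
  }
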
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Budimir Markovic Signed-off-by: Budimir Markovic Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20230824084905.422-1-markovicbudimir@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Zhengchao Shao Signed-off-by: Ywenrui --- net/sched/sch_hfsc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d1902fca9844..334aae0d8dce 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1012,6 +1012,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (parent == NULL) return -ENOENT; } + if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) { + NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC"); + return -EINVAL; + } if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0) return -EINVAL; -- Gitee From e96e6487780789f1634b6e0c36e6911daa546dd7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 23 Jul 2023 16:41:48 +0200 Subject: [PATCH 205/243] netfilter: nf_tables: disallow rule addition to bound chain via NFTA_RULE_CHAIN_ID stable inclusion from stable-5.10.190 commit 308a43f1521d5b7220693d0865b23e8dad3ed137 category: bugfix issue: #I80615 CVE: CVE-2023-3995 Signed-off-by: Ywenrui44091 --------------------------------------- [ Upstream commit 0ebc1064e4874d5987722a2ddbc18f94aa53b211 ] Bail out with EOPNOTSUPP when adding rule to bound chain via NFTA_RULE_CHAIN_ID. The following warning splat is shown when adding a rule to a deleted bound chain: WARNING: CPU: 2 PID: 13692 at net/netfilter/nf_tables_api.c:2013 nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] CPU: 2 PID: 13692 Comm: chain-bound-rul Not tainted 6.1.39 #1 RIP: 0010:nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Reported-by: Kevin Rich Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin Signed-off-by: Lu Wei Signed-off-by: Ywenrui --- net/netfilter/nf_tables_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0973b66ee68b..96e2a40492c1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3195,8 +3195,6 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); return PTR_ERR(chain); } - if (nft_chain_is_bound(chain)) - return -EOPNOTSUPP; } else if (nla[NFTA_RULE_CHAIN_ID]) { chain = nft_chain_lookup_byid(net, nla[NFTA_RULE_CHAIN_ID]); @@ -3208,6 +3206,9 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return -EINVAL; } + if (nft_chain_is_bound(chain)) + return -EOPNOTSUPP; + if (nla[NFTA_RULE_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE])); rule = __nft_rule_lookup(chain, handle); -- Gitee From 9746ca0f2874c63c1093eb1fe250807003b5c886 Mon Sep 17 00:00:00 2001 From: valis Date: Sat, 29 Jul 2023 08:32:02 -0400 Subject: [PATCH 206/243] net/sched: cls_route: No longer copy tcf_result on update to avoid use-after-free mainline inclusion from mainline-v6.5-rc5 commit b80b829e9e2c1b3f7aae34855e04d8f6ecaf13c8 category: bugfix issue:#I7P7V8 CVE: CVE-2023-4128 Signed-off-by: Yao wenrui -------------------------------- When route4_change() is called on an existing filter, the whole tcf_result struct is always copied into the new instance of the filter. 
This causes a problem when updating a filter bound to a class, as tcf_unbind_filter() is always called on the old instance in the success path, decreasing filter_cnt of the still referenced class and allowing it to be deleted, leading to a use-after-free. Fix this by no longer copying the tcf_result struct from the old filter. Fixes: 1109c00547fc ("net: sched: RCU cls_route") Reported-by: valis Reported-by: Bing-Jhong Billy Jheng Signed-off-by: valis Signed-off-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Reviewed-by: M A Ramdhan Link: https://lore.kernel.org/r/20230729123202.72406-4-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/cls_route.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 315ca2b7e2ed..eb6401c88768 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -501,7 +501,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (fold) { f->id = fold->id; f->iif = fold->iif; - f->res = fold->res; f->handle = fold->handle; f->tp = fold->tp; -- Gitee From d2c05b2d73e46d2cacd97850c029c438d345b6ae Mon Sep 17 00:00:00 2001 From: valis Date: Sat, 29 Jul 2023 08:32:01 -0400 Subject: [PATCH 207/243] net/sched: cls_fw: No longer copy tcf_result on update to avoid use-after-free stable inclusion from stable-5.10.190 commit a8d478200b104ff356f51e1f63499fe46ba8c9b8 category: bugfix issue: #I7SZWH CVE: CVE-2023-4128 Signed-off-by: Ywenrui44091 --------------------------------------- [ Upstream commit 76e42ae831991c828cffa8c37736ebfb831ad5ec ] When fw_change() is called on an existing filter, the whole tcf_result struct is always copied into the new instance of the filter. This causes a problem when updating a filter bound to a class, as tcf_unbind_filter() is always called on the old instance in the success path, decreasing filter_cnt of the still referenced class and allowing it to be deleted, leading to a use-after-free. Fix this by no longer copying the tcf_result struct from the old filter. Fixes: e35a8ee5993b ("net: sched: fw use RCU") Reported-by: valis Reported-by: Bing-Jhong Billy Jheng Signed-off-by: valis Signed-off-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Reviewed-by: M A Ramdhan Link: https://lore.kernel.org/r/20230729123202.72406-3-jhs@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Liu Jian Signed-off-by: Ywenrui --- net/sched/cls_fw.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index ec945294626a..45fa9466c32e 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -266,7 +266,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return -ENOBUFS; fnew->id = f->id; - fnew->res = f->res; fnew->ifindex = f->ifindex; fnew->tp = f->tp; -- Gitee From 783fc177a941cd4ac26c230c8ba02247bb70054a Mon Sep 17 00:00:00 2001 From: valis Date: Sat, 29 Jul 2023 08:32:00 -0400 Subject: [PATCH 208/243] net/sched: cls_u32: No longer copy tcf_result on update to avoid use-after-free stable inclusion from stable-5.10.190 commit b4256c99a7116c9514224847e8aaee2ecf110a0a category: bugfix issue: #I7SZWH CVE: CVE-2023-4128 Signed-off-by: Ywenrui44091 --------------------------------------- [ Upstream commit 3044b16e7c6fe5d24b1cdbcf1bd0a9d92d1ebd81 ] When u32_change() is called on an existing filter, the whole tcf_result struct is always copied into the new instance of the filter. 
This causes a problem when updating a filter bound to a class, as tcf_unbind_filter() is always called on the old instance in the success path, decreasing filter_cnt of the still referenced class and allowing it to be deleted, leading to a use-after-free. Fix this by no longer copying the tcf_result struct from the old filter. Fixes: de5df63228fc ("net: sched: cls_u32 changes to knode must appear atomic to readers") Reported-by: valis Reported-by: M A Ramdhan Signed-off-by: valis Signed-off-by: Jamal Hadi Salim Reviewed-by: Victor Nogueira Reviewed-by: Pedro Tammela Reviewed-by: M A Ramdhan Link: https://lore.kernel.org/r/20230729123202.72406-2-jhs@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Liu Jian Signed-off-by: Ywenrui --- net/sched/cls_u32.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b61db335c49d..6fe23392918f 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -810,7 +810,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, new->ifindex = n->ifindex; new->fshift = n->fshift; - new->res = n->res; new->flags = n->flags; RCU_INIT_POINTER(new->ht_down, ht); -- Gitee From ae5b33cba5f007576037f3008fc1844ff4d3e178 Mon Sep 17 00:00:00 2001 From: valis Date: Fri, 1 Sep 2023 12:22:37 -0400 Subject: [PATCH 209/243] net: sched: sch_qfq: Fix UAF in qfq_dequeue() mainline inclusion from mainline-v6.6-rc1~29^2~18 commit 8fc134fee27f2263988ae38920bc03da416b03d8 category: bugfix issue: #I819HK CVE:CVE-2023-4921 Signed-off-by: Ywenrui44091 --------------------------------------- When the plug qdisc is used as a class of the qfq qdisc it could trigger a UAF. This issue can be reproduced with following commands: tc qdisc add dev lo root handle 1: qfq tc class add dev lo parent 1: classid 1:1 qfq weight 1 maxpkt 512 tc qdisc add dev lo parent 1:1 handle 2: plug tc filter add dev lo parent 1: basic classid 1:1 ping -c1 127.0.0.1 and boom: [ 285.353793] BUG: KASAN: slab-use-after-free in qfq_dequeue+0xa7/0x7f0 [ 285.354910] Read of size 4 at addr ffff8880bad312a8 by task ping/144 [ 285.355903] [ 285.356165] CPU: 1 PID: 144 Comm: ping Not tainted 6.5.0-rc3+ #4 [ 285.357112] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 285.358376] Call Trace: [ 285.358773] [ 285.359109] dump_stack_lvl+0x44/0x60 [ 285.359708] print_address_description.constprop.0+0x2c/0x3c0 [ 285.360611] kasan_report+0x10c/0x120 [ 285.361195] ? qfq_dequeue+0xa7/0x7f0 [ 285.361780] qfq_dequeue+0xa7/0x7f0 [ 285.362342] __qdisc_run+0xf1/0x970 [ 285.362903] net_tx_action+0x28e/0x460 [ 285.363502] __do_softirq+0x11b/0x3de [ 285.364097] do_softirq.part.0+0x72/0x90 [ 285.364721] [ 285.365072] [ 285.365422] __local_bh_enable_ip+0x77/0x90 [ 285.366079] __dev_queue_xmit+0x95f/0x1550 [ 285.366732] ? __pfx_csum_and_copy_from_iter+0x10/0x10 [ 285.367526] ? __pfx___dev_queue_xmit+0x10/0x10 [ 285.368259] ? __build_skb_around+0x129/0x190 [ 285.368960] ? ip_generic_getfrag+0x12c/0x170 [ 285.369653] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 285.370390] ? csum_partial+0x8/0x20 [ 285.370961] ? raw_getfrag+0xe5/0x140 [ 285.371559] ip_finish_output2+0x539/0xa40 [ 285.372222] ? __pfx_ip_finish_output2+0x10/0x10 [ 285.372954] ip_output+0x113/0x1e0 [ 285.373512] ? __pfx_ip_output+0x10/0x10 [ 285.374130] ? icmp_out_count+0x49/0x60 [ 285.374739] ? __pfx_ip_finish_output+0x10/0x10 [ 285.375457] ip_push_pending_frames+0xf3/0x100 [ 285.376173] raw_sendmsg+0xef5/0x12d0 [ 285.376760] ? 
do_syscall_64+0x40/0x90 [ 285.377359] ? __static_call_text_end+0x136578/0x136578 [ 285.378173] ? do_syscall_64+0x40/0x90 [ 285.378772] ? kasan_enable_current+0x11/0x20 [ 285.379469] ? __pfx_raw_sendmsg+0x10/0x10 [ 285.380137] ? __sock_create+0x13e/0x270 [ 285.380673] ? __sys_socket+0xf3/0x180 [ 285.381174] ? __x64_sys_socket+0x3d/0x50 [ 285.381725] ? entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 285.382425] ? __rcu_read_unlock+0x48/0x70 [ 285.382975] ? ip4_datagram_release_cb+0xd8/0x380 [ 285.383608] ? __pfx_ip4_datagram_release_cb+0x10/0x10 [ 285.384295] ? preempt_count_sub+0x14/0xc0 [ 285.384844] ? __list_del_entry_valid+0x76/0x140 [ 285.385467] ? _raw_spin_lock_bh+0x87/0xe0 [ 285.386014] ? __pfx__raw_spin_lock_bh+0x10/0x10 [ 285.386645] ? release_sock+0xa0/0xd0 [ 285.387148] ? preempt_count_sub+0x14/0xc0 [ 285.387712] ? freeze_secondary_cpus+0x348/0x3c0 [ 285.388341] ? aa_sk_perm+0x177/0x390 [ 285.388856] ? __pfx_aa_sk_perm+0x10/0x10 [ 285.389441] ? check_stack_object+0x22/0x70 [ 285.390032] ? inet_send_prepare+0x2f/0x120 [ 285.390603] ? __pfx_inet_sendmsg+0x10/0x10 [ 285.391172] sock_sendmsg+0xcc/0xe0 [ 285.391667] __sys_sendto+0x190/0x230 [ 285.392168] ? __pfx___sys_sendto+0x10/0x10 [ 285.392727] ? kvm_clock_get_cycles+0x14/0x30 [ 285.393328] ? set_normalized_timespec64+0x57/0x70 [ 285.393980] ? _raw_spin_unlock_irq+0x1b/0x40 [ 285.394578] ? __x64_sys_clock_gettime+0x11c/0x160 [ 285.395225] ? __pfx___x64_sys_clock_gettime+0x10/0x10 [ 285.395908] ? _copy_to_user+0x3e/0x60 [ 285.396432] ? exit_to_user_mode_prepare+0x1a/0x120 [ 285.397086] ? syscall_exit_to_user_mode+0x22/0x50 [ 285.397734] ? do_syscall_64+0x71/0x90 [ 285.398258] __x64_sys_sendto+0x74/0x90 [ 285.398786] do_syscall_64+0x64/0x90 [ 285.399273] ? exit_to_user_mode_prepare+0x1a/0x120 [ 285.399949] ? syscall_exit_to_user_mode+0x22/0x50 [ 285.400605] ? 
do_syscall_64+0x71/0x90 [ 285.401124] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 285.401807] RIP: 0033:0x495726 [ 285.402233] Code: ff ff ff f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 11 b8 2c 00 00 00 0f 09 [ 285.404683] RSP: 002b:00007ffcc25fb618 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 285.405677] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 0000000000495726 [ 285.406628] RDX: 0000000000000040 RSI: 0000000002518750 RDI: 0000000000000000 [ 285.407565] RBP: 00000000005205ef R08: 00000000005f8838 R09: 000000000000001c [ 285.408523] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000002517634 [ 285.409460] R13: 00007ffcc25fb6f0 R14: 0000000000000003 R15: 0000000000000000 [ 285.410403] [ 285.410704] [ 285.410929] Allocated by task 144: [ 285.411402] kasan_save_stack+0x1e/0x40 [ 285.411926] kasan_set_track+0x21/0x30 [ 285.412442] __kasan_slab_alloc+0x55/0x70 [ 285.412973] kmem_cache_alloc_node+0x187/0x3d0 [ 285.413567] __alloc_skb+0x1b4/0x230 [ 285.414060] __ip_append_data+0x17f7/0x1b60 [ 285.414633] ip_append_data+0x97/0xf0 [ 285.415144] raw_sendmsg+0x5a8/0x12d0 [ 285.415640] sock_sendmsg+0xcc/0xe0 [ 285.416117] __sys_sendto+0x190/0x230 [ 285.416626] __x64_sys_sendto+0x74/0x90 [ 285.417145] do_syscall_64+0x64/0x90 [ 285.417624] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 285.418306] [ 285.418531] Freed by task 144: [ 285.418960] kasan_save_stack+0x1e/0x40 [ 285.419469] kasan_set_track+0x21/0x30 [ 285.419988] kasan_save_free_info+0x27/0x40 [ 285.420556] ____kasan_slab_free+0x109/0x1a0 [ 285.421146] kmem_cache_free+0x1c2/0x450 [ 285.421680] __netif_receive_skb_core+0x2ce/0x1870 [ 285.422333] __netif_receive_skb_one_core+0x97/0x140 [ 285.423003] process_backlog+0x100/0x2f0 [ 285.423537] __napi_poll+0x5c/0x2d0 [ 285.424023] net_rx_action+0x2be/0x560 [ 285.424510] __do_softirq+0x11b/0x3de [ 285.425034] [ 285.425254] The buggy address belongs to the object at ffff8880bad31280 [ 285.425254] which belongs to the cache skbuff_head_cache of size 224 [ 285.426993] The buggy address is located 40 bytes inside of [ 285.426993] freed 224-byte region [ffff8880bad31280, ffff8880bad31360) [ 285.428572] [ 285.428798] The buggy address belongs to the physical page: [ 285.429540] page:00000000f4b77674 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xbad31 [ 285.430758] flags: 0x100000000000200(slab|node=0|zone=1) [ 285.431447] page_type: 0xffffffff() [ 285.431934] raw: 0100000000000200 ffff88810094a8c0 dead000000000122 0000000000000000 [ 285.432757] raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000 [ 285.433562] page dumped because: kasan: bad access detected [ 285.434144] [ 285.434320] Memory state around the buggy address: [ 285.434828] ffff8880bad31180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 285.435580] ffff8880bad31200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 285.436264] >ffff8880bad31280: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 285.436777] ^ [ 285.437106] ffff8880bad31300: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc [ 285.437616] ffff8880bad31380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 285.438126] ================================================================== [ 285.438662] Disabling lock debugging due to kernel taint Fix this by: 1. Changing sch_plug's .peek handler to qdisc_peek_dequeued(), a function compatible with non-work-conserving qdiscs 2. Checking the return value of qdisc_dequeue_peeked() in sch_qfq. 
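
The interaction can be modelled without the qdisc framework. In the sketch below, toy_qdisc stands in for a plug-like child whose peek() reports a packet that a later dequeue() may refuse to hand over; the parent mirrors the fixed qfq_dequeue() by rolling back its optimistic accounting instead of touching a packet it never received. All names are invented, and the logic only sketches the second change listed above (checking the dequeue return value):

  #include <stdio.h>
  #include <stddef.h>

  struct toy_qdisc {
          const char *pending;   /* what peek() reports */
          int plugged;           /* if set, dequeue() yields nothing yet */
          int qlen;
  };

  static const char *toy_peek(const struct toy_qdisc *q)
  {
          return q->pending;
  }

  static const char *toy_dequeue(struct toy_qdisc *q)
  {
          if (q->plugged)
                  return NULL;            /* promised by peek, not delivered */
          q->pending = NULL;
          return "pkt";
  }

  /* Parent dequeue path: never assume dequeue returns what peek showed. */
  static const char *parent_dequeue(struct toy_qdisc *child)
  {
          const char *skb = toy_peek(child);

          if (!skb)
                  return NULL;

          child->qlen--;                  /* optimistic, as in qfq_dequeue() */
          skb = toy_dequeue(child);
          if (!skb) {
                  child->qlen++;          /* roll back instead of using stale state */
                  return NULL;
          }
          return skb;
  }

  int main(void)
  {
          struct toy_qdisc child = { .pending = "pkt", .plugged = 1, .qlen = 1 };

          printf("dequeue from plugged child: %s (qlen=%d)\n",
                 parent_dequeue(&child) ? "got packet" : "nothing", child.qlen);
          return 0;
  }
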
Fixes: 462dbc9101ac ("pkt_sched: QFQ Plus: fair-queueing service at DRR cost") Reported-by: valis Signed-off-by: valis Signed-off-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20230901162237.11525-1-jhs@mojatatu.com Signed-off-by: Paolo Abeni --- net/sched/sch_plug.c | 2 +- net/sched/sch_qfq.c | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index cbc2ebca4548..339990bb5981 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -210,7 +210,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = { .priv_size = sizeof(struct plug_sched_data), .enqueue = plug_enqueue, .dequeue = plug_dequeue, - .peek = qdisc_peek_head, + .peek = qdisc_peek_dequeued, .init = plug_init, .change = plug_change, .reset = qdisc_reset_queue, diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index b19e4cd08ed6..ad94a026616c 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -970,10 +970,13 @@ static void qfq_update_eligible(struct qfq_sched *q) } /* Dequeue head packet of the head class in the DRR queue of the aggregate. */ -static void agg_dequeue(struct qfq_aggregate *agg, - struct qfq_class *cl, unsigned int len) +static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg, + struct qfq_class *cl, unsigned int len) { - qdisc_dequeue_peeked(cl->qdisc); + struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc); + + if (!skb) + return NULL; cl->deficit -= (int) len; @@ -983,6 +986,8 @@ static void agg_dequeue(struct qfq_aggregate *agg, cl->deficit += agg->lmax; list_move_tail(&cl->alist, &agg->active); } + + return skb; } static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, @@ -1128,11 +1133,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) if (!skb) return NULL; - qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; + + skb = agg_dequeue(in_serv_agg, cl, len); + + if (!skb) { + sch->q.qlen++; + return NULL; + } + + qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); - agg_dequeue(in_serv_agg, cl, len); /* If lmax is lowered, through qfq_change_class, for a class * owning pending packets with larger size than the new value * of lmax, then the following condition may hold. -- Gitee From c81114409857b556ba1bc678701f12a4cd483660 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 20 Jul 2023 09:17:21 +0200 Subject: [PATCH 210/243] netfilter: nf_tables: skip bound chain on rule flush stable inclusion from stable-v5.10.188 commit 30e5460d69e631c0e84db37dba2d8f98648778d4 category: bugfix issue: #I7SDXF CVE: CVE-2023-3777 Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 6eaf41e87a223ae6f8e7a28d6e78384ad7e407f8 ] Skip bound chain when flushing table rules, the rule that owns this chain releases these objects. 
Otherwise, the following warning is triggered: WARNING: CPU: 2 PID: 1217 at net/netfilter/nf_tables_api.c:2013 nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] CPU: 2 PID: 1217 Comm: chain-flush Not tainted 6.1.39 #1 RIP: 0010:nf_tables_chain_destroy+0x1f7/0x210 [nf_tables] Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Reported-by: Kevin Rich Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin Signed-off-by: Lu Wei Signed-off-by: yaowenrui --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 96e2a40492c1..09642d2ed2e6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3452,6 +3452,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, list_for_each_entry(chain, &table->chains, list) { if (!nft_is_active_next(net, chain)) continue; + if (nft_chain_is_bound(chain)) + continue; ctx.chain = chain; err = nft_delrule_by_chain(&ctx); -- Gitee From 08cc72ea1383591a0f1ad33fd06f6258e4c67cff Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Fri, 21 Jul 2023 15:24:10 -0700 Subject: [PATCH 211/243] tcp: Reduce chance of collisions in inet6_hashfn(). stable inclusion from stable-v5.10.190 commit 0cd74fbd3b8327e60525e1ec4a6c28895693909f category: bugfix issue: #I7IT8M CVE: CVE-2023-1206 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit d11b0df7ddf1831f3e170972f43186dad520bfcc ] For both IPv4 and IPv6 incoming TCP connections are tracked in a hash table with a hash over the source & destination addresses and ports. However, the IPv6 hash is insufficient and can lead to a high rate of collisions. The IPv6 hash used an XOR to fit everything into the 96 bits for the fast jenkins hash, meaning it is possible for an external entity to ensure the hash collides, thus falling back to a linear search in the bucket, which is slow. We take the approach of hash the full length of IPv6 address in __ipv6_addr_jhash() so that all users can benefit from a more secure version. While this may look like it adds overhead, the reality of modern CPUs means that this is unmeasurable in real world scenarios. In simulating with llvm-mca, the increase in cycles for the hashing code was ~16 cycles on Skylake (from a base of ~155), and an extra ~9 on Nehalem (base of ~173). In commit dd6d2910c5e0 ("netfilter: conntrack: switch to siphash") netfilter switched from a jenkins hash to a siphash, but even the faster hsiphash is a more significant overhead (~20-30%) in some preliminary testing. So, in this patch, we keep to the more conservative approach to ensure we don't add much overhead per SYN. In testing, this results in a consistently even spread across the connection buckets. In both testing and real-world scenarios, we have not found any measurable performance impact. 
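
The weakness is in what gets hashed, not in the hash function itself. The demonstration below deliberately uses a trivial mixing routine (toy_mix(), not jhash) because any hash of the folded 96-bit input collides once an attacker keeps a[0]^a[1] and the last two words constant; feeding all four 32-bit words to the hash, as the patched __ipv6_addr_jhash() now does through jhash2(), removes that degree of freedom. Addresses and seed are arbitrary example values:

  #include <stdio.h>
  #include <stdint.h>

  static uint32_t toy_mix(const uint32_t *w, int n, uint32_t seed)
  {
          uint32_t h = seed;

          for (int i = 0; i < n; i++) {
                  h ^= w[i];
                  h *= 0x9e3779b1u;      /* any odd multiplier works for the demo */
          }
          return h;
  }

  int main(void)
  {
          /* Two different source addresses chosen so that a[0]^a[1] matches. */
          uint32_t addr_a[4] = { 0x20010db8, 0x00000001, 0xdeadbeef, 0x00000001 };
          uint32_t addr_b[4] = { 0x20010db9, 0x00000000, 0xdeadbeef, 0x00000001 };

          /* Old scheme: hash (a[0]^a[1], a[2], a[3]). */
          uint32_t folded_a[3] = { addr_a[0] ^ addr_a[1], addr_a[2], addr_a[3] };
          uint32_t folded_b[3] = { addr_b[0] ^ addr_b[1], addr_b[2], addr_b[3] };

          printf("folded: %08x vs %08x  <- attacker-forced collision\n",
                 (unsigned)toy_mix(folded_a, 3, 42), (unsigned)toy_mix(folded_b, 3, 42));
          printf("full  : %08x vs %08x  <- no longer forced equal\n",
                 (unsigned)toy_mix(addr_a, 4, 42), (unsigned)toy_mix(addr_b, 4, 42));
          return 0;
  }
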
Fixes: 08dcdbf6a7b9 ("ipv6: use a stronger hash for tcp") Signed-off-by: Stewart Smith Signed-off-by: Samuel Mendoza-Jonas Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20230721222410.17914-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Wang Yufen Signed-off-by: wanxiaoqing Signed-off-by: Ywenrui --- include/net/ipv6.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index bd1f396cc9c7..99219c617c9d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -660,12 +660,8 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a) /* more secured version of ipv6_addr_hash() */ static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval) { - u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1]; - - return jhash_3words(v, - (__force u32)a->s6_addr32[2], - (__force u32)a->s6_addr32[3], - initval); + return jhash2((__force const u32 *)a->s6_addr32, + ARRAY_SIZE(a->s6_addr32), initval); } static inline bool ipv6_addr_loopback(const struct in6_addr *a) -- Gitee From fac510d24bd7b74c2fe03d8bd547c51e452cf660 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 Apr 2023 15:24:13 +0000 Subject: [PATCH 212/243] bpf: Fix incorrect verifier pruning due to missing register precision taints stable inclusion from stable-v5.10.179~52 commit b1281d008845ae9a4de9ef7510dcc1667557a67a category: bugfix issue: #I7ITAL CVE: CVE-2023-2163 Signed-off-by: Cai xinchen --------------------------------------- [ Upstream commit 71b547f561247897a0a14f3082730156c0533fed ] Juan Jose et al reported an issue found via fuzzing where the verifier's pruning logic prematurely marks a program path as safe. Consider the following program: 0: (b7) r6 = 1024 1: (b7) r7 = 0 2: (b7) r8 = 0 3: (b7) r9 = -2147483648 4: (97) r6 %= 1025 5: (05) goto pc+0 6: (bd) if r6 <= r9 goto pc+2 7: (97) r6 %= 1 8: (b7) r9 = 0 9: (bd) if r6 <= r9 goto pc+1 10: (b7) r6 = 0 11: (b7) r0 = 0 12: (63) *(u32 *)(r10 -4) = r0 13: (18) r4 = 0xffff888103693400 // map_ptr(ks=4,vs=48) 15: (bf) r1 = r4 16: (bf) r2 = r10 17: (07) r2 += -4 18: (85) call bpf_map_lookup_elem#1 19: (55) if r0 != 0x0 goto pc+1 20: (95) exit 21: (77) r6 >>= 10 22: (27) r6 *= 8192 23: (bf) r1 = r0 24: (0f) r0 += r6 25: (79) r3 = *(u64 *)(r0 +0) 26: (7b) *(u64 *)(r1 +0) = r3 27: (95) exit The verifier treats this as safe, leading to oob read/write access due to an incorrect verifier conclusion: func#0 @0 0: R1=ctx(off=0,imm=0) R10=fp0 0: (b7) r6 = 1024 ; R6_w=1024 1: (b7) r7 = 0 ; R7_w=0 2: (b7) r8 = 0 ; R8_w=0 3: (b7) r9 = -2147483648 ; R9_w=-2147483648 4: (97) r6 %= 1025 ; R6_w=scalar() 5: (05) goto pc+0 6: (bd) if r6 <= r9 goto pc+2 ; R6_w=scalar(umin=18446744071562067969,var_off=(0xffffffff00000000; 0xffffffff)) R9_w=-2147483648 7: (97) r6 %= 1 ; R6_w=scalar() 8: (b7) r9 = 0 ; R9=0 9: (bd) if r6 <= r9 goto pc+1 ; R6=scalar(umin=1) R9=0 10: (b7) r6 = 0 ; R6_w=0 11: (b7) r0 = 0 ; R0_w=0 12: (63) *(u32 *)(r10 -4) = r0 last_idx 12 first_idx 9 regs=1 stack=0 before 11: (b7) r0 = 0 13: R0_w=0 R10=fp0 fp-8=0000???? 
13: (18) r4 = 0xffff8ad3886c2a00 ; R4_w=map_ptr(off=0,ks=4,vs=48,imm=0) 15: (bf) r1 = r4 ; R1_w=map_ptr(off=0,ks=4,vs=48,imm=0) R4_w=map_ptr(off=0,ks=4,vs=48,imm=0) 16: (bf) r2 = r10 ; R2_w=fp0 R10=fp0 17: (07) r2 += -4 ; R2_w=fp-4 18: (85) call bpf_map_lookup_elem#1 ; R0=map_value_or_null(id=1,off=0,ks=4,vs=48,imm=0) 19: (55) if r0 != 0x0 goto pc+1 ; R0=0 20: (95) exit from 19 to 21: R0=map_value(off=0,ks=4,vs=48,imm=0) R6=0 R7=0 R8=0 R9=0 R10=fp0 fp-8=mmmm???? 21: (77) r6 >>= 10 ; R6_w=0 22: (27) r6 *= 8192 ; R6_w=0 23: (bf) r1 = r0 ; R0=map_value(off=0,ks=4,vs=48,imm=0) R1_w=map_value(off=0,ks=4,vs=48,imm=0) 24: (0f) r0 += r6 last_idx 24 first_idx 19 regs=40 stack=0 before 23: (bf) r1 = r0 regs=40 stack=0 before 22: (27) r6 *= 8192 regs=40 stack=0 before 21: (77) r6 >>= 10 regs=40 stack=0 before 19: (55) if r0 != 0x0 goto pc+1 parent didn't have regs=40 stack=0 marks: R0_rw=map_value_or_null(id=1,off=0,ks=4,vs=48,imm=0) R6_rw=P0 R7=0 R8=0 R9=0 R10=fp0 fp-8=mmmm???? last_idx 18 first_idx 9 regs=40 stack=0 before 18: (85) call bpf_map_lookup_elem#1 regs=40 stack=0 before 17: (07) r2 += -4 regs=40 stack=0 before 16: (bf) r2 = r10 regs=40 stack=0 before 15: (bf) r1 = r4 regs=40 stack=0 before 13: (18) r4 = 0xffff8ad3886c2a00 regs=40 stack=0 before 12: (63) *(u32 *)(r10 -4) = r0 regs=40 stack=0 before 11: (b7) r0 = 0 regs=40 stack=0 before 10: (b7) r6 = 0 25: (79) r3 = *(u64 *)(r0 +0) ; R0_w=map_value(off=0,ks=4,vs=48,imm=0) R3_w=scalar() 26: (7b) *(u64 *)(r1 +0) = r3 ; R1_w=map_value(off=0,ks=4,vs=48,imm=0) R3_w=scalar() 27: (95) exit from 9 to 11: R1=ctx(off=0,imm=0) R6=0 R7=0 R8=0 R9=0 R10=fp0 11: (b7) r0 = 0 ; R0_w=0 12: (63) *(u32 *)(r10 -4) = r0 last_idx 12 first_idx 11 regs=1 stack=0 before 11: (b7) r0 = 0 13: R0_w=0 R10=fp0 fp-8=0000???? 13: (18) r4 = 0xffff8ad3886c2a00 ; R4_w=map_ptr(off=0,ks=4,vs=48,imm=0) 15: (bf) r1 = r4 ; R1_w=map_ptr(off=0,ks=4,vs=48,imm=0) R4_w=map_ptr(off=0,ks=4,vs=48,imm=0) 16: (bf) r2 = r10 ; R2_w=fp0 R10=fp0 17: (07) r2 += -4 ; R2_w=fp-4 18: (85) call bpf_map_lookup_elem#1 frame 0: propagating r6 last_idx 19 first_idx 11 regs=40 stack=0 before 18: (85) call bpf_map_lookup_elem#1 regs=40 stack=0 before 17: (07) r2 += -4 regs=40 stack=0 before 16: (bf) r2 = r10 regs=40 stack=0 before 15: (bf) r1 = r4 regs=40 stack=0 before 13: (18) r4 = 0xffff8ad3886c2a00 regs=40 stack=0 before 12: (63) *(u32 *)(r10 -4) = r0 regs=40 stack=0 before 11: (b7) r0 = 0 parent didn't have regs=40 stack=0 marks: R1=ctx(off=0,imm=0) R6_r=P0 R7=0 R8=0 R9=0 R10=fp0 last_idx 9 first_idx 9 regs=40 stack=0 before 9: (bd) if r6 <= r9 goto pc+1 parent didn't have regs=40 stack=0 marks: R1=ctx(off=0,imm=0) R6_rw=Pscalar() R7_w=0 R8_w=0 R9_rw=0 R10=fp0 last_idx 8 first_idx 0 regs=40 stack=0 before 8: (b7) r9 = 0 regs=40 stack=0 before 7: (97) r6 %= 1 regs=40 stack=0 before 6: (bd) if r6 <= r9 goto pc+2 regs=40 stack=0 before 5: (05) goto pc+0 regs=40 stack=0 before 4: (97) r6 %= 1025 regs=40 stack=0 before 3: (b7) r9 = -2147483648 regs=40 stack=0 before 2: (b7) r8 = 0 regs=40 stack=0 before 1: (b7) r7 = 0 regs=40 stack=0 before 0: (b7) r6 = 1024 19: safe frame 0: propagating r6 last_idx 9 first_idx 0 regs=40 stack=0 before 6: (bd) if r6 <= r9 goto pc+2 regs=40 stack=0 before 5: (05) goto pc+0 regs=40 stack=0 before 4: (97) r6 %= 1025 regs=40 stack=0 before 3: (b7) r9 = -2147483648 regs=40 stack=0 before 2: (b7) r8 = 0 regs=40 stack=0 before 1: (b7) r7 = 0 regs=40 stack=0 before 0: (b7) r6 = 1024 from 6 to 9: safe verification time 110 usec stack depth 4 processed 36 insns (limit 1000000) 
max_states_per_insn 0 total_states 3 peak_states 3 mark_read 2 The verifier considers this program as safe by mistakenly pruning unsafe code paths. In the above func#0, code lines 0-10 are of interest. In line 0-3 registers r6 to r9 are initialized with known scalar values. In line 4 the register r6 is reset to an unknown scalar given the verifier does not track modulo operations. Due to this, the verifier can also not determine precisely which branches in line 6 and 9 are taken, therefore it needs to explore them both. As can be seen, the verifier starts with exploring the false/fall-through paths first. The 'from 19 to 21' path has both r6=0 and r9=0 and the pointer arithmetic on r0 += r6 is therefore considered safe. Given the arithmetic, r6 is correctly marked for precision tracking where backtracking kicks in where it walks back the current path all the way where r6 was set to 0 in the fall-through branch. Next, the pruning logics pops the path 'from 9 to 11' from the stack. Also here, the state of the registers is the same, that is, r6=0 and r9=0, so that at line 19 the path can be pruned as it is considered safe. It is interesting to note that the conditional in line 9 turned r6 into a more precise state, that is, in the fall-through path at the beginning of line 10, it is R6=scalar(umin=1), and in the branch-taken path (which is analyzed here) at the beginning of line 11, r6 turned into a known const r6=0 as r9=0 prior to that and therefore (unsigned) r6 <= 0 concludes that r6 must be 0 (**): [...] ; R6_w=scalar() 9: (bd) if r6 <= r9 goto pc+1 ; R6=scalar(umin=1) R9=0 [...] from 9 to 11: R1=ctx(off=0,imm=0) R6=0 R7=0 R8=0 R9=0 R10=fp0 [...] The next path is 'from 6 to 9'. The verifier considers the old and current state equivalent, and therefore prunes the search incorrectly. Looking into the two states which are being compared by the pruning logic at line 9, the old state consists of R6_rwD=Pscalar() R9_rwD=0 R10=fp0 and the new state consists of R1=ctx(off=0,imm=0) R6_w=scalar(umax=18446744071562067968) R7_w=0 R8_w=0 R9_w=-2147483648 R10=fp0. While r6 had the reg->precise flag correctly set in the old state, r9 did not. Both r6'es are considered as equivalent given the old one is a superset of the current, more precise one, however, r9's actual values (0 vs 0x80000000) mismatch. Given the old r9 did not have reg->precise flag set, the verifier does not consider the register as contributing to the precision state of r6, and therefore it considered both r9 states as equivalent. However, for this specific pruned path (which is also the actual path taken at runtime), register r6 will be 0x400 and r9 0x80000000 when reaching line 21, thus oob-accessing the map. The purpose of precision tracking is to initially mark registers (including spilled ones) as imprecise to help verifier's pruning logic finding equivalent states it can then prune if they don't contribute to the program's safety aspects. For example, if registers are used for pointer arithmetic or to pass constant length to a helper, then the verifier sets reg->precise flag and backtracks the BPF program instruction sequence and chain of verifier states to ensure that the given register or stack slot including their dependencies are marked as precisely tracked scalar. This also includes any other registers and slots that contribute to a tracked state of given registers/stack slot. This backtracking relies on recorded jmp_history and is able to traverse entire chain of parent states. 
This process ends only when all the necessary registers/slots and their transitive dependencies are marked as precise. The backtrack_insn() is called from the current instruction up to the first instruction, and its purpose is to compute a bitmask of registers and stack slots that need precision tracking in the parent's verifier state. For example, if a current instruction is r6 = r7, then r6 needs precision after this instruction and r7 needs precision before this instruction, that is, in the parent state. Hence for the latter r7 is marked and r6 unmarked. For the class of jmp/jmp32 instructions, backtrack_insn() today only looks at call and exit instructions and for all other conditionals the masks remain as-is. However, in the given situation register r6 has a dependency on r9 (as described above in **), so also that one needs to be marked for precision tracking. In other words, if an imprecise register influences a precise one, then the imprecise register should also be marked precise. Meaning, in the parent state both dest and src register need to be tracked for precision and therefore the marking must be more conservative by setting reg->precise flag for both. The precision propagation needs to cover both for the conditional: if the src reg was marked but not the dst reg and vice versa. After the fix the program is correctly rejected: func#0 @0 0: R1=ctx(off=0,imm=0) R10=fp0 0: (b7) r6 = 1024 ; R6_w=1024 1: (b7) r7 = 0 ; R7_w=0 2: (b7) r8 = 0 ; R8_w=0 3: (b7) r9 = -2147483648 ; R9_w=-2147483648 4: (97) r6 %= 1025 ; R6_w=scalar() 5: (05) goto pc+0 6: (bd) if r6 <= r9 goto pc+2 ; R6_w=scalar(umin=18446744071562067969,var_off=(0xffffffff80000000; 0x7fffffff),u32_min=-2147483648) R9_w=-2147483648 7: (97) r6 %= 1 ; R6_w=scalar() 8: (b7) r9 = 0 ; R9=0 9: (bd) if r6 <= r9 goto pc+1 ; R6=scalar(umin=1) R9=0 10: (b7) r6 = 0 ; R6_w=0 11: (b7) r0 = 0 ; R0_w=0 12: (63) *(u32 *)(r10 -4) = r0 last_idx 12 first_idx 9 regs=1 stack=0 before 11: (b7) r0 = 0 13: R0_w=0 R10=fp0 fp-8=0000???? 13: (18) r4 = 0xffff9290dc5bfe00 ; R4_w=map_ptr(off=0,ks=4,vs=48,imm=0) 15: (bf) r1 = r4 ; R1_w=map_ptr(off=0,ks=4,vs=48,imm=0) R4_w=map_ptr(off=0,ks=4,vs=48,imm=0) 16: (bf) r2 = r10 ; R2_w=fp0 R10=fp0 17: (07) r2 += -4 ; R2_w=fp-4 18: (85) call bpf_map_lookup_elem#1 ; R0=map_value_or_null(id=1,off=0,ks=4,vs=48,imm=0) 19: (55) if r0 != 0x0 goto pc+1 ; R0=0 20: (95) exit from 19 to 21: R0=map_value(off=0,ks=4,vs=48,imm=0) R6=0 R7=0 R8=0 R9=0 R10=fp0 fp-8=mmmm???? 21: (77) r6 >>= 10 ; R6_w=0 22: (27) r6 *= 8192 ; R6_w=0 23: (bf) r1 = r0 ; R0=map_value(off=0,ks=4,vs=48,imm=0) R1_w=map_value(off=0,ks=4,vs=48,imm=0) 24: (0f) r0 += r6 last_idx 24 first_idx 19 regs=40 stack=0 before 23: (bf) r1 = r0 regs=40 stack=0 before 22: (27) r6 *= 8192 regs=40 stack=0 before 21: (77) r6 >>= 10 regs=40 stack=0 before 19: (55) if r0 != 0x0 goto pc+1 parent didn't have regs=40 stack=0 marks: R0_rw=map_value_or_null(id=1,off=0,ks=4,vs=48,imm=0) R6_rw=P0 R7=0 R8=0 R9=0 R10=fp0 fp-8=mmmm???? 
last_idx 18 first_idx 9 regs=40 stack=0 before 18: (85) call bpf_map_lookup_elem#1 regs=40 stack=0 before 17: (07) r2 += -4 regs=40 stack=0 before 16: (bf) r2 = r10 regs=40 stack=0 before 15: (bf) r1 = r4 regs=40 stack=0 before 13: (18) r4 = 0xffff9290dc5bfe00 regs=40 stack=0 before 12: (63) *(u32 *)(r10 -4) = r0 regs=40 stack=0 before 11: (b7) r0 = 0 regs=40 stack=0 before 10: (b7) r6 = 0 25: (79) r3 = *(u64 *)(r0 +0) ; R0_w=map_value(off=0,ks=4,vs=48,imm=0) R3_w=scalar() 26: (7b) *(u64 *)(r1 +0) = r3 ; R1_w=map_value(off=0,ks=4,vs=48,imm=0) R3_w=scalar() 27: (95) exit from 9 to 11: R1=ctx(off=0,imm=0) R6=0 R7=0 R8=0 R9=0 R10=fp0 11: (b7) r0 = 0 ; R0_w=0 12: (63) *(u32 *)(r10 -4) = r0 last_idx 12 first_idx 11 regs=1 stack=0 before 11: (b7) r0 = 0 13: R0_w=0 R10=fp0 fp-8=0000???? 13: (18) r4 = 0xffff9290dc5bfe00 ; R4_w=map_ptr(off=0,ks=4,vs=48,imm=0) 15: (bf) r1 = r4 ; R1_w=map_ptr(off=0,ks=4,vs=48,imm=0) R4_w=map_ptr(off=0,ks=4,vs=48,imm=0) 16: (bf) r2 = r10 ; R2_w=fp0 R10=fp0 17: (07) r2 += -4 ; R2_w=fp-4 18: (85) call bpf_map_lookup_elem#1 frame 0: propagating r6 last_idx 19 first_idx 11 regs=40 stack=0 before 18: (85) call bpf_map_lookup_elem#1 regs=40 stack=0 before 17: (07) r2 += -4 regs=40 stack=0 before 16: (bf) r2 = r10 regs=40 stack=0 before 15: (bf) r1 = r4 regs=40 stack=0 before 13: (18) r4 = 0xffff9290dc5bfe00 regs=40 stack=0 before 12: (63) *(u32 *)(r10 -4) = r0 regs=40 stack=0 before 11: (b7) r0 = 0 parent didn't have regs=40 stack=0 marks: R1=ctx(off=0,imm=0) R6_r=P0 R7=0 R8=0 R9=0 R10=fp0 last_idx 9 first_idx 9 regs=40 stack=0 before 9: (bd) if r6 <= r9 goto pc+1 parent didn't have regs=240 stack=0 marks: R1=ctx(off=0,imm=0) R6_rw=Pscalar() R7_w=0 R8_w=0 R9_rw=P0 R10=fp0 last_idx 8 first_idx 0 regs=240 stack=0 before 8: (b7) r9 = 0 regs=40 stack=0 before 7: (97) r6 %= 1 regs=40 stack=0 before 6: (bd) if r6 <= r9 goto pc+2 regs=240 stack=0 before 5: (05) goto pc+0 regs=240 stack=0 before 4: (97) r6 %= 1025 regs=240 stack=0 before 3: (b7) r9 = -2147483648 regs=40 stack=0 before 2: (b7) r8 = 0 regs=40 stack=0 before 1: (b7) r7 = 0 regs=40 stack=0 before 0: (b7) r6 = 1024 19: safe from 6 to 9: R1=ctx(off=0,imm=0) R6_w=scalar(umax=18446744071562067968) R7_w=0 R8_w=0 R9_w=-2147483648 R10=fp0 9: (bd) if r6 <= r9 goto pc+1 last_idx 9 first_idx 0 regs=40 stack=0 before 6: (bd) if r6 <= r9 goto pc+2 regs=240 stack=0 before 5: (05) goto pc+0 regs=240 stack=0 before 4: (97) r6 %= 1025 regs=240 stack=0 before 3: (b7) r9 = -2147483648 regs=40 stack=0 before 2: (b7) r8 = 0 regs=40 stack=0 before 1: (b7) r7 = 0 regs=40 stack=0 before 0: (b7) r6 = 1024 last_idx 9 first_idx 0 regs=200 stack=0 before 6: (bd) if r6 <= r9 goto pc+2 regs=240 stack=0 before 5: (05) goto pc+0 regs=240 stack=0 before 4: (97) r6 %= 1025 regs=240 stack=0 before 3: (b7) r9 = -2147483648 regs=40 stack=0 before 2: (b7) r8 = 0 regs=40 stack=0 before 1: (b7) r7 = 0 regs=40 stack=0 before 0: (b7) r6 = 1024 11: R6=scalar(umax=18446744071562067968) R9=-2147483648 11: (b7) r0 = 0 ; R0_w=0 12: (63) *(u32 *)(r10 -4) = r0 last_idx 12 first_idx 11 regs=1 stack=0 before 11: (b7) r0 = 0 13: R0_w=0 R10=fp0 fp-8=0000???? 
13: (18) r4 = 0xffff9290dc5bfe00 ; R4_w=map_ptr(off=0,ks=4,vs=48,imm=0) 15: (bf) r1 = r4 ; R1_w=map_ptr(off=0,ks=4,vs=48,imm=0) R4_w=map_ptr(off=0,ks=4,vs=48,imm=0) 16: (bf) r2 = r10 ; R2_w=fp0 R10=fp0 17: (07) r2 += -4 ; R2_w=fp-4 18: (85) call bpf_map_lookup_elem#1 ; R0_w=map_value_or_null(id=3,off=0,ks=4,vs=48,imm=0) 19: (55) if r0 != 0x0 goto pc+1 ; R0_w=0 20: (95) exit from 19 to 21: R0=map_value(off=0,ks=4,vs=48,imm=0) R6=scalar(umax=18446744071562067968) R7=0 R8=0 R9=-2147483648 R10=fp0 fp-8=mmmm???? 21: (77) r6 >>= 10 ; R6_w=scalar(umax=18014398507384832,var_off=(0x0; 0x3fffffffffffff)) 22: (27) r6 *= 8192 ; R6_w=scalar(smax=9223372036854767616,umax=18446744073709543424,var_off=(0x0; 0xffffffffffffe000),s32_max=2147475456,u32_max=-8192) 23: (bf) r1 = r0 ; R0=map_value(off=0,ks=4,vs=48,imm=0) R1_w=map_value(off=0,ks=4,vs=48,imm=0) 24: (0f) r0 += r6 last_idx 24 first_idx 21 regs=40 stack=0 before 23: (bf) r1 = r0 regs=40 stack=0 before 22: (27) r6 *= 8192 regs=40 stack=0 before 21: (77) r6 >>= 10 parent didn't have regs=40 stack=0 marks: R0_rw=map_value(off=0,ks=4,vs=48,imm=0) R6_r=Pscalar(umax=18446744071562067968) R7=0 R8=0 R9=-2147483648 R10=fp0 fp-8=mmmm???? last_idx 19 first_idx 11 regs=40 stack=0 before 19: (55) if r0 != 0x0 goto pc+1 regs=40 stack=0 before 18: (85) call bpf_map_lookup_elem#1 regs=40 stack=0 before 17: (07) r2 += -4 regs=40 stack=0 before 16: (bf) r2 = r10 regs=40 stack=0 before 15: (bf) r1 = r4 regs=40 stack=0 before 13: (18) r4 = 0xffff9290dc5bfe00 regs=40 stack=0 before 12: (63) *(u32 *)(r10 -4) = r0 regs=40 stack=0 before 11: (b7) r0 = 0 parent didn't have regs=40 stack=0 marks: R1=ctx(off=0,imm=0) R6_rw=Pscalar(umax=18446744071562067968) R7_w=0 R8_w=0 R9_w=-2147483648 R10=fp0 last_idx 9 first_idx 0 regs=40 stack=0 before 9: (bd) if r6 <= r9 goto pc+1 regs=240 stack=0 before 6: (bd) if r6 <= r9 goto pc+2 regs=240 stack=0 before 5: (05) goto pc+0 regs=240 stack=0 before 4: (97) r6 %= 1025 regs=240 stack=0 before 3: (b7) r9 = -2147483648 regs=40 stack=0 before 2: (b7) r8 = 0 regs=40 stack=0 before 1: (b7) r7 = 0 regs=40 stack=0 before 0: (b7) r6 = 1024 math between map_value pointer and register with unbounded min value is not allowed verification time 886 usec stack depth 4 processed 49 insns (limit 1000000) max_states_per_insn 1 total_states 5 peak_states 5 mark_read 2 Fixes: b5dc0163d8fd ("bpf: precise scalar_value tracking") Reported-by: Juan Jose Lopez Jaimez Reported-by: Meador Inge Reported-by: Simon Scannell Reported-by: Nenad Stojanovski Signed-off-by: Daniel Borkmann Co-developed-by: Andrii Nakryiko Signed-off-by: Andrii Nakryiko Reviewed-by: John Fastabend Reviewed-by: Juan Jose Lopez Jaimez Reviewed-by: Meador Inge Reviewed-by: Simon Scannell Signed-off-by: Sasha Levin Signed-off-by: Ywenrui --- kernel/bpf/verifier.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index edd5685f0153..61b8a2ce9600 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1923,6 +1923,21 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, } } else if (opcode == BPF_EXIT) { return -ENOTSUPP; + } else if (BPF_SRC(insn->code) == BPF_X) { + if (!(*reg_mask & (dreg | sreg))) + return 0; + /* dreg sreg + * Both dreg and sreg need precision before + * this insn. If only sreg was marked precise + * before it would be equally necessary to + * propagate it to dreg. 
+ */ + *reg_mask |= (sreg | dreg); + /* else dreg K + * Only dreg still needs precision before + * this insn, so for the K-based conditional + * there is nothing new to be marked. + */ } } else if (class == BPF_LD) { if (!(*reg_mask & dreg)) -- Gitee From dfc13cbe92e307034e64ccf7db61c75a44e68bcf Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Tue, 5 Sep 2023 15:04:09 -0700 Subject: [PATCH 213/243] netfilter: ipset: add the missing IP_SET_HASH_WITH_NET0 macro for ip_set_hash_netportnet.c mainline inclusion from mainline-v6.6-rc1 commit 050d91c03b28ca479df13dfb02bcd2c60dd6a878 category: bugfix issue: #I84N7U CVE:CVE-2023-42753 Signed-off-by: Ywenrui00491 The missing IP_SET_HASH_WITH_NET0 macro in ip_set_hash_netportnet can lead to the use of wrong `CIDR_POS(c)` for calculating array offsets, which can lead to integer underflow. As a result, it leads to slab out-of-bound access. This patch adds back the IP_SET_HASH_WITH_NET0 macro to ip_set_hash_netportnet to address the issue. Fixes: 886503f34d63 ("netfilter: ipset: actually allow allowable CIDR 0 in hash:net,port,net") Suggested-by: Jozsef Kadlecsik Signed-off-by: Kyle Zeng Acked-by: Jozsef Kadlecsik Signed-off-by: Florian Westphal Signed-off-by: Lu Wei --- net/netfilter/ipset/ip_set_hash_netportnet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 934c1712cba8..0523fcf687e4 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -35,6 +35,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net"); #define IP_SET_HASH_WITH_PROTO #define IP_SET_HASH_WITH_NETS #define IPSET_NET_COUNT 2 +#define IP_SET_HASH_WITH_NET0 /* IPv4 variant */ -- Gitee From ecfefa906c96d15b6e076ec840a03b2b6716c67f Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 16 Jun 2022 10:13:56 +0800 Subject: [PATCH 214/243] ext4: fix use-after-free in ext4_xattr_set_entry stable inclusion from stable-5.10.137 commit bb8592efcf8ef2f62947745d3182ea05b5256a15 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: lizongfeng --------------------------------------- commit 67d7d8ad99beccd9fe92d585b87f1760dc9018e3 upstream. Hulk Robot reported a issue: ================================================================== BUG: KASAN: use-after-free in ext4_xattr_set_entry+0x18ab/0x3500 Write of size 4105 at addr ffff8881675ef5f4 by task syz-executor.0/7092 CPU: 1 PID: 7092 Comm: syz-executor.0 Not tainted 4.19.90-dirty #17 Call Trace: [...] 
memcpy+0x34/0x50 mm/kasan/kasan.c:303 ext4_xattr_set_entry+0x18ab/0x3500 fs/ext4/xattr.c:1747 ext4_xattr_ibody_inline_set+0x86/0x2a0 fs/ext4/xattr.c:2205 ext4_xattr_set_handle+0x940/0x1300 fs/ext4/xattr.c:2386 ext4_xattr_set+0x1da/0x300 fs/ext4/xattr.c:2498 __vfs_setxattr+0x112/0x170 fs/xattr.c:149 __vfs_setxattr_noperm+0x11b/0x2a0 fs/xattr.c:180 __vfs_setxattr_locked+0x17b/0x250 fs/xattr.c:238 vfs_setxattr+0xed/0x270 fs/xattr.c:255 setxattr+0x235/0x330 fs/xattr.c:520 path_setxattr+0x176/0x190 fs/xattr.c:539 __do_sys_lsetxattr fs/xattr.c:561 [inline] __se_sys_lsetxattr fs/xattr.c:557 [inline] __x64_sys_lsetxattr+0xc2/0x160 fs/xattr.c:557 do_syscall_64+0xdf/0x530 arch/x86/entry/common.c:298 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x459fe9 RSP: 002b:00007fa5e54b4c08 EFLAGS: 00000246 ORIG_RAX: 00000000000000bd RAX: ffffffffffffffda RBX: 000000000051bf60 RCX: 0000000000459fe9 RDX: 00000000200003c0 RSI: 0000000020000180 RDI: 0000000020000140 RBP: 000000000051bf60 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000001009 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc73c93fc0 R14: 000000000051bf60 R15: 00007fa5e54b4d80 [...] ================================================================== Above issue may happen as follows: ------------------------------------- ext4_xattr_set ext4_xattr_set_handle ext4_xattr_ibody_find >> s->end < s->base >> no EXT4_STATE_XATTR >> xattr_check_inode is not executed ext4_xattr_ibody_set ext4_xattr_set_entry >> size_t min_offs = s->end - s->base >> UAF in memcpy we can easily reproduce this problem with the following commands: mkfs.ext4 -F /dev/sda mount -o debug_want_extra_isize=128 /dev/sda /mnt touch /mnt/file setfattr -n user.cat -v `seq -s z 4096|tr -d '[:digit:]'` /mnt/file In ext4_xattr_ibody_find, we have the following assignment logic: header = IHDR(inode, raw_inode) = raw_inode + EXT4_GOOD_OLD_INODE_SIZE + i_extra_isize is->s.base = IFIRST(header) = header + sizeof(struct ext4_xattr_ibody_header) is->s.end = raw_inode + s_inode_size In ext4_xattr_set_entry min_offs = s->end - s->base = s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize - sizeof(struct ext4_xattr_ibody_header) last = s->first free = min_offs - ((void *)last - s->base) - sizeof(__u32) = s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize - sizeof(struct ext4_xattr_ibody_header) - sizeof(__u32) In the calculation formula, all values except s_inode_size and i_extra_size are fixed values. When i_extra_size is the maximum value s_inode_size - EXT4_GOOD_OLD_INODE_SIZE, min_offs is -4 and free is -8. The value overflows. As a result, the preceding issue is triggered when memcpy is executed. Therefore, when finding xattr or setting xattr, check whether there is space for storing xattr in the inode to resolve this issue. 
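The helper used in the fix below, EXT4_INODE_HAS_XATTR_SPACE(), is introduced by a companion patch and its definition is not part of this diff; conceptually it reduces to a check along the following lines (an illustrative sketch derived from the calculation above, not the exact macro; the EXT4_XATTR_PAD rounding is omitted for brevity):

	/* Sketch: in-inode xattr space exists only if the fixed parts fit,
	 * i.e. s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize
	 *      - sizeof(struct ext4_xattr_ibody_header) must not go negative.
	 */
	static inline bool inode_has_xattr_space(struct inode *inode)
	{
		return EXT4_I(inode)->i_extra_isize != 0 &&
		       EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize +
		       sizeof(struct ext4_xattr_ibody_header) <=
		       EXT4_INODE_SIZE(inode->i_sb);
	}

With i_extra_isize at its maximum, the sum above exceeds the inode size, the check fails, and the negative min_offs/free computation can no longer be reached.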
Cc: stable@kernel.org Reported-by: Hulk Robot Signed-off-by: Baokun Li Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220616021358.2504451-3-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: lizongfeng Signed-off-by: Ywenrui --- fs/ext4/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index eab334636200..b5016eb7b373 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2170,8 +2170,9 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, struct ext4_inode *raw_inode; int error; - if (EXT4_I(inode)->i_extra_isize == 0) + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) return 0; + raw_inode = ext4_raw_inode(&is->iloc); header = IHDR(inode, raw_inode); is->s.base = is->s.first = IFIRST(header); @@ -2199,8 +2200,9 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, struct ext4_xattr_search *s = &is->s; int error; - if (EXT4_I(inode)->i_extra_isize == 0) + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) return -ENOSPC; + error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); if (error) return error; -- Gitee From eedd8d9162eea87dba2b5d49fc5801859610ab91 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 18 Oct 2023 13:56:54 +0200 Subject: [PATCH 215/243] perf: Disallow mis-matched inherited group reads stable inclusion from stable-v5.10.199 commit 487a8e24643a0effb2ba19cad3227fc75dc3c4b7 category: bugfix issue:#I8B1VU CVE:CVE-2023-5717 Signed-off-by: Ywenrui44091 --------------------------------------- commit 32671e3799ca2e4590773fd0e63aaa4229e50c06 upstream. Because group consistency is non-atomic between parent (filedesc) and children (inherited) events, it is possible for PERF_FORMAT_GROUP read() to try and sum non-matching counter groups -- with non-sensical results. Add group_generation to distinguish the case where a parent group removes and adds an event and thus has the same number, but a different configuration of events as inherited groups. This became a problem when commit fa8c269353d5 ("perf/core: Invert perf_read_group() loops") flipped the order of child_list and sibling_list. Previously it would iterate the group (sibling_list) first, and for each sibling traverse the child_list. In this order, only the group composition of the parent is relevant. By flipping the order the group composition of the child (inherited) events becomes an issue and the mis-match in group composition becomes evident. That said; even prior to this commit, while reading of a group that is not equally inherited was not broken, it still made no sense. (Ab)use ECHILD as error return to indicate issues with child process group composition. Fixes: fa8c269353d5 ("perf/core: Invert perf_read_group() loops") Reported-by: Budimir Markovic Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231018115654.GK33217@noisy.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yang Jihong --- include/linux/perf_event.h | 1 + kernel/events/core.c | 39 ++++++++++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 67a50c78232f..03e454582bd3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -659,6 +659,7 @@ struct perf_event { /* The cumulative AND of all event_caps for events in this group. 
*/ int group_caps; + unsigned int group_generation; struct perf_event *group_leader; struct pmu *pmu; void *pmu_private; diff --git a/kernel/events/core.c b/kernel/events/core.c index 3e7a0e996c7a..01238adfdb72 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2048,6 +2048,7 @@ static void perf_group_attach(struct perf_event *event) list_add_tail(&event->sibling_list, &group_leader->sibling_list); group_leader->nr_siblings++; + group_leader->group_generation++; perf_event__header_size(group_leader); @@ -2240,6 +2241,7 @@ static void perf_group_detach(struct perf_event *event) if (leader != event) { list_del_init(&event->sibling_list); event->group_leader->nr_siblings--; + event->group_leader->group_generation++; goto out; } @@ -5217,7 +5219,7 @@ static int __perf_read_group_add(struct perf_event *leader, u64 read_format, u64 *values) { struct perf_event_context *ctx = leader->ctx; - struct perf_event *sub; + struct perf_event *sub, *parent; unsigned long flags; int n = 1; /* skip @nr */ int ret; @@ -5227,6 +5229,33 @@ static int __perf_read_group_add(struct perf_event *leader, return ret; raw_spin_lock_irqsave(&ctx->lock, flags); + /* + * Verify the grouping between the parent and child (inherited) + * events is still in tact. + * + * Specifically: + * - leader->ctx->lock pins leader->sibling_list + * - parent->child_mutex pins parent->child_list + * - parent->ctx->mutex pins parent->sibling_list + * + * Because parent->ctx != leader->ctx (and child_list nests inside + * ctx->mutex), group destruction is not atomic between children, also + * see perf_event_release_kernel(). Additionally, parent can grow the + * group. + * + * Therefore it is possible to have parent and child groups in a + * different configuration and summing over such a beast makes no sense + * what so ever. + * + * Reject this. + */ + parent = leader->parent; + if (parent && + (parent->group_generation != leader->group_generation || + parent->nr_siblings != leader->nr_siblings)) { + ret = -ECHILD; + goto unlock; + } /* * Since we co-schedule groups, {enabled,running} times of siblings @@ -5256,8 +5285,9 @@ static int __perf_read_group_add(struct perf_event *leader, values[n++] = primary_event_id(sub); } +unlock: raw_spin_unlock_irqrestore(&ctx->lock, flags); - return 0; + return ret; } static int perf_read_group(struct perf_event *event, @@ -5276,10 +5306,6 @@ static int perf_read_group(struct perf_event *event, values[0] = 1 + leader->nr_siblings; - /* - * By locking the child_mutex of the leader we effectively - * lock the child list of all siblings.. XXX explain how. - */ mutex_lock(&leader->child_mutex); ret = __perf_read_group_add(leader, read_format, values); @@ -12786,6 +12812,7 @@ static int inherit_group(struct perf_event *parent_event, !perf_get_aux_event(child_ctr, leader)) return -EINVAL; } + leader->group_generation = parent_event->group_generation; return 0; } -- Gitee From 681dbe052e6d9489e2a19b74ca1d2ff377f8ab5f Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 4 Aug 2023 15:10:59 -0400 Subject: [PATCH 216/243] USB: core: Unite old scheme and new scheme descriptor reads stable inclusion from stable-v5.10.195 commit 3cef18d13f37c80160dc7302cdd25e8749821a2d category: bugfix issue: NA CVE:CVE-2023-37453 Signed-off-by: Ywenrui44091 --------------------------------------- commit 85d07c55621676d47d873d2749b88f783cd4d5a1 upstream. 
In preparation for reworking the usb_get_device_descriptor() routine, it is desirable to unite the two different code paths responsible for initially determining endpoint 0's maximum packet size in a newly discovered USB device. Making this determination presents a chicken-and-egg sort of problem, in that the only way to learn the maxpacket value is to get it from the device descriptor retrieved from the device, but communicating with the device to retrieve a descriptor requires us to know beforehand the ep0 maxpacket size. In practice this problem is solved in two different ways, referred to in hub.c as the "old scheme" and the "new scheme". The old scheme (which is the approach recommended by the USB-2 spec) involves asking the device to send just the first eight bytes of its device descriptor. Such a transfer uses packets containing no more than eight bytes each, and every USB device must have an ep0 maxpacket size >= 8, so this should succeed. Since the bMaxPacketSize0 field of the device descriptor lies within the first eight bytes, this is all we need. The new scheme is an imitation of the technique used in an early Windows USB implementation, giving it the happy advantage of working with a wide variety of devices (some of them at the time would not work with the old scheme, although that's probably less true now). It involves making an initial guess of the ep0 maxpacket size, asking the device to send up to 64 bytes worth of its device descriptor (which is only 18 bytes long), and then resetting the device to clear any error condition that might have resulted from the guess being wrong. The initial guess is determined by the connection speed; it should be correct in all cases other than full speed, for which the allowed values are 8, 16, 32, and 64 (in this case the initial guess is 64). The reason for this patch is that the old- and new-scheme parts of hub_port_init() use different code paths, one involving usb_get_device_descriptor() and one not, for their initial reads of the device descriptor. Since these reads have essentially the same purpose and are made under essentially the same circumstances, this is illogical. It makes more sense to have both of them use a common subroutine. This subroutine does basically what the new scheme's code did, because that approach is more general than the one used by the old scheme. It only needs to know how many bytes to transfer and whether or not it is being called for the first iteration of a retry loop (in case of certain time-out errors). There are two main differences from the former code: We initialize the bDescriptorType field of the transfer buffer to 0 before performing the transfer, to avoid possibly accessing an uninitialized value afterward. We read the device descriptor into a temporary buffer rather than storing it directly into udev->descriptor, which the old scheme implementation used to do. Since the whole point of this first read of the device descriptor is to determine the bMaxPacketSize0 value, that is what the new routine returns (or an error code). The value is stored in a local variable rather than in udev->descriptor. As a side effect, this necessitates moving a section of code that checks the bcdUSB field for SuperSpeed devices until after the full device descriptor has been retrieved. 
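In the reworked hub_port_init() shown below, both schemes therefore reduce to calls of one helper that differ only in the requested length, roughly:

	/* new scheme: read up to 64 bytes of the device descriptor */
	maxp0 = get_bMaxPacketSize0(udev, buf, GET_DESCRIPTOR_BUFSIZE, retries == 0);

	/* old scheme: read only the first 8 bytes */
	maxp0 = get_bMaxPacketSize0(udev, buf, 8, retries == 0);

A negative maxp0 is an error code; otherwise it is the bMaxPacketSize0 value read from the device.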
Signed-off-by: Alan Stern Cc: Oliver Neukum Link: https://lore.kernel.org/r/495cb5d4-f956-4f4a-a875-1e67e9489510@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lin Yujun --- drivers/usb/core/hub.c | 173 ++++++++++++++++++++++------------------- 1 file changed, 94 insertions(+), 79 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 18ee3914b468..7d32ab77c088 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4592,6 +4592,67 @@ static int hub_enable_device(struct usb_device *udev) return hcd->driver->enable_device(hcd, udev); } +/* + * Get the bMaxPacketSize0 value during initialization by reading the + * device's device descriptor. Since we don't already know this value, + * the transfer is unsafe and it ignores I/O errors, only testing for + * reasonable received values. + * + * For "old scheme" initialization, size will be 8 so we read just the + * start of the device descriptor, which should work okay regardless of + * the actual bMaxPacketSize0 value. For "new scheme" initialization, + * size will be 64 (and buf will point to a sufficiently large buffer), + * which might not be kosher according to the USB spec but it's what + * Windows does and what many devices expect. + * + * Returns: bMaxPacketSize0 or a negative error code. + */ +static int get_bMaxPacketSize0(struct usb_device *udev, + struct usb_device_descriptor *buf, int size, bool first_time) +{ + int i, rc; + + /* + * Retry on all errors; some devices are flakey. + * 255 is for WUSB devices, we actually need to use + * 512 (WUSB1.0[4.8.1]). + */ + for (i = 0; i < GET_MAXPACKET0_TRIES; ++i) { + /* Start with invalid values in case the transfer fails */ + buf->bDescriptorType = buf->bMaxPacketSize0 = 0; + rc = usb_control_msg(udev, usb_rcvaddr0pipe(), + USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, + USB_DT_DEVICE << 8, 0, + buf, size, + initial_descriptor_timeout); + switch (buf->bMaxPacketSize0) { + case 8: case 16: case 32: case 64: case 255: + if (buf->bDescriptorType == USB_DT_DEVICE) { + rc = buf->bMaxPacketSize0; + break; + } + fallthrough; + default: + if (rc >= 0) + rc = -EPROTO; + break; + } + + /* + * Some devices time out if they are powered on + * when already connected. They need a second + * reset, so return early. But only on the first + * attempt, lest we get into a time-out/reset loop. + */ + if (rc > 0 || (rc == -ETIMEDOUT && first_time && + udev->speed > USB_SPEED_FULL)) + break; + } + return rc; +} + +#define GET_DESCRIPTOR_BUFSIZE 64 + /* Reset device, (re)assign address, get device descriptor. * Device connection must be stable, no more debouncing needed. * Returns device in USB_STATE_ADDRESS, except on error. 
@@ -4616,6 +4677,12 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, int devnum = udev->devnum; const char *driver_name; bool do_new_scheme; + int maxp0; + struct usb_device_descriptor *buf; + + buf = kmalloc(GET_DESCRIPTOR_BUFSIZE, GFP_NOIO); + if (!buf) + return -ENOMEM; /* root hub ports have a slightly longer reset period * (from USB 2.0 spec, section 7.1.7.5) @@ -4730,9 +4797,6 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, for (retries = 0; retries < GET_DESCRIPTOR_TRIES; (++retries, msleep(100))) { if (do_new_scheme) { - struct usb_device_descriptor *buf; - int r = 0; - retval = hub_enable_device(udev); if (retval < 0) { dev_err(&udev->dev, @@ -4741,52 +4805,8 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, goto fail; } -#define GET_DESCRIPTOR_BUFSIZE 64 - buf = kmalloc(GET_DESCRIPTOR_BUFSIZE, GFP_NOIO); - if (!buf) { - retval = -ENOMEM; - continue; - } - - /* Retry on all errors; some devices are flakey. - * 255 is for WUSB devices, we actually need to use - * 512 (WUSB1.0[4.8.1]). - */ - for (operations = 0; operations < GET_MAXPACKET0_TRIES; - ++operations) { - buf->bMaxPacketSize0 = 0; - r = usb_control_msg(udev, usb_rcvaddr0pipe(), - USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, - USB_DT_DEVICE << 8, 0, - buf, GET_DESCRIPTOR_BUFSIZE, - initial_descriptor_timeout); - switch (buf->bMaxPacketSize0) { - case 8: case 16: case 32: case 64: case 255: - if (buf->bDescriptorType == - USB_DT_DEVICE) { - r = 0; - break; - } - fallthrough; - default: - if (r == 0) - r = -EPROTO; - break; - } - /* - * Some devices time out if they are powered on - * when already connected. They need a second - * reset. But only on the first attempt, - * lest we get into a time out/reset loop - */ - if (r == 0 || (r == -ETIMEDOUT && - retries == 0 && - udev->speed > USB_SPEED_FULL)) - break; - } - udev->descriptor.bMaxPacketSize0 = - buf->bMaxPacketSize0; - kfree(buf); + maxp0 = get_bMaxPacketSize0(udev, buf, + GET_DESCRIPTOR_BUFSIZE, retries == 0); retval = hub_port_reset(hub, port1, udev, delay, false); if (retval < 0) /* error or disconnect */ @@ -4797,14 +4817,13 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, retval = -ENODEV; goto fail; } - if (r) { - if (r != -ENODEV) + if (maxp0 < 0) { + if (maxp0 != -ENODEV) dev_err(&udev->dev, "device descriptor read/64, error %d\n", - r); - retval = -EMSGSIZE; + maxp0); + retval = maxp0; continue; } -#undef GET_DESCRIPTOR_BUFSIZE } /* @@ -4846,19 +4865,17 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, break; } - retval = usb_get_device_descriptor(udev, 8); - if (retval < 8) { + /* !do_new_scheme || wusb */ + maxp0 = get_bMaxPacketSize0(udev, buf, 8, retries == 0); + if (maxp0 < 0) { + retval = maxp0; if (retval != -ENODEV) dev_err(&udev->dev, "device descriptor read/8, error %d\n", retval); - if (retval >= 0) - retval = -EMSGSIZE; } else { u32 delay; - retval = 0; - delay = udev->parent->hub_delay; udev->hub_delay = min_t(u32, delay, USB_TP_TRANSMISSION_DELAY_MAX); @@ -4875,27 +4892,10 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, if (retval) goto fail; - /* - * Some superspeed devices have finished the link training process - * and attached to a superspeed hub port, but the device descriptor - * got from those devices show they aren't superspeed devices. Warm - * reset the port attached by the devices can fix them. 
- */ - if ((udev->speed >= USB_SPEED_SUPER) && - (le16_to_cpu(udev->descriptor.bcdUSB) < 0x0300)) { - dev_err(&udev->dev, "got a wrong device descriptor, " - "warm reset device\n"); - hub_port_reset(hub, port1, udev, - HUB_BH_RESET_TIME, true); - retval = -EINVAL; - goto fail; - } - - if (udev->descriptor.bMaxPacketSize0 == 0xff || - udev->speed >= USB_SPEED_SUPER) + if (maxp0 == 0xff || udev->speed >= USB_SPEED_SUPER) i = 512; else - i = udev->descriptor.bMaxPacketSize0; + i = maxp0; if (usb_endpoint_maxp(&udev->ep0.desc) != i) { if (udev->speed == USB_SPEED_LOW || !(i == 8 || i == 16 || i == 32 || i == 64)) { @@ -4921,6 +4921,20 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, goto fail; } + /* + * Some superspeed devices have finished the link training process + * and attached to a superspeed hub port, but the device descriptor + * got from those devices show they aren't superspeed devices. Warm + * reset the port attached by the devices can fix them. + */ + if ((udev->speed >= USB_SPEED_SUPER) && + (le16_to_cpu(udev->descriptor.bcdUSB) < 0x0300)) { + dev_err(&udev->dev, "got a wrong device descriptor, warm reset device\n"); + hub_port_reset(hub, port1, udev, HUB_BH_RESET_TIME, true); + retval = -EINVAL; + goto fail; + } + usb_detect_quirks(udev); if (udev->wusb == 0 && le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0201) { @@ -4941,6 +4955,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, hub_port_disable(hub, port1, 0); update_devnum(udev, devnum); /* for disconnect processing */ } + kfree(buf); return retval; } -- Gitee From eb32a0cbc8f49b08bfa68dd85f677cf1c460ceab Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 4 Aug 2023 15:12:21 -0400 Subject: [PATCH 217/243] USB: core: Change usb_get_device_descriptor() API stable inclusion from stable-v5.10.195 commit 6ceffc2ecf3de8acdce2202db1c32d8c520a230e category: bugfix issue: NA CVE:CVE-2023-37453 Signed-off-by: Ywenrui44091 --------------------------------------- commit de28e469da75359a2bb8cd8778b78aa64b1be1f4 upstream. The usb_get_device_descriptor() routine reads the device descriptor from the udev device and stores it directly in udev->descriptor. This interface is error prone, because the USB subsystem expects in-memory copies of a device's descriptors to be immutable once the device has been initialized. The interface is changed so that the device descriptor is left in a kmalloc-ed buffer, not copied into the usb_device structure. A pointer to the buffer is returned to the caller, who is then responsible for kfree-ing it. The corresponding changes needed in the various callers are fairly small. 
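With this API the callers follow an allocate, check, copy, free pattern, along these lines (a condensed sketch of the call sites updated in the diff below):

	struct usb_device_descriptor *descr;

	descr = usb_get_device_descriptor(udev);
	if (IS_ERR(descr))
		return PTR_ERR(descr);
	udev->descriptor = *descr;	/* or compare against it, depending on the caller */
	kfree(descr);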
Signed-off-by: Alan Stern Link: https://lore.kernel.org/r/d0111bb6-56c1-4f90-adf2-6cfe152f6561@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lin Yujun --- drivers/usb/core/hcd.c | 10 ++++++--- drivers/usb/core/hub.c | 44 ++++++++++++++++++++------------------ drivers/usb/core/message.c | 29 +++++++++++-------------- drivers/usb/core/usb.h | 4 ++-- 4 files changed, 44 insertions(+), 43 deletions(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index ddd1d3eef912..ed654fd0d352 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -982,6 +982,7 @@ static int register_root_hub(struct usb_hcd *hcd) { struct device *parent_dev = hcd->self.controller; struct usb_device *usb_dev = hcd->self.root_hub; + struct usb_device_descriptor *descr; const int devnum = 1; int retval; @@ -993,13 +994,16 @@ static int register_root_hub(struct usb_hcd *hcd) mutex_lock(&usb_bus_idr_lock); usb_dev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); - retval = usb_get_device_descriptor(usb_dev, USB_DT_DEVICE_SIZE); - if (retval != sizeof usb_dev->descriptor) { + descr = usb_get_device_descriptor(usb_dev); + if (IS_ERR(descr)) { + retval = PTR_ERR(descr); mutex_unlock(&usb_bus_idr_lock); dev_dbg (parent_dev, "can't read %s device descriptor %d\n", dev_name(&usb_dev->dev), retval); - return (retval < 0) ? retval : -EMSGSIZE; + return retval; } + usb_dev->descriptor = *descr; + kfree(descr); if (le16_to_cpu(usb_dev->descriptor.bcdUSB) >= 0x0201) { retval = usb_get_bos_descriptor(usb_dev); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 7d32ab77c088..52aad0a59ecd 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2644,12 +2644,17 @@ int usb_authorize_device(struct usb_device *usb_dev) } if (usb_dev->wusb) { - result = usb_get_device_descriptor(usb_dev, sizeof(usb_dev->descriptor)); - if (result < 0) { + struct usb_device_descriptor *descr; + + descr = usb_get_device_descriptor(usb_dev); + if (IS_ERR(descr)) { + result = PTR_ERR(descr); dev_err(&usb_dev->dev, "can't re-read device descriptor for " "authorization: %d\n", result); goto error_device_descriptor; } + usb_dev->descriptor = *descr; + kfree(descr); } usb_dev->authorized = 1; @@ -4678,7 +4683,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, const char *driver_name; bool do_new_scheme; int maxp0; - struct usb_device_descriptor *buf; + struct usb_device_descriptor *buf, *descr; buf = kmalloc(GET_DESCRIPTOR_BUFSIZE, GFP_NOIO); if (!buf) @@ -4911,15 +4916,16 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, usb_ep0_reinit(udev); } - retval = usb_get_device_descriptor(udev, USB_DT_DEVICE_SIZE); - if (retval < (signed)sizeof(udev->descriptor)) { + descr = usb_get_device_descriptor(udev); + if (IS_ERR(descr)) { + retval = PTR_ERR(descr); if (retval != -ENODEV) dev_err(&udev->dev, "device descriptor read/all, error %d\n", retval); - if (retval >= 0) - retval = -ENOMSG; goto fail; } + udev->descriptor = *descr; + kfree(descr); /* * Some superspeed devices have finished the link training process @@ -5036,7 +5042,7 @@ hub_power_remaining(struct usb_hub *hub) static int descriptors_changed(struct usb_device *udev, - struct usb_device_descriptor *old_device_descriptor, + struct usb_device_descriptor *new_device_descriptor, struct usb_host_bos *old_bos) { int changed = 0; @@ -5047,8 +5053,8 @@ static int descriptors_changed(struct usb_device *udev, int length; char *buf; - if (memcmp(&udev->descriptor, old_device_descriptor, - 
sizeof(*old_device_descriptor)) != 0) + if (memcmp(&udev->descriptor, new_device_descriptor, + sizeof(*new_device_descriptor)) != 0) return 1; if ((old_bos && !udev->bos) || (!old_bos && udev->bos)) @@ -5368,9 +5374,8 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1, { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; - struct usb_device_descriptor descriptor; + struct usb_device_descriptor *descr; int status = -ENODEV; - int retval; dev_dbg(&port_dev->dev, "status %04x, change %04x, %s\n", portstatus, portchange, portspeed(hub, portstatus)); @@ -5397,23 +5402,20 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1, * changed device descriptors before resuscitating the * device. */ - descriptor = udev->descriptor; - retval = usb_get_device_descriptor(udev, - sizeof(udev->descriptor)); - if (retval < 0) { + descr = usb_get_device_descriptor(udev); + if (IS_ERR(descr)) { dev_dbg(&udev->dev, - "can't read device descriptor %d\n", - retval); + "can't read device descriptor %ld\n", + PTR_ERR(descr)); } else { - if (descriptors_changed(udev, &descriptor, + if (descriptors_changed(udev, descr, udev->bos)) { dev_dbg(&udev->dev, "device descriptor has changed\n"); - /* for disconnect() calls */ - udev->descriptor = descriptor; } else { status = 0; /* Nothing to do */ } + kfree(descr); } #ifdef CONFIG_PM } else if (udev->state == USB_STATE_SUSPENDED && diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index dba2baca486e..d64aaff223e7 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1039,39 +1039,34 @@ char *usb_cache_string(struct usb_device *udev, int index) } /* - * usb_get_device_descriptor - (re)reads the device descriptor (usbcore) - * @dev: the device whose device descriptor is being updated - * @size: how much of the descriptor to read + * usb_get_device_descriptor - read the device descriptor + * @udev: the device whose device descriptor should be read * Context: !in_interrupt () * - * Updates the copy of the device descriptor stored in the device structure, - * which dedicates space for this purpose. - * * Not exported, only for use by the core. If drivers really want to read * the device descriptor directly, they can call usb_get_descriptor() with * type = USB_DT_DEVICE and index = 0. * - * This call is synchronous, and may not be used in an interrupt context. - * - * Return: The number of bytes received on success, or else the status code - * returned by the underlying usb_control_msg() call. + * Returns: a pointer to a dynamically allocated usb_device_descriptor + * structure (which the caller must deallocate), or an ERR_PTR value. 
*/ -int usb_get_device_descriptor(struct usb_device *dev, unsigned int size) +struct usb_device_descriptor *usb_get_device_descriptor(struct usb_device *udev) { struct usb_device_descriptor *desc; int ret; - if (size > sizeof(*desc)) - return -EINVAL; desc = kmalloc(sizeof(*desc), GFP_NOIO); if (!desc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + + ret = usb_get_descriptor(udev, USB_DT_DEVICE, 0, desc, sizeof(*desc)); + if (ret == sizeof(*desc)) + return desc; - ret = usb_get_descriptor(dev, USB_DT_DEVICE, 0, desc, size); if (ret >= 0) - memcpy(&dev->descriptor, desc, size); + ret = -EMSGSIZE; kfree(desc); - return ret; + return ERR_PTR(ret); } /* diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 82538daac8b8..3bb2e1db42b5 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -42,8 +42,8 @@ extern bool usb_endpoint_is_ignored(struct usb_device *udev, struct usb_endpoint_descriptor *epd); extern int usb_remove_device(struct usb_device *udev); -extern int usb_get_device_descriptor(struct usb_device *dev, - unsigned int size); +extern struct usb_device_descriptor *usb_get_device_descriptor( + struct usb_device *udev); extern int usb_set_isoch_delay(struct usb_device *dev); extern int usb_get_bos_descriptor(struct usb_device *dev); extern void usb_release_bos_descriptor(struct usb_device *dev); -- Gitee From dc5eb2880599a35e7d915aaff053acbf3e0c4792 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 4 Aug 2023 15:14:14 -0400 Subject: [PATCH 218/243] USB: core: Fix race by not overwriting udev->descriptor in hub_port_init() stable inclusion from stable-v5.10.195 commit 9d241c5d9a9b7ad95c90c6520272fe404d5ac88f category: bugfix issue: NA CVE:CVE-2023-37453 Signed-off-by: Ywenrui44091 --------------------------------------- commit ff33299ec8bb80cdcc073ad9c506bd79bb2ed20b upstream. Syzbot reported an out-of-bounds read in sysfs.c:read_descriptors(): BUG: KASAN: slab-out-of-bounds in read_descriptors+0x263/0x280 drivers/usb/core/sysfs.c:883 Read of size 8 at addr ffff88801e78b8c8 by task udevd/5011 CPU: 0 PID: 5011 Comm: udevd Not tainted 6.4.0-rc6-syzkaller-00195-g40f71e7cd3c6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x150 lib/dump_stack.c:106 print_address_description.constprop.0+0x2c/0x3c0 mm/kasan/report.c:351 print_report mm/kasan/report.c:462 [inline] kasan_report+0x11c/0x130 mm/kasan/report.c:572 read_descriptors+0x263/0x280 drivers/usb/core/sysfs.c:883 ... Allocated by task 758: ... __do_kmalloc_node mm/slab_common.c:966 [inline] __kmalloc+0x5e/0x190 mm/slab_common.c:979 kmalloc include/linux/slab.h:563 [inline] kzalloc include/linux/slab.h:680 [inline] usb_get_configuration+0x1f7/0x5170 drivers/usb/core/config.c:887 usb_enumerate_device drivers/usb/core/hub.c:2407 [inline] usb_new_device+0x12b0/0x19d0 drivers/usb/core/hub.c:2545 As analyzed by Khazhy Kumykov, the cause of this bug is a race between read_descriptors() and hub_port_init(): The first routine uses a field in udev->descriptor, not expecting it to change, while the second overwrites it. Prior to commit 45bf39f8df7f ("USB: core: Don't hold device lock while reading the "descriptors" sysfs file") this race couldn't occur, because the routines were mutually exclusive thanks to the device locking. Removing that locking from read_descriptors() exposed it to the race. 
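Schematically, the race is a simple interleaving of the two paths described above:

	CPU A: read_descriptors()          CPU B: usb_reset_and_verify_device()
	  sysfs read, no device lock         hub_port_init()
	  uses fields of udev->descriptor      overwrites udev->descriptor
	  assuming they are stable             while A is still reading them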
The best way to fix the bug is to keep hub_port_init() from changing udev->descriptor once udev has been initialized and registered. Drivers expect the descriptors stored in the kernel to be immutable; we should not undermine this expectation. In fact, this change should have been made long ago. So now hub_port_init() will take an additional argument, specifying a buffer in which to store the device descriptor it reads. (If udev has not yet been initialized, the buffer pointer will be NULL and then hub_port_init() will store the device descriptor in udev as before.) This eliminates the data race responsible for the out-of-bounds read. The changes to hub_port_init() appear more extensive than they really are, because of indentation changes resulting from an attempt to avoid writing to other parts of the usb_device structure after it has been initialized. Similar changes should be made to the code that reads the BOS descriptor, but that can be handled in a separate patch later on. This patch is sufficient to fix the bug found by syzbot. Reported-and-tested-by: syzbot+18996170f8096c6174d0@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-usb/000000000000c0ffe505fe86c9ca@google.com/#r Signed-off-by: Alan Stern Cc: Khazhy Kumykov Fixes: 45bf39f8df7f ("USB: core: Don't hold device lock while reading the "descriptors" sysfs file") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/b958b47a-9a46-4c22-a9f9-e42e42c31251@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lin Yujun --- drivers/usb/core/hub.c | 114 +++++++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 44 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 52aad0a59ecd..60e027e87b9e 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4667,10 +4667,17 @@ static int get_bMaxPacketSize0(struct usb_device *udev, * the port lock. For a newly detected device that is not accessible * through any global pointers, it's not necessary to lock the device, * but it is still necessary to lock the port. + * + * For a newly detected device, @dev_descr must be NULL. The device + * descriptor retrieved from the device will then be stored in + * @udev->descriptor. For an already existing device, @dev_descr + * must be non-NULL. The device descriptor will be stored there, + * not in @udev->descriptor, because descriptors for registered + * devices are meant to be immutable. */ static int hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, - int retry_counter) + int retry_counter, struct usb_device_descriptor *dev_descr) { struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd = bus_to_hcd(hdev->bus); @@ -4682,6 +4689,7 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, int devnum = udev->devnum; const char *driver_name; bool do_new_scheme; + const bool initial = !dev_descr; int maxp0; struct usb_device_descriptor *buf, *descr; @@ -4720,32 +4728,34 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, } oldspeed = udev->speed; - /* USB 2.0 section 5.5.3 talks about ep0 maxpacket ... - * it's fixed size except for full speed devices. - * For Wireless USB devices, ep0 max packet is always 512 (tho - * reported as 0xff in the device descriptor). WUSB1.0[4.8.1]. 
- */ - switch (udev->speed) { - case USB_SPEED_SUPER_PLUS: - case USB_SPEED_SUPER: - case USB_SPEED_WIRELESS: /* fixed at 512 */ - udev->ep0.desc.wMaxPacketSize = cpu_to_le16(512); - break; - case USB_SPEED_HIGH: /* fixed at 64 */ - udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); - break; - case USB_SPEED_FULL: /* 8, 16, 32, or 64 */ - /* to determine the ep0 maxpacket size, try to read - * the device descriptor to get bMaxPacketSize0 and - * then correct our initial guess. + if (initial) { + /* USB 2.0 section 5.5.3 talks about ep0 maxpacket ... + * it's fixed size except for full speed devices. + * For Wireless USB devices, ep0 max packet is always 512 (tho + * reported as 0xff in the device descriptor). WUSB1.0[4.8.1]. */ - udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); - break; - case USB_SPEED_LOW: /* fixed at 8 */ - udev->ep0.desc.wMaxPacketSize = cpu_to_le16(8); - break; - default: - goto fail; + switch (udev->speed) { + case USB_SPEED_SUPER_PLUS: + case USB_SPEED_SUPER: + case USB_SPEED_WIRELESS: /* fixed at 512 */ + udev->ep0.desc.wMaxPacketSize = cpu_to_le16(512); + break; + case USB_SPEED_HIGH: /* fixed at 64 */ + udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); + break; + case USB_SPEED_FULL: /* 8, 16, 32, or 64 */ + /* to determine the ep0 maxpacket size, try to read + * the device descriptor to get bMaxPacketSize0 and + * then correct our initial guess. + */ + udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); + break; + case USB_SPEED_LOW: /* fixed at 8 */ + udev->ep0.desc.wMaxPacketSize = cpu_to_le16(8); + break; + default: + goto fail; + } } if (udev->speed == USB_SPEED_WIRELESS) @@ -4768,22 +4778,24 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, if (udev->speed < USB_SPEED_SUPER) dev_info(&udev->dev, "%s %s USB device number %d using %s\n", - (udev->config) ? "reset" : "new", speed, + (initial ? "new" : "reset"), speed, devnum, driver_name); - /* Set up TT records, if needed */ - if (hdev->tt) { - udev->tt = hdev->tt; - udev->ttport = hdev->ttport; - } else if (udev->speed != USB_SPEED_HIGH - && hdev->speed == USB_SPEED_HIGH) { - if (!hub->tt.hub) { - dev_err(&udev->dev, "parent hub has no TT\n"); - retval = -EINVAL; - goto fail; + if (initial) { + /* Set up TT records, if needed */ + if (hdev->tt) { + udev->tt = hdev->tt; + udev->ttport = hdev->ttport; + } else if (udev->speed != USB_SPEED_HIGH + && hdev->speed == USB_SPEED_HIGH) { + if (!hub->tt.hub) { + dev_err(&udev->dev, "parent hub has no TT\n"); + retval = -EINVAL; + goto fail; + } + udev->tt = &hub->tt; + udev->ttport = port1; } - udev->tt = &hub->tt; - udev->ttport = port1; } /* Why interleave GET_DESCRIPTOR and SET_ADDRESS this way? 
@@ -4812,6 +4824,12 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, maxp0 = get_bMaxPacketSize0(udev, buf, GET_DESCRIPTOR_BUFSIZE, retries == 0); + if (maxp0 > 0 && !initial && + maxp0 != udev->descriptor.bMaxPacketSize0) { + dev_err(&udev->dev, "device reset changed ep0 maxpacket size!\n"); + retval = -ENODEV; + goto fail; + } retval = hub_port_reset(hub, port1, udev, delay, false); if (retval < 0) /* error or disconnect */ @@ -4881,6 +4899,12 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, } else { u32 delay; + if (!initial && maxp0 != udev->descriptor.bMaxPacketSize0) { + dev_err(&udev->dev, "device reset changed ep0 maxpacket size!\n"); + retval = -ENODEV; + goto fail; + } + delay = udev->parent->hub_delay; udev->hub_delay = min_t(u32, delay, USB_TP_TRANSMISSION_DELAY_MAX); @@ -4924,7 +4948,10 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, retval); goto fail; } - udev->descriptor = *descr; + if (initial) + udev->descriptor = *descr; + else + *dev_descr = *descr; kfree(descr); /* @@ -5227,7 +5254,7 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, } /* reset (non-USB 3.0 devices) and get descriptor */ - status = hub_port_init(hub, udev, port1, i); + status = hub_port_init(hub, udev, port1, i, NULL); if (status < 0) goto loop; @@ -5833,7 +5860,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) struct usb_device *parent_hdev = udev->parent; struct usb_hub *parent_hub; struct usb_hcd *hcd = bus_to_hcd(udev->bus); - struct usb_device_descriptor descriptor = udev->descriptor; + struct usb_device_descriptor descriptor; struct usb_host_bos *bos; int i, j, ret = 0; int port1 = udev->portnum; @@ -5875,7 +5902,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) /* ep0 maxpacket size may change; let the HCD know about it. * Other endpoints will be handled by re-enumeration. */ usb_ep0_reinit(udev); - ret = hub_port_init(parent_hub, udev, port1, i); + ret = hub_port_init(parent_hub, udev, port1, i, &descriptor); if (ret >= 0 || ret == -ENOTCONN || ret == -ENODEV) break; } @@ -5887,7 +5914,6 @@ static int usb_reset_and_verify_device(struct usb_device *udev) /* Device might have changed firmware (DFU or similar) */ if (descriptors_changed(udev, &descriptor, bos)) { dev_info(&udev->dev, "device firmware changed\n"); - udev->descriptor = descriptor; /* for disconnect() calls */ goto re_enumerate; } -- Gitee From 78158bda616a1e60f9ab1b95a0aef834b6d9888a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 11 Aug 2023 13:38:46 -0400 Subject: [PATCH 219/243] USB: core: Fix oversight in SuperSpeed initialization stable inclusion from stable-v5.10.195 commit beba5051dd349dd0ea25c591da0f576bada09865 category: bugfix issue: NA CVE:CVE-2023-37453 Signed-off-by: Ywenrui44091 --------------------------------------- commit 59cf445754566984fd55af19ba7146c76e6627bc upstream. Commit 85d07c556216 ("USB: core: Unite old scheme and new scheme descriptor reads") altered the way USB devices are enumerated following detection, and in the process it messed up the initialization of SuperSpeed (or faster) devices: [ 31.650759] usb 2-1: new SuperSpeed Plus Gen 2x1 USB device number 2 using xhci_hcd [ 31.663107] usb 2-1: device descriptor read/8, error -71 [ 31.952697] usb 2-1: new SuperSpeed Plus Gen 2x1 USB device number 3 using xhci_hcd [ 31.965122] usb 2-1: device descriptor read/8, error -71 [ 32.080991] usb usb2-port1: attempt power cycle ... 
The problem was caused by the commit forgetting that in SuperSpeed or faster devices, the device descriptor uses a logarithmic encoding of the bMaxPacketSize0 value. (For some reason I thought the 255 case in the switch statement was meant for these devices, but it isn't -- it was meant for Wireless USB and is no longer needed.) We can fix the oversight by testing for buf->bMaxPacketSize0 = 9 (meaning 512, the actual maxpacket size for ep0 on all SuperSpeed devices) and straightening out the logic that checks and adjusts our initial guesses of the maxpacket value. Reported-and-tested-by: Thinh Nguyen Closes: https://lore.kernel.org/linux-usb/20230810002257.nadxmfmrobkaxgnz@synopsys.com/ Signed-off-by: Alan Stern Fixes: 85d07c556216 ("USB: core: Unite old scheme and new scheme descriptor reads") Link: https://lore.kernel.org/r/8809e6c5-59d5-4d2d-ac8f-6d106658ad73@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lin Yujun --- drivers/usb/core/hub.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 60e027e87b9e..585a0fea3cd8 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4631,7 +4631,7 @@ static int get_bMaxPacketSize0(struct usb_device *udev, buf, size, initial_descriptor_timeout); switch (buf->bMaxPacketSize0) { - case 8: case 16: case 32: case 64: case 255: + case 8: case 16: case 32: case 64: case 9: if (buf->bDescriptorType == USB_DT_DEVICE) { rc = buf->bMaxPacketSize0; break; @@ -4921,23 +4921,35 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, if (retval) goto fail; - if (maxp0 == 0xff || udev->speed >= USB_SPEED_SUPER) - i = 512; - else - i = maxp0; - if (usb_endpoint_maxp(&udev->ep0.desc) != i) { - if (udev->speed == USB_SPEED_LOW || - !(i == 8 || i == 16 || i == 32 || i == 64)) { - dev_err(&udev->dev, "Invalid ep0 maxpacket: %d\n", i); - retval = -EMSGSIZE; - goto fail; - } + /* + * Check the ep0 maxpacket guess and correct it if necessary. + * maxp0 is the value stored in the device descriptor; + * i is the value it encodes (logarithmic for SuperSpeed or greater). 
+ */ + i = maxp0; + if (udev->speed >= USB_SPEED_SUPER) { + if (maxp0 <= 16) + i = 1 << maxp0; + else + i = 0; /* Invalid */ + } + if (usb_endpoint_maxp(&udev->ep0.desc) == i) { + ; /* Initial ep0 maxpacket guess is right */ + } else if ((udev->speed == USB_SPEED_FULL || + udev->speed == USB_SPEED_HIGH) && + (i == 8 || i == 16 || i == 32 || i == 64)) { + /* Initial guess is wrong; use the descriptor's value */ if (udev->speed == USB_SPEED_FULL) dev_dbg(&udev->dev, "ep0 maxpacket = %d\n", i); else dev_warn(&udev->dev, "Using ep0 maxpacket: %d\n", i); udev->ep0.desc.wMaxPacketSize = cpu_to_le16(i); usb_ep0_reinit(udev); + } else { + /* Initial guess is wrong and descriptor's value is invalid */ + dev_err(&udev->dev, "Invalid ep0 maxpacket: %d\n", maxp0); + retval = -EMSGSIZE; + goto fail; } descr = usb_get_device_descriptor(udev); -- Gitee From 4385f58ebdc477dc60e940a0f219eece053020ee Mon Sep 17 00:00:00 2001 From: Wander Lairson Costa Date: Fri, 1 Sep 2023 10:50:20 -0300 Subject: [PATCH 220/243] netfilter: nfnetlink_osf: avoid OOB read stable inclusion from stable-v5.10.195 commit 780f60dde29692c42091602fee9c25e9e391f3dc category: bugfix issue: NA CVE:CVE-2023-39189 Signed-off-by: Ywenrui44091 --------------------------------------- [ Upstream commit f4f8a7803119005e87b716874bec07c751efafec ] The opt_num field is controlled by user mode and is not currently validated inside the kernel. An attacker can take advantage of this to trigger an OOB read and potentially leak information. BUG: KASAN: slab-out-of-bounds in nf_osf_match_one+0xbed/0xd10 net/netfilter/nfnetlink_osf.c:88 Read of size 2 at addr ffff88804bc64272 by task poc/6431 CPU: 1 PID: 6431 Comm: poc Not tainted 6.0.0-rc4 #1 Call Trace: nf_osf_match_one+0xbed/0xd10 net/netfilter/nfnetlink_osf.c:88 nf_osf_find+0x186/0x2f0 net/netfilter/nfnetlink_osf.c:281 nft_osf_eval+0x37f/0x590 net/netfilter/nft_osf.c:47 expr_call_ops_eval net/netfilter/nf_tables_core.c:214 nft_do_chain+0x2b0/0x1490 net/netfilter/nf_tables_core.c:264 nft_do_chain_ipv4+0x17c/0x1f0 net/netfilter/nft_chain_filter.c:23 [..] Also add validation to genre, subtype and version fields. Fixes: 11eeef41d5f6 ("netfilter: passive OS fingerprint xtables match") Reported-by: Lucas Leong Signed-off-by: Wander Lairson Costa Signed-off-by: Florian Westphal Signed-off-by: Sasha Levin Signed-off-by: Liu Jian --- net/netfilter/nfnetlink_osf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 51e3953b414c..260a655def34 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -316,6 +316,14 @@ static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl, f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + if (f->opt_num > ARRAY_SIZE(f->opt)) + return -EINVAL; + + if (!memchr(f->genre, 0, MAXGENRELEN) || + !memchr(f->subtype, 0, MAXGENRELEN) || + !memchr(f->version, 0, MAXGENRELEN)) + return -EINVAL; + kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL); if (!kf) return -ENOMEM; -- Gitee From 788c89af36ee1176e2676e70cc34221e1bc858a9 Mon Sep 17 00:00:00 2001 From: Wander Lairson Costa Date: Mon, 28 Aug 2023 10:21:07 -0300 Subject: [PATCH 221/243] netfilter: xt_u32: validate user space input mainline inclusion from mainline-v6.6-rc1 commit 69c5d284f67089b4750d28ff6ac6f52ec224b330 category: bugfix issue: NA CVE:CVE-2023-39192 Signed-off-by: Ywenrui44091 --------------------------------------- The xt_u32 module doesn't validate the fields in the xt_u32 structure. 
An attacker may take advantage of this to trigger an OOB read by setting the size fields with a value beyond the arrays boundaries. Add a checkentry function to validate the structure. This was originally reported by the ZDI project (ZDI-CAN-18408). Fixes: 1b50b8a371e9 ("[NETFILTER]: Add u32 match") Cc: stable@vger.kernel.org Signed-off-by: Wander Lairson Costa Signed-off-by: Pablo Neira Ayuso Signed-off-by: Lu Wei --- net/netfilter/xt_u32.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index 177b40d08098..117d4615d668 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -96,11 +96,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par) return ret ^ data->invert; } +static int u32_mt_checkentry(const struct xt_mtchk_param *par) +{ + const struct xt_u32 *data = par->matchinfo; + const struct xt_u32_test *ct; + unsigned int i; + + if (data->ntests > ARRAY_SIZE(data->tests)) + return -EINVAL; + + for (i = 0; i < data->ntests; ++i) { + ct = &data->tests[i]; + + if (ct->nnums > ARRAY_SIZE(ct->location) || + ct->nvalues > ARRAY_SIZE(ct->value)) + return -EINVAL; + } + + return 0; +} + static struct xt_match xt_u32_mt_reg __read_mostly = { .name = "u32", .revision = 0, .family = NFPROTO_UNSPEC, .match = u32_mt, + .checkentry = u32_mt_checkentry, .matchsize = sizeof(struct xt_u32), .me = THIS_MODULE, }; -- Gitee From dbd18a0d9e22399c0da1a6b6234e052c56c399dd Mon Sep 17 00:00:00 2001 From: Wander Lairson Costa Date: Mon, 28 Aug 2023 19:12:55 -0300 Subject: [PATCH 222/243] netfilter: xt_sctp: validate the flag_info count stable inclusion from stable-v5.10.163 commit 5541827d13cf19b905594eaee586527476efaa61 category: bugfix issue:NA CVE:CVE-2023-39193 Signed-off-by: Ywenrui44091 --------------------------------------- commit e99476497687ef9e850748fe6d232264f30bc8f9 upstream. sctp_mt_check doesn't validate the flag_count field. An attacker can take advantage of that to trigger a OOB read and leak memory information. Add the field validation in the checkentry function. 
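Both the xt_u32 and xt_sctp fixes above apply the same defensive pattern: a user-supplied element count is clamped against the capacity of the fixed-size array it indexes, inside the ->checkentry hook, so a bad rule is rejected at insertion time instead of reading out of bounds at packet-match time. A minimal user-space sketch of that pattern follows; it is not kernel code, and the structure and field names are illustrative only.

```
#include <errno.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Illustrative stand-in for a netfilter match structure: the count field
 * comes from user space, the array capacity is fixed at compile time. */
struct match_info {
	unsigned int flag_count;
	unsigned int flag_info[4];
};

/* checkentry-style validation: refuse counts larger than the array,
 * otherwise the loop in match() below would read out of bounds. */
static int checkentry(const struct match_info *info)
{
	if (info->flag_count > ARRAY_SIZE(info->flag_info))
		return -EINVAL;
	return 0;
}

static unsigned int match(const struct match_info *info)
{
	unsigned int i, sum = 0;

	for (i = 0; i < info->flag_count; i++)
		sum += info->flag_info[i];
	return sum;
}

int main(void)
{
	struct match_info bad = { .flag_count = 1000 };
	struct match_info ok = { .flag_count = 2, .flag_info = { 3, 4 } };

	printf("bad rejected: %d\n", checkentry(&bad));	/* -EINVAL */
	if (checkentry(&ok) == 0)
		printf("ok matched, sum=%u\n", match(&ok));	/* 7 */
	return 0;
}
```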
Fixes: 2e4e6a17af35 ("[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables") Cc: stable@vger.kernel.org Reported-by: Lucas Leong Signed-off-by: Wander Lairson Costa Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman Signed-off-by: Zhengchao Shao --- net/netfilter/xt_sctp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index 680015ba7cb6..d4bf089c9e3f 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -150,6 +150,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par) { const struct xt_sctp_info *info = par->matchinfo; + if (info->flag_count > ARRAY_SIZE(info->flag_info)) + return -EINVAL; if (info->flags & ~XT_SCTP_VALID_FLAGS) return -EINVAL; if (info->invflags & ~XT_SCTP_VALID_FLAGS) -- Gitee From 1eacc85d92a662b840ec304e1f4fe57c065ff960 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Tue, 27 Jun 2023 11:31:38 +0800 Subject: [PATCH 223/243] net: xfrm: Fix xfrm_address_filter OOB read stable inclusion from stable-v5.10.192 commit 7e50815d29037e08d3d26f3ebc41bcec729847b7 category: bugfix issue: NA CVE:CVE-2023-39194 Signed-off-by: Ywenrui44091 --------------------------------------- [ Upstream commit dfa73c17d55b921e1d4e154976de35317e43a93a ] We found below OOB crash: [ 44.211730] ================================================================== [ 44.212045] BUG: KASAN: slab-out-of-bounds in memcmp+0x8b/0xb0 [ 44.212045] Read of size 8 at addr ffff88800870f320 by task poc.xfrm/97 [ 44.212045] [ 44.212045] CPU: 0 PID: 97 Comm: poc.xfrm Not tainted 6.4.0-rc7-00072-gdad9774deaf1-dirty #4 [ 44.212045] Call Trace: [ 44.212045] [ 44.212045] dump_stack_lvl+0x37/0x50 [ 44.212045] print_report+0xcc/0x620 [ 44.212045] ? __virt_addr_valid+0xf3/0x170 [ 44.212045] ? memcmp+0x8b/0xb0 [ 44.212045] kasan_report+0xb2/0xe0 [ 44.212045] ? memcmp+0x8b/0xb0 [ 44.212045] kasan_check_range+0x39/0x1c0 [ 44.212045] memcmp+0x8b/0xb0 [ 44.212045] xfrm_state_walk+0x21c/0x420 [ 44.212045] ? __pfx_dump_one_state+0x10/0x10 [ 44.212045] xfrm_dump_sa+0x1e2/0x290 [ 44.212045] ? __pfx_xfrm_dump_sa+0x10/0x10 [ 44.212045] ? __kernel_text_address+0xd/0x40 [ 44.212045] ? kasan_unpoison+0x27/0x60 [ 44.212045] ? mutex_lock+0x60/0xe0 [ 44.212045] ? __pfx_mutex_lock+0x10/0x10 [ 44.212045] ? kasan_save_stack+0x22/0x50 [ 44.212045] netlink_dump+0x322/0x6c0 [ 44.212045] ? __pfx_netlink_dump+0x10/0x10 [ 44.212045] ? mutex_unlock+0x7f/0xd0 [ 44.212045] ? __pfx_mutex_unlock+0x10/0x10 [ 44.212045] __netlink_dump_start+0x353/0x430 [ 44.212045] xfrm_user_rcv_msg+0x3a4/0x410 [ 44.212045] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 44.212045] ? __pfx_xfrm_user_rcv_msg+0x10/0x10 [ 44.212045] ? __pfx_xfrm_dump_sa+0x10/0x10 [ 44.212045] ? __pfx_xfrm_dump_sa_done+0x10/0x10 [ 44.212045] ? __stack_depot_save+0x382/0x4e0 [ 44.212045] ? filter_irq_stacks+0x1c/0x70 [ 44.212045] ? kasan_save_stack+0x32/0x50 [ 44.212045] ? kasan_save_stack+0x22/0x50 [ 44.212045] ? kasan_set_track+0x25/0x30 [ 44.212045] ? __kasan_slab_alloc+0x59/0x70 [ 44.212045] ? kmem_cache_alloc_node+0xf7/0x260 [ 44.212045] ? kmalloc_reserve+0xab/0x120 [ 44.212045] ? __alloc_skb+0xcf/0x210 [ 44.212045] ? netlink_sendmsg+0x509/0x700 [ 44.212045] ? sock_sendmsg+0xde/0xe0 [ 44.212045] ? __sys_sendto+0x18d/0x230 [ 44.212045] ? __x64_sys_sendto+0x71/0x90 [ 44.212045] ? do_syscall_64+0x3f/0x90 [ 44.212045] ? entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 44.212045] ? netlink_sendmsg+0x509/0x700 [ 44.212045] ? sock_sendmsg+0xde/0xe0 [ 44.212045] ? __sys_sendto+0x18d/0x230 [ 44.212045] ? 
__x64_sys_sendto+0x71/0x90 [ 44.212045] ? do_syscall_64+0x3f/0x90 [ 44.212045] ? entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 44.212045] ? kasan_save_stack+0x22/0x50 [ 44.212045] ? kasan_set_track+0x25/0x30 [ 44.212045] ? kasan_save_free_info+0x2e/0x50 [ 44.212045] ? __kasan_slab_free+0x10a/0x190 [ 44.212045] ? kmem_cache_free+0x9c/0x340 [ 44.212045] ? netlink_recvmsg+0x23c/0x660 [ 44.212045] ? sock_recvmsg+0xeb/0xf0 [ 44.212045] ? __sys_recvfrom+0x13c/0x1f0 [ 44.212045] ? __x64_sys_recvfrom+0x71/0x90 [ 44.212045] ? do_syscall_64+0x3f/0x90 [ 44.212045] ? entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 44.212045] ? copyout+0x3e/0x50 [ 44.212045] netlink_rcv_skb+0xd6/0x210 [ 44.212045] ? __pfx_xfrm_user_rcv_msg+0x10/0x10 [ 44.212045] ? __pfx_netlink_rcv_skb+0x10/0x10 [ 44.212045] ? __pfx_sock_has_perm+0x10/0x10 [ 44.212045] ? mutex_lock+0x8d/0xe0 [ 44.212045] ? __pfx_mutex_lock+0x10/0x10 [ 44.212045] xfrm_netlink_rcv+0x44/0x50 [ 44.212045] netlink_unicast+0x36f/0x4c0 [ 44.212045] ? __pfx_netlink_unicast+0x10/0x10 [ 44.212045] ? netlink_recvmsg+0x500/0x660 [ 44.212045] netlink_sendmsg+0x3b7/0x700 [ 44.212045] ? __pfx_netlink_sendmsg+0x10/0x10 [ 44.212045] ? __pfx_netlink_sendmsg+0x10/0x10 [ 44.212045] sock_sendmsg+0xde/0xe0 [ 44.212045] __sys_sendto+0x18d/0x230 [ 44.212045] ? __pfx___sys_sendto+0x10/0x10 [ 44.212045] ? rcu_core+0x44a/0xe10 [ 44.212045] ? __rseq_handle_notify_resume+0x45b/0x740 [ 44.212045] ? _raw_spin_lock_irq+0x81/0xe0 [ 44.212045] ? __pfx___rseq_handle_notify_resume+0x10/0x10 [ 44.212045] ? __pfx_restore_fpregs_from_fpstate+0x10/0x10 [ 44.212045] ? __pfx_blkcg_maybe_throttle_current+0x10/0x10 [ 44.212045] ? __pfx_task_work_run+0x10/0x10 [ 44.212045] __x64_sys_sendto+0x71/0x90 [ 44.212045] do_syscall_64+0x3f/0x90 [ 44.212045] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 44.212045] RIP: 0033:0x44b7da [ 44.212045] RSP: 002b:00007ffdc8838548 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 44.212045] RAX: ffffffffffffffda RBX: 00007ffdc8839978 RCX: 000000000044b7da [ 44.212045] RDX: 0000000000000038 RSI: 00007ffdc8838770 RDI: 0000000000000003 [ 44.212045] RBP: 00007ffdc88385b0 R08: 00007ffdc883858c R09: 000000000000000c [ 44.212045] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 [ 44.212045] R13: 00007ffdc8839968 R14: 00000000004c37d0 R15: 0000000000000001 [ 44.212045] [ 44.212045] [ 44.212045] Allocated by task 97: [ 44.212045] kasan_save_stack+0x22/0x50 [ 44.212045] kasan_set_track+0x25/0x30 [ 44.212045] __kasan_kmalloc+0x7f/0x90 [ 44.212045] __kmalloc_node_track_caller+0x5b/0x140 [ 44.212045] kmemdup+0x21/0x50 [ 44.212045] xfrm_dump_sa+0x17d/0x290 [ 44.212045] netlink_dump+0x322/0x6c0 [ 44.212045] __netlink_dump_start+0x353/0x430 [ 44.212045] xfrm_user_rcv_msg+0x3a4/0x410 [ 44.212045] netlink_rcv_skb+0xd6/0x210 [ 44.212045] xfrm_netlink_rcv+0x44/0x50 [ 44.212045] netlink_unicast+0x36f/0x4c0 [ 44.212045] netlink_sendmsg+0x3b7/0x700 [ 44.212045] sock_sendmsg+0xde/0xe0 [ 44.212045] __sys_sendto+0x18d/0x230 [ 44.212045] __x64_sys_sendto+0x71/0x90 [ 44.212045] do_syscall_64+0x3f/0x90 [ 44.212045] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 44.212045] [ 44.212045] The buggy address belongs to the object at ffff88800870f300 [ 44.212045] which belongs to the cache kmalloc-64 of size 64 [ 44.212045] The buggy address is located 32 bytes inside of [ 44.212045] allocated 36-byte region [ffff88800870f300, ffff88800870f324) [ 44.212045] [ 44.212045] The buggy address belongs to the physical page: [ 44.212045] page:00000000e4de16ee refcount:1 mapcount:0 mapping:000000000 ... 
[ 44.212045] flags: 0x100000000000200(slab|node=0|zone=1) [ 44.212045] page_type: 0xffffffff() [ 44.212045] raw: 0100000000000200 ffff888004c41640 dead000000000122 0000000000000000 [ 44.212045] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000 [ 44.212045] page dumped because: kasan: bad access detected [ 44.212045] [ 44.212045] Memory state around the buggy address: [ 44.212045] ffff88800870f200: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 44.212045] ffff88800870f280: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc [ 44.212045] >ffff88800870f300: 00 00 00 00 04 fc fc fc fc fc fc fc fc fc fc fc [ 44.212045] ^ [ 44.212045] ffff88800870f380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 44.212045] ffff88800870f400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 44.212045] ================================================================== By investigating the code, we find the root cause of this OOB is the lack of checks in xfrm_dump_sa(). The buggy code allows a malicious user to pass arbitrary value of filter->splen/dplen. Hence, with crafted xfrm states, the attacker can achieve 8 bytes heap OOB read, which causes info leak. if (attrs[XFRMA_ADDRESS_FILTER]) { filter = kmemdup(nla_data(attrs[XFRMA_ADDRESS_FILTER]), sizeof(*filter), GFP_KERNEL); if (filter == NULL) return -ENOMEM; // NO MORE CHECKS HERE !!! } This patch fixes the OOB by adding necessary boundary checks, just like the code in pfkey_dump() function. Fixes: d3623099d350 ("ipsec: add support of limited SA dump") Signed-off-by: Lin Ma Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Wang Hai --- net/xfrm/xfrm_user.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d0fdfbf4c5f7..225dff10f786 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1067,6 +1067,15 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) sizeof(*filter), GFP_KERNEL); if (filter == NULL) return -ENOMEM; + + /* see addr_match(), (prefix length >> 5) << 2 + * will be used to compare xfrm_address_t + */ + if (filter->splen > (sizeof(xfrm_address_t) << 3) || + filter->dplen > (sizeof(xfrm_address_t) << 3)) { + kfree(filter); + return -EINVAL; + } } if (attrs[XFRMA_PROTO]) -- Gitee From b8e6ccfc9ac6d5572f458317d7cf4853befe8acb Mon Sep 17 00:00:00 2001 From: Kyle Zeng Date: Thu, 14 Sep 2023 22:12:57 -0700 Subject: [PATCH 224/243] ipv4: fix null-deref in ipv4_link_failure mainline inclusion from mainline-v6.6-rc3 commit 0113d9c9d1ccc07f5a3710dac4aa24b6d711278c category: bugfix issue: #I87CVU CVE: CVE-2023-42754 Signed-off-by: wanxiaoqing --------------------------------------- Currently, we assume the skb is associated with a device before calling __ip_options_compile, which is not always the case if it is re-routed by ipvs. When skb->dev is NULL, dev_net(skb->dev) will become null-dereference. This patch adds a check for the edge case and switch to use the net_device from the rtable when skb->dev is NULL. Fixes: ed0de45a1008 ("ipv4: recompile ip options in ipv4_link_failure") Suggested-by: David Ahern Signed-off-by: Kyle Zeng Cc: Stephen Suryaputra Cc: Vadim Fedorenko Reviewed-by: David Ahern Signed-off-by: David S. 
Miller Signed-off-by: Lu Wei Signed-off-by: wanxiaoqing Signed-off-by: Ywenrui --- net/ipv4/route.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ce787c386793..0fa018209170 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1227,6 +1227,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) static void ipv4_send_dest_unreach(struct sk_buff *skb) { + struct net_device *dev; struct ip_options opt; int res; @@ -1244,7 +1245,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb) opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); rcu_read_lock(); - res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL); + dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; + res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); rcu_read_unlock(); if (res) -- Gitee From 2990bc01e5027e8380506616845a611c1e6c0f2a Mon Sep 17 00:00:00 2001 From: Jun Nie Date: Tue, 3 Jan 2023 09:45:16 +0800 Subject: [PATCH 225/243] ext4: optimize ea_inode block expansion stable inclusion from stable-v5.10.173~147 commit d738789ae9ec47d3458a008788f3cdc862ebf0cb category: bugfix issue: #I7ELJH CVE:NA Signed-off-by: Ywenrui44091 --------------------------------------- commit 1e9d62d252812575ded7c620d8fc67c32ff06c16 upstream. Copy ea data from inode entry when expanding ea block if possible. Then remove the ea entry if expansion success. Thus memcpy to a temporary buffer may be avoided. If the expansion fails, we do not need to recovery the removed ea entry neither in this way. Reported-by: syzbot+2dacb8f015bf1420155f@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=3613786cb88c93aa1c6a279b1df6a7b201347d08 Link: https://lore.kernel.org/r/20230103014517.495275-2-jun.nie@linaro.org Cc: stable@kernel.org Signed-off-by: Jun Nie Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ywenrui44091 --- fs/ext4/xattr.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b5016eb7b373..deff4d453ccf 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2560,9 +2560,8 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); - buffer = kmalloc(value_size, GFP_NOFS); b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); - if (!is || !bs || !buffer || !b_entry_name) { + if (!is || !bs || !b_entry_name) { error = -ENOMEM; goto out; } @@ -2574,12 +2573,18 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, /* Save the entry name and the entry value */ if (entry->e_value_inum) { + buffer = kvmalloc(value_size, GFP_NOFS); + if (!buffer) { + error = -ENOMEM; + goto out; + } + error = ext4_xattr_inode_get(inode, entry, buffer, value_size); if (error) goto out; } else { size_t value_offs = le16_to_cpu(entry->e_value_offs); - memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size); + buffer = (void *)IFIRST(header) + value_offs; } memcpy(b_entry_name, entry->e_name, entry->e_name_len); @@ -2594,25 +2599,26 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, if (error) goto out; - /* Remove the chosen entry from the inode */ - error = ext4_xattr_ibody_set(handle, inode, &i, is); - if (error) - goto out; - i.value = buffer; i.value_len = value_size; error = ext4_xattr_block_find(inode, &i, bs); if (error) goto 
out; - /* Add entry which was removed from the inode into the block */ + /* Move ea entry from the inode into the block */ error = ext4_xattr_block_set(handle, inode, &i, bs); if (error) goto out; - error = 0; + + /* Remove the chosen entry from the inode */ + i.value = NULL; + i.value_len = 0; + error = ext4_xattr_ibody_set(handle, inode, &i, is); + out: kfree(b_entry_name); - kfree(buffer); + if (entry->e_value_inum && buffer) + kvfree(buffer); if (is) brelse(is->iloc.bh); if (bs) -- Gitee From 84b083bfcf9d89b97a1f8767983f64bee4cc147d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 16 Feb 2023 10:55:48 -0800 Subject: [PATCH 226/243] ext4: fix another off-by-one fsmap error on 1k block filesystems stable inclusion from stable-v5.10.175~83 commit 1d2366624b4c19a2ba6baf67fe57f4a1b0f67c05 category: bugfix issue: #I7ELJH CVE:NA Signed-off-by: Ywenrui44091 --------------------------------------- commit c993799baf9c5861f8df91beb80e1611b12efcbd upstream. Apparently syzbot figured out that issuing this FSMAP call: struct fsmap_head cmd = { .fmh_count = ...; .fmh_keys = { { .fmr_device = /* ext4 dev */, .fmr_physical = 0, }, { .fmr_device = /* ext4 dev */, .fmr_physical = 0, }, }, ... }; ret = ioctl(fd, FS_IOC_GETFSMAP, &cmd); Produces this crash if the underlying filesystem is a 1k-block ext4 filesystem: kernel BUG at fs/ext4/ext4.h:3331! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 3 PID: 3227965 Comm: xfs_io Tainted: G W O 6.2.0-rc8-achx Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014 RIP: 0010:ext4_mb_load_buddy_gfp+0x47c/0x570 [ext4] RSP: 0018:ffffc90007c03998 EFLAGS: 00010246 RAX: ffff888004978000 RBX: ffffc90007c03a20 RCX: ffff888041618000 RDX: 0000000000000000 RSI: 00000000000005a4 RDI: ffffffffa0c99b11 RBP: ffff888012330000 R08: ffffffffa0c2b7d0 R09: 0000000000000400 R10: ffffc90007c03950 R11: 0000000000000000 R12: 0000000000000001 R13: 00000000ffffffff R14: 0000000000000c40 R15: ffff88802678c398 FS: 00007fdf2020c880(0000) GS:ffff88807e100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffd318a5fe8 CR3: 000000007f80f001 CR4: 00000000001706e0 Call Trace: ext4_mballoc_query_range+0x4b/0x210 [ext4 dfa189daddffe8fecd3cdfd00564e0f265a8ab80] ext4_getfsmap_datadev+0x713/0x890 [ext4 dfa189daddffe8fecd3cdfd00564e0f265a8ab80] ext4_getfsmap+0x2b7/0x330 [ext4 dfa189daddffe8fecd3cdfd00564e0f265a8ab80] ext4_ioc_getfsmap+0x153/0x2b0 [ext4 dfa189daddffe8fecd3cdfd00564e0f265a8ab80] __ext4_ioctl+0x2a7/0x17e0 [ext4 dfa189daddffe8fecd3cdfd00564e0f265a8ab80] __x64_sys_ioctl+0x82/0xa0 do_syscall_64+0x2b/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fdf20558aff RSP: 002b:00007ffd318a9e30 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00000000000200c0 RCX: 00007fdf20558aff RDX: 00007fdf1feb2010 RSI: 00000000c0c0583b RDI: 0000000000000003 RBP: 00005625c0634be0 R08: 00005625c0634c40 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fdf1feb2010 R13: 00005625be70d994 R14: 0000000000000800 R15: 0000000000000000 For GETFSMAP calls, the caller selects a physical block device by writing its block number into fsmap_head.fmh_keys[01].fmr_device. To query mappings for a subrange of the device, the starting byte of the range is written to fsmap_head.fmh_keys[0].fmr_physical and the last byte of the range goes in fsmap_head.fmh_keys[1].fmr_physical. 
IOWs, to query what mappings overlap with bytes 3-14 of /dev/sda, you'd set the inputs as follows: fmh_keys[0] = { .fmr_device = major(8, 0), .fmr_physical = 3}, fmh_keys[1] = { .fmr_device = major(8, 0), .fmr_physical = 14}, Which would return you whatever is mapped in the 12 bytes starting at physical offset 3. The crash is due to insufficient range validation of keys[1] in ext4_getfsmap_datadev. On 1k-block filesystems, block 0 is not part of the filesystem, which means that s_first_data_block is nonzero. ext4_get_group_no_and_offset subtracts this quantity from the blocknr argument before cracking it into a group number and a block number within a group. IOWs, block group 0 spans blocks 1-8192 (1-based) instead of 0-8191 (0-based) like what happens with larger blocksizes. The net result of this encoding is that blocknr < s_first_data_block is not a valid input to this function. The end_fsb variable is set from the keys that are copied from userspace, which means that in the above example, its value is zero. That leads to an underflow here: blocknr = blocknr - le32_to_cpu(es->s_first_data_block); The division then operates on -1: offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >> EXT4_SB(sb)->s_cluster_bits; Leaving an impossibly large group number (2^32-1) in blocknr. ext4_getfsmap_check_keys checked that keys[0].fmr_physical and keys[1].fmr_physical are in increasing order, but ext4_getfsmap_datadev adjusts keys[0].fmr_physical to be at least s_first_data_block. This implies that we have to check it again after the adjustment, which is the piece that I forgot. Reported-by: syzbot+6be2b977c89f79b6b153@syzkaller.appspotmail.com Fixes: 4a4956249dac ("ext4: fix off-by-one fsmap error on 1k block filesystems") Link: https://syzkaller.appspot.com/bug?id=79d5768e9bfe362911ac1a5057a36fc6b5c30002 Cc: stable@vger.kernel.org Signed-off-by: Darrick J. Wong Link: https://lore.kernel.org/r/Y+58NPTH7VNGgzdd@magnolia Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ywenrui44091 --- fs/ext4/fsmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 4493ef0c715e..cdf9bfe10137 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb, keys[0].fmr_physical = bofs; if (keys[1].fmr_physical >= eofs) keys[1].fmr_physical = eofs - 1; + if (keys[1].fmr_physical < keys[0].fmr_physical) + return 0; start_fsb = keys[0].fmr_physical; end_fsb = keys[1].fmr_physical; -- Gitee From ed8c768a8c878ae81ca40c25977b31e4360b5762 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 23 May 2023 14:17:25 +0800 Subject: [PATCH 227/243] f2fs: fix to avoid NULL pointer dereference f2fs_write_end_io() stable inclusion from stable-v5.10.188~133 commit b39ef5b52f10b819bd0ceeb22e8f7df7800880ca category: bugfix issue: #I79GXD CVE: CVE-2023-2898 Signed-off-by: Ywenrui00491 --------------------------------------- commit d8189834d4348ae608083e1f1f53792cfcc2a9bc upstream. butt3rflyh4ck reports a bug as below: When a thread always calls F2FS_IOC_RESIZE_FS to resize fs, if resize fs is failed, f2fs kernel thread would invoke callback function to update f2fs io info, it would call f2fs_write_end_io and may trigger null-ptr-deref in NODE_MAPPING. 
general protection fault, probably for non-canonical address KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] RIP: 0010:NODE_MAPPING fs/f2fs/f2fs.h:1972 [inline] RIP: 0010:f2fs_write_end_io+0x727/0x1050 fs/f2fs/data.c:370 bio_endio+0x5af/0x6c0 block/bio.c:1608 req_bio_endio block/blk-mq.c:761 [inline] blk_update_request+0x5cc/0x1690 block/blk-mq.c:906 blk_mq_end_request+0x59/0x4c0 block/blk-mq.c:1023 lo_complete_rq+0x1c6/0x280 drivers/block/loop.c:370 blk_complete_reqs+0xad/0xe0 block/blk-mq.c:1101 __do_softirq+0x1d4/0x8ef kernel/softirq.c:571 run_ksoftirqd kernel/softirq.c:939 [inline] run_ksoftirqd+0x31/0x60 kernel/softirq.c:931 smpboot_thread_fn+0x659/0x9e0 kernel/smpboot.c:164 kthread+0x33e/0x440 kernel/kthread.c:379 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 The root cause is below race case can cause leaving dirty metadata in f2fs after filesystem is remount as ro: Thread A Thread B - f2fs_ioc_resize_fs - f2fs_readonly --- return false - f2fs_resize_fs - f2fs_remount - write_checkpoint - set f2fs as ro - free_segment_range - update meta_inode's data Then, if f2fs_put_super() fails to write_checkpoint due to readonly status, and meta_inode's dirty data will be writebacked after node_inode is put, finally, f2fs_write_end_io will access NULL pointer on sbi->node_inode. Thread A IRQ context - f2fs_put_super - write_checkpoint fails - iput(node_inode) - node_inode = NULL - iput(meta_inode) - write_inode_now - f2fs_write_meta_page - f2fs_write_end_io - NODE_MAPPING(sbi) : access NULL pointer on node_inode Fixes: b4b10061ef98 ("f2fs: refactor resize_fs to avoid meta updates in progress") Reported-by: butt3rflyh4ck Closes: https://lore.kernel.org/r/1684480657-2375-1-git-send-email-yangtiezhu@loongson.cn Tested-by: butt3rflyh4ck Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Stefan Ghinea Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ywenrui00491 --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 22 +++++++++++++++++++--- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 48782861cf62..7e38a3e0442b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3519,7 +3519,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force, unsigned int segno); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); -int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); +int f2fs_resize_fs(struct file *filp, __u64 block_count); int __init f2fs_create_garbage_collection_cache(void); void f2fs_destroy_garbage_collection_cache(void); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1fbaab1f7aba..9ba46b90796e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3347,7 +3347,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) sizeof(block_count))) return -EFAULT; - return f2fs_resize_fs(sbi, block_count); + return f2fs_resize_fs(filp, block_count); } static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d87861e8a064..f3b7f9179f11 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include @@ -1968,8 +1969,9 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) } } -int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) +int f2fs_resize_fs(struct file *filp, __u64 block_count) { + struct f2fs_sb_info *sbi = 
F2FS_I_SB(file_inode(filp)); __u64 old_block_count, shrunk_blocks; struct cp_control cpc = { CP_RESIZE, 0, 0, 0 }; unsigned int secs; @@ -2007,12 +2009,18 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) return -EINVAL; } + err = mnt_want_write_file(filp); + if (err) + return err; + shrunk_blocks = old_block_count - block_count; secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); /* stop other GC */ - if (!down_write_trylock(&sbi->gc_lock)) - return -EAGAIN; + if (!down_write_trylock(&sbi->gc_lock)) { + err = -EAGAIN; + goto out_drop_write; + } /* stop CP to protect MAIN_SEC in free_segment_range */ f2fs_lock_op(sbi); @@ -2032,12 +2040,20 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) out_unlock: f2fs_unlock_op(sbi); up_write(&sbi->gc_lock); +out_drop_write: + mnt_drop_write_file(filp); if (err) return err; set_sbi_flag(sbi, SBI_IS_RESIZEFS); freeze_super(sbi->sb); + + if (f2fs_readonly(sbi->sb)) { + thaw_super(sbi->sb); + return -EROFS; + } + down_write(&sbi->gc_lock); mutex_lock(&sbi->cp_mutex); -- Gitee From d94114685ef22b88feb07e2f6358b1992c983288 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 1 Dec 2023 15:47:13 +0100 Subject: [PATCH 228/243] netfilter: nft_set_pipapo: skip inactive elements during set walk stable inclusion from stable-v5.10.204~11 commit bf72b44fe81be08a9fcd58aabf417cd3337ffc99 category: bugfix issue: #I8RUB5 CVE: CVE-2023-6817 Signed-off-by: yaowenrui --------------------------------------- commit 317eb9685095678f2c9f5a8189de698c5354316a upstream. Otherwise set elements can be deactivated twice which will cause a crash. Reported-by: Xingyuan Mo Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_set_pipapo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index f67c4436c5d3..79f3cab1afd5 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1972,7 +1972,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, goto cont; e = f->mt[r].e; - if (nft_set_elem_expired(&e->ext)) + if (!nft_set_elem_active(&e->ext, iter->genmask)) goto cont; elem.priv = e; -- Gitee From eb2c67ff6a0f3cb713a6dd0570563bfc413aee08 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Jun 2022 11:06:23 -0700 Subject: [PATCH 229/243] perf/core: Add a new read format to get a number of lost samples stable inclusion from stable-v5.10.204 commit 8bd3d61624d503ebe8b8469021a03bef359f89de category: bugfix issue: #I8T405 CVE: NA Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 119a784c81270eb88e573174ed2209225d646656 ] Sometimes we want to know an accurate number of samples even if it's lost. Currenlty PERF_RECORD_LOST is generated for a ring-buffer which might be shared with other events. So it's hard to know per-event lost count. Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from userspace. 
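For illustration, a minimal user-space reader of the new flag might look like the sketch below. It is not part of the patch; it assumes a kernel carrying this change and defines PERF_FORMAT_LOST locally (as 1U << 4, matching the uapi hunk below) in case the installed headers predate it. With read_format = PERF_FORMAT_ID | PERF_FORMAT_LOST on a non-group event, read() fills the buffer as { value, id, lost }.

```
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#ifndef PERF_FORMAT_LOST
#define PERF_FORMAT_LOST (1U << 4)	/* value introduced by this patch */
#endif

struct read_fmt {
	uint64_t value;
	uint64_t id;
	uint64_t lost;	/* per-event count of lost samples */
};

int main(void)
{
	struct perf_event_attr attr;
	struct read_fmt r;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.read_format = PERF_FORMAT_ID | PERF_FORMAT_LOST;

	/* Monitor the calling process on any CPU. */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* An older kernel returns only 16 bytes here and this check fails. */
	if (read(fd, &r, sizeof(r)) != sizeof(r)) {
		perror("read");
		return 1;
	}
	printf("value=%llu id=%llu lost=%llu\n",
	       (unsigned long long)r.value,
	       (unsigned long long)r.id,
	       (unsigned long long)r.lost);
	close(fd);
	return 0;
}
```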
Original-patch-by: Jiri Olsa Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220616180623.1358843-1-namhyung@kernel.org Stable-dep-of: 382c27f4ed28 ("perf: Fix perf_event_validate_size()") Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- include/linux/perf_event.h | 2 ++ include/uapi/linux/perf_event.h | 5 ++++- kernel/events/core.c | 21 ++++++++++++++++++--- kernel/events/ring_buffer.c | 5 ++++- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 03e454582bd3..f3b7c7bf0596 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -744,6 +744,8 @@ struct perf_event { struct pid_namespace *ns; u64 id; + atomic64_t lost_samples; + u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b95d3c485d27..6ca63ab6bee5 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -279,6 +279,7 @@ enum { * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } && !PERF_FORMAT_GROUP * * { u64 nr; @@ -286,6 +287,7 @@ enum { * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } cntr[nr]; * } && PERF_FORMAT_GROUP * }; @@ -295,8 +297,9 @@ enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, PERF_FORMAT_GROUP = 1U << 3, + PERF_FORMAT_LOST = 1U << 4, - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 01238adfdb72..884dff5a9698 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1922,6 +1922,9 @@ static void __perf_event_read_size(struct perf_event *event, int nr_siblings) if (event->attr.read_format & PERF_FORMAT_ID) entry += sizeof(u64); + if (event->attr.read_format & PERF_FORMAT_LOST) + entry += sizeof(u64); + if (event->attr.read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); @@ -5278,11 +5281,15 @@ static int __perf_read_group_add(struct perf_event *leader, values[n++] += perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&leader->lost_samples); for_each_sibling_event(sub, leader) { values[n++] += perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&sub->lost_samples); } unlock: @@ -5336,7 +5343,7 @@ static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; - u64 values[4]; + u64 values[5]; int n = 0; values[n++] = __perf_event_read_value(event, &enabled, &running); @@ -5346,6 +5353,8 @@ static int perf_read_one(struct perf_event *event, values[n++] = running; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&event->lost_samples); if (copy_to_user(buf, values, n * sizeof(u64))) return -EFAULT; @@ -6817,7 +6826,7 @@ static void perf_output_read_one(struct perf_output_handle 
*handle, u64 enabled, u64 running) { u64 read_format = event->attr.read_format; - u64 values[4]; + u64 values[5]; int n = 0; values[n++] = perf_event_count(event); @@ -6831,6 +6840,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, } if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&event->lost_samples); __output_copy(handle, values, n * sizeof(u64)); } @@ -6841,7 +6852,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; - u64 values[5]; + u64 values[6]; int n = 0; values[n++] = 1 + leader->nr_siblings; @@ -6859,6 +6870,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&leader->lost_samples); __output_copy(handle, values, n * sizeof(u64)); @@ -6872,6 +6885,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&sub->lost_samples); __output_copy(handle, values, n * sizeof(u64)); } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ef91ae75ca56..3edb9c91e027 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -172,8 +172,10 @@ __perf_output_begin(struct perf_output_handle *handle, goto out; if (unlikely(rb->paused)) { - if (rb->nr_pages) + if (rb->nr_pages) { local_inc(&rb->lost); + atomic64_inc(&event->lost_samples); + } goto out; } @@ -254,6 +256,7 @@ __perf_output_begin(struct perf_output_handle *handle, fail: local_inc(&rb->lost); + atomic64_inc(&event->lost_samples); perf_output_put_handle(handle); out: rcu_read_unlock(); -- Gitee From 4933126566b3ba4a7c25c8a68a909cc155ca794f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 29 Nov 2023 15:24:52 +0100 Subject: [PATCH 230/243] perf: Fix perf_event_validate_size() stable inclusion from stable-v5.10.204 commit 208dd116f96ea19e5d38d7b80fce49bc5ce1bbe8 category: bugfix issue: #I8T405 CVE: CVE-2023-6931 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 382c27f4ed28f803b1f1473ac2d8db0afc795a1b ] Budimir noted that perf_event_validate_size() only checks the size of the newly added event, even though the sizes of all existing events can also change due to not all events having the same read_format. When we attach the new event, perf_group_attach(), we do re-compute the size for all events. 
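To see why rechecking every member matters, the sketch below mirrors the read-size formula from the hunk above in user space (illustrative only; the flag values match include/uapi/linux/perf_event.h). With PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_LOST each group member costs 24 bytes, so a group of roughly 682 siblings already crosses the 16k cap that perf_event_validate_size() now enforces against each member's read_format.

```
#include <stdint.h>
#include <stdio.h>

#define FMT_TOTAL_TIME_ENABLED	(1ULL << 0)
#define FMT_TOTAL_TIME_RUNNING	(1ULL << 1)
#define FMT_ID			(1ULL << 2)
#define FMT_GROUP		(1ULL << 3)
#define FMT_LOST		(1ULL << 4)

/* User-space mirror of __perf_event_read_size(): the number of bytes a
 * read() on the event must fill for a given read_format and group size. */
static uint64_t read_size(uint64_t read_format, unsigned int nr_siblings)
{
	uint64_t entry = sizeof(uint64_t);	/* value */
	uint64_t size = 0;
	uint64_t nr = 1;

	if (read_format & FMT_TOTAL_TIME_ENABLED)
		size += sizeof(uint64_t);
	if (read_format & FMT_TOTAL_TIME_RUNNING)
		size += sizeof(uint64_t);
	if (read_format & FMT_ID)
		entry += sizeof(uint64_t);
	if (read_format & FMT_LOST)
		entry += sizeof(uint64_t);
	if (read_format & FMT_GROUP) {
		nr += nr_siblings;
		size += sizeof(uint64_t);	/* the leading "nr" word */
	}
	return size + nr * entry;
}

int main(void)
{
	uint64_t fmt = FMT_GROUP | FMT_ID | FMT_LOST;

	printf("681 siblings: %llu bytes\n",
	       (unsigned long long)read_size(fmt, 681));	/* 16376, still allowed */
	printf("682 siblings: %llu bytes\n",
	       (unsigned long long)read_size(fmt, 682));	/* 16400, over the 16k cap */
	return 0;
}
```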
Fixes: a723968c0ed3 ("perf: Fix u16 overflows") Reported-by: Budimir Markovic Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- kernel/events/core.c | 61 +++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 884dff5a9698..5eae16ee753a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1907,31 +1907,34 @@ static inline void perf_event__state_init(struct perf_event *event) PERF_EVENT_STATE_INACTIVE; } -static void __perf_event_read_size(struct perf_event *event, int nr_siblings) +static int __perf_event_read_size(u64 read_format, int nr_siblings) { int entry = sizeof(u64); /* value */ int size = 0; int nr = 1; - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) size += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) size += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_ID) + if (read_format & PERF_FORMAT_ID) entry += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_LOST) + if (read_format & PERF_FORMAT_LOST) entry += sizeof(u64); - if (event->attr.read_format & PERF_FORMAT_GROUP) { + if (read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); } - size += entry * nr; - event->read_size = size; + /* + * Since perf_event_validate_size() limits this to 16k and inhibits + * adding more siblings, this will never overflow. + */ + return size + nr * entry; } static void __perf_event_header_size(struct perf_event *event, u64 sample_type) @@ -1975,8 +1978,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type) */ static void perf_event__header_size(struct perf_event *event) { - __perf_event_read_size(event, - event->group_leader->nr_siblings); + event->read_size = + __perf_event_read_size(event->attr.read_format, + event->group_leader->nr_siblings); __perf_event_header_size(event, event->attr.sample_type); } @@ -2007,24 +2011,35 @@ static void perf_event__id_header_size(struct perf_event *event) event->id_header_size = size; } +/* + * Check that adding an event to the group does not result in anybody + * overflowing the 64k event limit imposed by the output buffer. + * + * Specifically, check that the read_size for the event does not exceed 16k, + * read_size being the one term that grows with groups size. Since read_size + * depends on per-event read_format, also (re)check the existing events. + * + * This leaves 48k for the constant size fields and things like callchains, + * branch stacks and register sets. + */ static bool perf_event_validate_size(struct perf_event *event) { - /* - * The values computed here will be over-written when we actually - * attach the event. - */ - __perf_event_read_size(event, event->group_leader->nr_siblings + 1); - __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); - perf_event__id_header_size(event); + struct perf_event *sibling, *group_leader = event->group_leader; - /* - * Sum the lot; should not exceed the 64k limit we have on records. - * Conservative limit to allow for callchains and other variable fields. 
- */ - if (event->read_size + event->header_size + - event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) + if (__perf_event_read_size(event->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) return false; + if (__perf_event_read_size(group_leader->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) + return false; + + for_each_sibling_event(sibling, group_leader) { + if (__perf_event_read_size(sibling->attr.read_format, + group_leader->nr_siblings + 1) > 16*1024) + return false; + } + return true; } -- Gitee From d7cde500b071af684b6ee7fe0faa98e5695f1133 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 23 Nov 2023 15:13:14 +0800 Subject: [PATCH 231/243] ipv4: igmp: fix refcnt uaf issue when receiving igmp query packet stable inclusion from stable-v5.10.203 commit 772fe1da9a8d4dcd8993abaecbde04789c52a4c2 category: bugfix issue: #I8T41B CVE: CVE-2023-6932 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit e2b706c691905fe78468c361aaabc719d0a496f1 ] When I perform the following test operations: 1.ip link add br0 type bridge 2.brctl addif br0 eth0 3.ip addr add 239.0.0.1/32 dev eth0 4.ip addr add 239.0.0.1/32 dev br0 5.ip addr add 224.0.0.1/32 dev br0 6.while ((1)) do ifconfig br0 up ifconfig br0 down done 7.send IGMPv2 query packets to port eth0 continuously. For example, ./mausezahn ethX -c 0 "01 00 5e 00 00 01 00 72 19 88 aa 02 08 00 45 00 00 1c 00 01 00 00 01 02 0e 7f c0 a8 0a b7 e0 00 00 01 11 64 ee 9b 00 00 00 00" The preceding tests may trigger the refcnt uaf issue of the mc list. The stack is as follows: refcount_t: addition on 0; use-after-free. WARNING: CPU: 21 PID: 144 at lib/refcount.c:25 refcount_warn_saturate (lib/refcount.c:25) CPU: 21 PID: 144 Comm: ksoftirqd/21 Kdump: loaded Not tainted 6.7.0-rc1-next-20231117-dirty #80 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:refcount_warn_saturate (lib/refcount.c:25) RSP: 0018:ffffb68f00657910 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8a00c3bf96c0 RCX: ffff8a07b6160908 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff8a07b6160900 RBP: ffff8a00cba36862 R08: 0000000000000000 R09: 00000000ffff7fff R10: ffffb68f006577c0 R11: ffffffffb0fdcdc8 R12: ffff8a00c3bf9680 R13: ffff8a00c3bf96f0 R14: 0000000000000000 R15: ffff8a00d8766e00 FS: 0000000000000000(0000) GS:ffff8a07b6140000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055f10b520b28 CR3: 000000039741a000 CR4: 00000000000006f0 Call Trace: igmp_heard_query (net/ipv4/igmp.c:1068) igmp_rcv (net/ipv4/igmp.c:1132) ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205) ip_local_deliver_finish (net/ipv4/ip_input.c:234) __netif_receive_skb_one_core (net/core/dev.c:5529) netif_receive_skb_internal (net/core/dev.c:5729) netif_receive_skb (net/core/dev.c:5788) br_handle_frame_finish (net/bridge/br_input.c:216) nf_hook_bridge_pre (net/bridge/br_input.c:294) __netif_receive_skb_core (net/core/dev.c:5423) __netif_receive_skb_list_core (net/core/dev.c:5606) __netif_receive_skb_list (net/core/dev.c:5674) netif_receive_skb_list_internal (net/core/dev.c:5764) napi_gro_receive (net/core/gro.c:609) e1000_clean_rx_irq (drivers/net/ethernet/intel/e1000/e1000_main.c:4467) e1000_clean (drivers/net/ethernet/intel/e1000/e1000_main.c:3805) __napi_poll (net/core/dev.c:6533) net_rx_action (net/core/dev.c:6735) __do_softirq (kernel/softirq.c:554) run_ksoftirqd (kernel/softirq.c:913) smpboot_thread_fn (kernel/smpboot.c:164) kthread (kernel/kthread.c:388) ret_from_fork 
(arch/x86/kernel/process.c:153) ret_from_fork_asm (arch/x86/entry/entry_64.S:250) The root causes are as follows: Thread A Thread B ... netif_receive_skb br_dev_stop ... br_multicast_leave_snoopers ... __ip_mc_dec_group ... __igmp_group_dropped igmp_rcv igmp_stop_timer igmp_heard_query //ref = 1 ip_ma_put igmp_mod_timer refcount_dec_and_test igmp_start_timer //ref = 0 ... refcount_inc //ref increases from 0 When the device receives an IGMPv2 Query message, it starts the timer immediately, regardless of whether the device is running. If the device is down and has left the multicast group, it will cause the mc list refcount uaf issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao Reviewed-by: Eric Dumazet Reviewed-by: Hangbin Liu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/ipv4/igmp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 0c321996c6eb..9eb87a6aae67 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -216,8 +216,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay) int tv = prandom_u32() % max_delay; im->tm_running = 1; - if (!mod_timer(&im->timer, jiffies+tv+2)) - refcount_inc(&im->refcnt); + if (refcount_inc_not_zero(&im->refcnt)) { + if (mod_timer(&im->timer, jiffies + tv + 2)) + ip_ma_put(im); + } } static void igmp_gq_start_timer(struct in_device *in_dev) -- Gitee From 0556f4171d8303cd335a1563ee5f22eda36423f1 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 9 Mar 2021 17:16:32 -0800 Subject: [PATCH 232/243] nvmet: remove unnecessary ctrl parameter stable inclusion from stable-v5.10.203 commit 86a7f67d7605943d65af7315711cea7e5c1b50d8 category: bugfix issue: #I8IY9Q CVE: NA Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit de5878048e11f1ec44164ebb8994de132074367a ] The function nvmet_ctrl_find_get() accepts out pointer to nvmet_ctrl structure. This function returns the same error value from two places that is :- NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR. Move this to the caller so we can change the return type to nvmet_ctrl. Now that we can changed the return type, instead of taking out pointer to the nvmet_ctrl structure remove that function parameter and return the valid nvmet_ctrl pointer on success and NULL on failure. Also, add and rename the goto labels for more readability with comments. 
Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Stable-dep-of: 1c22e0295a5e ("nvmet: nul-terminate the NQNs passed in the connect command") Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/nvme/target/core.c | 21 +++++++++++---------- drivers/nvme/target/fabrics-cmd.c | 11 ++++++----- drivers/nvme/target/nvmet.h | 5 +++-- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 9a8fa2e582d5..ee423eefd82f 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1185,19 +1185,19 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl) ctrl->cap |= NVMET_QUEUE_SIZE - 1; } -u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, - struct nvmet_req *req, struct nvmet_ctrl **ret) +struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn, + const char *hostnqn, u16 cntlid, + struct nvmet_req *req) { + struct nvmet_ctrl *ctrl = NULL; struct nvmet_subsys *subsys; - struct nvmet_ctrl *ctrl; - u16 status = 0; subsys = nvmet_find_get_subsys(req->port, subsysnqn); if (!subsys) { pr_warn("connect request for invalid subsystem %s!\n", subsysnqn); req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); - return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + goto out; } mutex_lock(&subsys->lock); @@ -1210,20 +1210,21 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, if (!kref_get_unless_zero(&ctrl->ref)) continue; - *ret = ctrl; - goto out; + /* ctrl found */ + goto found; } } + ctrl = NULL; /* ctrl not found */ pr_warn("could not find controller %d for subsys %s / host %s\n", cntlid, subsysnqn, hostnqn); req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); - status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; -out: +found: mutex_unlock(&subsys->lock); nvmet_subsys_put(subsys); - return status; +out: + return ctrl; } u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index e62d3d0fa6c8..5baaace31c68 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -223,7 +223,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) { struct nvmf_connect_command *c = &req->cmd->connect; struct nvmf_connect_data *d; - struct nvmet_ctrl *ctrl = NULL; + struct nvmet_ctrl *ctrl; u16 qid = le16_to_cpu(c->qid); u16 status = 0; @@ -250,11 +250,12 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) goto out; } - status = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, - le16_to_cpu(d->cntlid), - req, &ctrl); - if (status) + ctrl = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, + le16_to_cpu(d->cntlid), req); + if (!ctrl) { + status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; goto out; + } if (unlikely(qid > ctrl->subsys->max_qid)) { pr_warn("invalid queue id (%d)\n", qid); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 4bf6d21290c2..ef162b64fabe 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -430,8 +430,9 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl); void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new); u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp); -u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, - struct nvmet_req *req, struct nvmet_ctrl **ret); +struct nvmet_ctrl *nvmet_ctrl_find_get(const char 
*subsysnqn, + const char *hostnqn, u16 cntlid, + struct nvmet_req *req); void nvmet_ctrl_put(struct nvmet_ctrl *ctrl); u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd); -- Gitee From b9d3a8a00b9bf286dac1fe9bb60bde8b7f5407fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Nov 2023 08:13:36 -0500 Subject: [PATCH 233/243] nvmet: nul-terminate the NQNs passed in the connect command stable inclusion from stable-v5.10.203 commit 2be451e7a2f124899546c1bb5c6d509a927968c8 category: bugfix issue: #I8IY9Q CVE: CVE-2023-6121 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 1c22e0295a5eb571c27b53c7371f95699ef705ff ] The host and subsystem NQNs are passed in the connect command payload and interpreted as nul-terminated strings. Ensure they actually are nul-terminated before using them. Fixes: a07b4970f464 "nvmet: add a generic NVMe target") Reported-by: Alon Zahavi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/nvme/target/fabrics-cmd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 5baaace31c68..fb4f62982cb7 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -189,6 +189,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } + d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; + d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, le32_to_cpu(c->kato), &ctrl); if (status) { @@ -250,6 +252,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) goto out; } + d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; + d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0'; ctrl = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, le16_to_cpu(d->cntlid), req); if (!ctrl) { -- Gitee From 38e848fddf725c1e25af4b7a916af9acef6e5ad3 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sat, 9 Dec 2023 05:05:38 -0500 Subject: [PATCH 234/243] net/rose: Fix Use-After-Free in rose_ioctl stable inclusion from stable-v5.10.205~48 commit 7eda5960a5332654b10d951e735750ed60d7f0a9 category: bugfix issue: NA CVE: CVE-2023-51782 Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 810c38a369a0a0ce625b5c12169abce1dd9ccd53 ] Because rose_ioctl() accesses sk->sk_receive_queue without holding a sk->sk_receive_queue.lock, it can cause a race with rose_accept(). A use-after-free for skb occurs with the following flow. ``` rose_ioctl() -> skb_peek() rose_accept() -> skb_dequeue() -> kfree_skb() ``` Add sk->sk_receive_queue.lock to rose_ioctl() to fix this issue. 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/r/20231209100538.GA407321@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/rose/af_rose.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index cf7d974e0f61..47fee93f22e4 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1292,9 +1292,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case TIOCINQ: { struct sk_buff *skb; long amount = 0L; - /* These two are safe on a single CPU system as only user tasks fiddle here */ + + spin_lock_irq(&sk->sk_receive_queue.lock); if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) amount = skb->len; + spin_unlock_irq(&sk->sk_receive_queue.lock); return put_user(amount, (unsigned int __user *) argp); } -- Gitee From a231cec5d58d78d229b6951fd4dc3be960a3456a Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Tue, 12 Dec 2023 23:10:56 -0500 Subject: [PATCH 235/243] appletalk: Fix Use-After-Free in atalk_ioctl stable inclusion from stable-v5.10.205~39 commit a232eb81c7cb5d4dbd325d4611ed029b7fa07596 category: bugfix issue: NA CVE: CVE-2023-51781 Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 189ff16722ee36ced4d2a2469d4ab65a8fee4198 ] Because atalk_ioctl() accesses sk->sk_receive_queue without holding a sk->sk_receive_queue.lock, it can cause a race with atalk_recvmsg(). A use-after-free for skb occurs with the following flow. ``` atalk_ioctl() -> skb_peek() atalk_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() ``` Add sk->sk_receive_queue.lock to atalk_ioctl() to fix this issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/r/20231213041056.GA519680@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/appletalk/ddp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index c94b212d8e7c..46adb8cefccf 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1811,15 +1811,14 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } case TIOCINQ: { - /* - * These two are safe on a single CPU system as only - * user tasks fiddle here - */ - struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); + struct sk_buff *skb; long amount = 0; + spin_lock_irq(&sk->sk_receive_queue.lock); + skb = skb_peek(&sk->sk_receive_queue); if (skb) amount = skb->len - sizeof(struct ddpehdr); + spin_unlock_irq(&sk->sk_receive_queue.lock); rc = put_user(amount, (int __user *)argp); break; } -- Gitee From b109fc343c424ae0155f12cbed2e2d7c57500ca5 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sat, 9 Dec 2023 04:42:10 -0500 Subject: [PATCH 236/243] atm: Fix Use-After-Free in do_vcc_ioctl stable inclusion from stable-v5.10.205~49 commit 64a032015c336ca1795b3e1b1d1f94085ada3553 category: bugfix issue: NA CVE: CVE-2023-51780 Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 24e90b9e34f9e039f56b5f25f6e6eb92cdd8f4b3 ] Because do_vcc_ioctl() accesses sk->sk_receive_queue without holding a sk->sk_receive_queue.lock, it can cause a race with vcc_recvmsg(). A use-after-free for skb occurs with the following flow. 
``` do_vcc_ioctl() -> skb_peek() vcc_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() ``` Add sk->sk_receive_queue.lock to do_vcc_ioctl() to fix this issue. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Link: https://lore.kernel.org/r/20231209094210.GA403126@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/atm/ioctl.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 838ebf0cabbf..f81f8d56f5c0 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -73,14 +73,17 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, case SIOCINQ: { struct sk_buff *skb; + int amount; if (sock->state != SS_CONNECTED) { error = -EINVAL; goto done; } + spin_lock_irq(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - error = put_user(skb ? skb->len : 0, - (int __user *)argp) ? -EFAULT : 0; + amount = skb ? skb->len : 0; + spin_unlock_irq(&sk->sk_receive_queue.lock); + error = put_user(amount, (int __user *)argp) ? -EFAULT : 0; goto done; } case ATM_SETSC: -- Gitee From a24fed2e616381b8939f6fb9125e9dadff337923 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Sat, 9 Dec 2023 05:55:18 -0500 Subject: [PATCH 237/243] Bluetooth: af_bluetooth: Fix Use-After-Free in bt_sock_recvmsg mainline inclusion from mainline-v6.7-rc7~21^2~8^2 commit 2e07e8348ea454615e268222ae3fc240421be768 category: bugfix issue: #I8T1LS CVE: CVE-2023-51779 Signed-off-by: yaowenrui --------------------------------------- This can cause a race with bt_sock_ioctl() because bt_sock_recvmsg() gets the skb from sk->sk_receive_queue and then frees it without holding lock_sock. A use-after-free for a skb occurs with the following flow. ``` bt_sock_recvmsg() -> skb_recv_datagram() -> skb_free_datagram() bt_sock_ioctl() -> skb_peek() ``` Add lock_sock to bt_sock_recvmsg() to fix this issue. 
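One detail worth noting in the diff below: once lock_sock(sk) is taken, every exit path has to pair it with release_sock(sk), which is why the early `return 0` becomes `err = 0` ahead of the unlock. A minimal, hypothetical userspace sketch of that single-release discipline (pthread mutex in place of the socket lock, invented helper names; this is not the kernel function and simplifies the exits into one unlock path):

```
/*
 * Hypothetical userspace sketch, not kernel code: once the per-socket lock
 * is taken, every exit path must drop it, so error exits fall through to a
 * single unlock+return instead of returning early.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

static pthread_mutex_t sock_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ lock_sock()/release_sock() */

struct msgbuf { size_t len; };

/* Placeholder for skb_recv_datagram(): pretend the peer shut the socket down. */
static struct msgbuf *recv_one(bool *shutdown)
{
	*shutdown = true;
	return NULL;
}

/* Placeholder for skb_free_datagram(). */
static void free_one(struct msgbuf *m)
{
	free(m);
}

static int recvmsg_like(size_t *copied)
{
	struct msgbuf *m;
	bool shutdown = false;
	int err = -1;

	pthread_mutex_lock(&sock_lock);   /* taken before touching the queue        */

	m = recv_one(&shutdown);
	if (!m) {
		if (shutdown)
			err = 0;          /* was an early "return 0" before the fix */
		goto unlock;              /* single exit keeps lock/unlock paired   */
	}

	*copied = m->len;
	free_one(m);                      /* freeing also happens under the lock    */
	err = 0;

unlock:
	pthread_mutex_unlock(&sock_lock);
	return err;
}

int main(void)
{
	size_t copied = 0;

	return recvmsg_like(&copied) ? 1 : 0;
}
```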
Cc: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Hyunwoo Kim Signed-off-by: Luiz Augusto von Dentz Signed-off-by: wanxiaoqing --- net/bluetooth/af_bluetooth.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 4ef6a54403aa..b577e65a0c3e 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -263,11 +263,14 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (flags & MSG_OOB) return -EOPNOTSUPP; + lock_sock(sk); + skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) - return 0; + err = 0; + release_sock(sk); return err; } @@ -293,6 +296,8 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, skb_free_datagram(sk, skb); + release_sock(sk); + if (flags & MSG_TRUNC) copied = skblen; -- Gitee From b03a46dd2ff3b8d41f69577b34add23b0326f11d Mon Sep 17 00:00:00 2001 From: waterwin Date: Thu, 1 Jun 2023 13:46:13 +0000 Subject: [PATCH 238/243] delete redundancy version Signed-off-by: waterwin Signed-off-by: yangbo Change-Id: If60fd0feaa843d539ec7fb6396a3dbe34d98a09c --- fs/hmdfs/comm/connection.c | 118 +++++++++++------------------ fs/hmdfs/comm/connection.h | 1 - fs/hmdfs/comm/crypto.c | 5 +- fs/hmdfs/comm/device_node.c | 11 ++- fs/hmdfs/comm/message_verify.c | 64 ++++++++-------- fs/hmdfs/comm/node_cb.c | 3 - fs/hmdfs/comm/node_cb.h | 1 - fs/hmdfs/comm/protocol.h | 9 +-- fs/hmdfs/comm/socket_adapter.c | 60 +++------------ fs/hmdfs/comm/socket_adapter.h | 18 +---- fs/hmdfs/comm/transport.c | 19 ++--- fs/hmdfs/file_remote.c | 6 +- fs/hmdfs/hmdfs_client.c | 1 - fs/hmdfs/hmdfs_server.c | 1 - fs/hmdfs/inode_remote.c | 133 ++++++++++++++------------------- fs/hmdfs/main.c | 6 +- fs/hmdfs/stash.c | 6 -- 17 files changed, 165 insertions(+), 297 deletions(-) diff --git a/fs/hmdfs/comm/connection.c b/fs/hmdfs/comm/connection.c index 7613da514c7c..aec9cabf6931 100644 --- a/fs/hmdfs/comm/connection.c +++ b/fs/hmdfs/comm/connection.c @@ -361,8 +361,7 @@ static int do_send_handshake(struct connection *conn_impl, __u8 ops, sizeof(struct connection_handshake_req) + len; if (((ops == CONNECT_MESG_HANDSHAKE_RESPONSE) || - (ops == CONNECT_MESG_HANDSHAKE_ACK)) && - (conn_impl->node->version >= DFS_2_0)) { + (ops == CONNECT_MESG_HANDSHAKE_ACK))) { extend_len = hs_get_extend_data_len(); send_len += extend_len; } @@ -379,15 +378,14 @@ static int do_send_handshake(struct connection *conn_impl, __u8 ops, memcpy(hs_data->dev_id, buf, len); if (((ops == CONNECT_MESG_HANDSHAKE_RESPONSE) || - ops == CONNECT_MESG_HANDSHAKE_ACK) && - (conn_impl->node->version >= DFS_2_0)) { + ops == CONNECT_MESG_HANDSHAKE_ACK)) { hs_extend_data = (uint8_t *)hs_data + sizeof(struct connection_handshake_req) + len; hs_fill_extend_data(conn_impl, ops, hs_extend_data, extend_len); } hs_head->magic = HMDFS_MSG_MAGIC; - hs_head->version = DFS_2_0; + hs_head->version = HMDFS_VERSION; hs_head->flags |= 0x1; hmdfs_info("Send handshake message: ops = %d, fd = %d", ops, ((struct tcp_handle *)(conn_impl->connect_handle))->fd); @@ -612,48 +610,31 @@ void connection_to_working(struct hmdfs_peer *node) peer_online(node); } -static int connection_check_version(__u8 version) -{ - __u8 min_ver = USERSPACE_MAX_VER; - - if (version <= min_ver || version >= MAX_VERSION) { - hmdfs_info("version err. 
version %u", version); - return -1; - } - return 0; -} - void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, void *data, __u32 data_len) { - __u8 version; __u8 ops; __u8 status; int fd = ((struct tcp_handle *)(conn_impl->connect_handle))->fd; struct connection_msg_head *head = (struct connection_msg_head *)buf; int ret; - version = head->version; - conn_impl->node->version = version; - if (connection_check_version(version) != 0) + if (head->version != HMDFS_VERSION) goto out; - conn_impl->node->conn_operations = hmdfs_get_peer_operation(version); + + conn_impl->node->version = head->version; ops = head->operations; status = conn_impl->status; switch (ops) { case CONNECT_MESG_HANDSHAKE_REQUEST: hmdfs_info( - "Recved handshake request: device_id = %llu, version = %d, head->len = %d, tcp->fd = %d", - conn_impl->node->device_id, version, head->datasize, fd); + "Recved handshake request: device_id = %llu, head->len = %d, tcp->fd = %d", + conn_impl->node->device_id, head->datasize, fd); connection_send_handshake(conn_impl, CONNECT_MESG_HANDSHAKE_RESPONSE, head->msg_id); - if (conn_impl->node->version >= DFS_2_0) { - conn_impl->status = CONNECT_STAT_WAIT_ACK; - conn_impl->node->status = NODE_STAT_SHAKING; - } else { - conn_impl->status = CONNECT_STAT_WORKING; - } + conn_impl->status = CONNECT_STAT_WAIT_ACK; + conn_impl->node->status = NODE_STAT_SHAKING; break; case CONNECT_MESG_HANDSHAKE_RESPONSE: hmdfs_info( @@ -665,45 +646,41 @@ void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, goto out; } - if (conn_impl->node->version >= DFS_2_0) { - ret = hs_proc_msg_data(conn_impl, ops, data, data_len); - if (ret) - goto nego_err; - connection_send_handshake(conn_impl, - CONNECT_MESG_HANDSHAKE_ACK, - head->msg_id); - hmdfs_info("respon rcv handle,conn_impl->crypto=0x%0x", - conn_impl->crypto); + ret = hs_proc_msg_data(conn_impl, ops, data, data_len); + if (ret) + goto nego_err; + connection_send_handshake(conn_impl, + CONNECT_MESG_HANDSHAKE_ACK, + head->msg_id); + hmdfs_info("respon rcv handle,conn_impl->crypto=0x%0x", + conn_impl->crypto); #ifdef CONFIG_HMDFS_FS_ENCRYPTION - ret = connection_handshake_init_tls(conn_impl, ops); - if (ret) { - hmdfs_err("init_tls_key fail, ops %u", ops); - goto out; - } -#endif + ret = connection_handshake_init_tls(conn_impl, ops); + if (ret) { + hmdfs_err("init_tls_key fail, ops %u", ops); + goto out; } +#endif conn_impl->status = CONNECT_STAT_WORKING; peer_online(conn_impl->node); break; case CONNECT_MESG_HANDSHAKE_ACK: - if (conn_impl->node->version >= DFS_2_0) { - ret = hs_proc_msg_data(conn_impl, ops, data, data_len); - if (ret) - goto nego_err; - hmdfs_info("ack rcv handle, conn_impl->crypto=0x%0x", - conn_impl->crypto); + ret = hs_proc_msg_data(conn_impl, ops, data, data_len); + if (ret) + goto nego_err; + hmdfs_info("ack rcv handle, conn_impl->crypto=0x%0x", + conn_impl->crypto); #ifdef CONFIG_HMDFS_FS_ENCRYPTION - ret = connection_handshake_init_tls(conn_impl, ops); - if (ret) { - hmdfs_err("init_tls_key fail, ops %u", ops); - goto out; - } -#endif - conn_impl->status = CONNECT_STAT_WORKING; - peer_online(conn_impl->node); - break; + ret = connection_handshake_init_tls(conn_impl, ops); + if (ret) { + hmdfs_err("init_tls_key fail, ops %u", ops); + goto out; } +#endif + conn_impl->status = CONNECT_STAT_WORKING; + peer_online(conn_impl->node); + break; fallthrough; default: break; @@ -713,8 +690,7 @@ void connection_handshake_recv_handler(struct connection *conn_impl, void *buf, return; nego_err: 
conn_impl->status = CONNECT_STAT_NEGO_FAIL; - connection_handshake_notify(conn_impl->node, - NOTIFY_OFFLINE); + connection_handshake_notify(conn_impl->node, NOTIFY_OFFLINE); hmdfs_err("protocol negotiation failed, remote device_id = %llu, tcp->fd = %d", conn_impl->node->device_id, fd); goto out; @@ -750,10 +726,9 @@ static void update_tls_crypto_key(struct connection *conn, static bool cmd_update_tls_crypto_key(struct connection *conn, struct hmdfs_head_cmd *head) { - __u8 version = conn->node->version; struct tcp_handle *tcp = conn->connect_handle; - if (version < DFS_2_0 || conn->type != CONNECT_TYPE_TCP || !tcp) + if (conn->type != CONNECT_TYPE_TCP || !tcp) return false; return head->operations.command == F_CONNECT_REKEY; } @@ -768,7 +743,7 @@ void connection_working_recv_handler(struct connection *conn_impl, void *buf, return; } #endif - conn_impl->node->conn_operations->recvmsg(conn_impl->node, buf, data); + hmdfs_recv_mesg_callback(conn_impl->node, buf, data); } static void connection_release(struct kref *ref) @@ -1093,7 +1068,7 @@ static struct hmdfs_peer *add_peer_unsafe(struct hmdfs_sb_info *sbi, } static struct hmdfs_peer *alloc_peer(struct hmdfs_sb_info *sbi, uint8_t *cid, - const struct connection_operations *conn_operations, uint32_t devsl) + uint32_t devsl) { struct hmdfs_peer *node = kzalloc(sizeof(*node), GFP_KERNEL); @@ -1149,8 +1124,8 @@ static struct hmdfs_peer *alloc_peer(struct hmdfs_sb_info *sbi, uint8_t *cid, INIT_LIST_HEAD(&node->list); kref_init(&node->ref_cnt); node->owner = sbi->seq; - node->conn_operations = conn_operations; node->sbi = sbi; + node->version = HMDFS_VERSION; node->status = NODE_STAT_SHAKING; node->conn_time = jiffies; memcpy(node->cid, cid, HMDFS_CID_SIZE); @@ -1210,7 +1185,6 @@ struct hmdfs_peer *hmdfs_get_peer(struct hmdfs_sb_info *sbi, uint8_t *cid, uint32_t devsl) { struct hmdfs_peer *peer = NULL, *on_sbi_peer = NULL; - const struct connection_operations *conn_opr_ptr = NULL; mutex_lock(&sbi->connections.node_lock); peer = lookup_peer_by_cid_unsafe(sbi, cid); @@ -1221,12 +1195,7 @@ struct hmdfs_peer *hmdfs_get_peer(struct hmdfs_sb_info *sbi, uint8_t *cid, goto out; } - conn_opr_ptr = hmdfs_get_peer_operation(DFS_2_0); - if (unlikely(!conn_opr_ptr)) { - hmdfs_info("Fatal! Cannot get peer operation"); - goto out; - } - peer = alloc_peer(sbi, cid, conn_opr_ptr, devsl); + peer = alloc_peer(sbi, cid, devsl); if (unlikely(!peer)) { hmdfs_info("Failed to alloc a peer"); goto out; @@ -1290,13 +1259,12 @@ int hmdfs_alloc_msg_idr(struct hmdfs_peer *peer, enum MSG_IDR_TYPE type, { int ret = -EAGAIN; struct hmdfs_msg_idr_head *head = ptr; - int end = peer->version < DFS_2_0 ? 
(USHRT_MAX + 1) : 0; idr_preload(GFP_KERNEL); spin_lock(&peer->idr_lock); if (!peer->offline_start) ret = idr_alloc_cyclic(&peer->msg_idr, ptr, - 1, end, GFP_NOWAIT); + 1, 0, GFP_NOWAIT); if (ret >= 0) { kref_init(&head->ref); head->msg_id = ret; diff --git a/fs/hmdfs/comm/connection.h b/fs/hmdfs/comm/connection.h index 6137c549824c..8178590d4e3d 100644 --- a/fs/hmdfs/comm/connection.h +++ b/fs/hmdfs/comm/connection.h @@ -169,7 +169,6 @@ struct hmdfs_peer { int recvbuf_maxsize; struct crypto_aead *tfm; char cid[HMDFS_CID_SIZE + 1]; - const struct connection_operations *conn_operations; struct hmdfs_sb_info *sbi; struct workqueue_struct *async_wq; struct workqueue_struct *req_handle_wq; diff --git a/fs/hmdfs/comm/crypto.c b/fs/hmdfs/comm/crypto.c index 60bb08f1697f..eaa755185fc7 100644 --- a/fs/hmdfs/comm/crypto.c +++ b/fs/hmdfs/comm/crypto.c @@ -56,7 +56,7 @@ int tls_crypto_info_init(struct connection *conn_impl) u8 key_meterial[HMDFS_KEY_SIZE]; struct tcp_handle *tcp = (struct tcp_handle *)(conn_impl->connect_handle); - if (conn_impl->node->version < DFS_2_0 || !tcp) + if (!tcp) return -EINVAL; // send update_key(conn_impl->send_key, key_meterial, HKDF_TYPE_IV); @@ -175,10 +175,9 @@ static int tls_set_rx(struct tcp_handle *tcp) int set_crypto_info(struct connection *conn_impl, int set_type) { int ret = 0; - __u8 version = conn_impl->node->version; struct tcp_handle *tcp = (struct tcp_handle *)(conn_impl->connect_handle); - if (version < DFS_2_0 || !tcp) + if (!tcp) return -EINVAL; if (set_type == SET_CRYPTO_SEND) { diff --git a/fs/hmdfs/comm/device_node.c b/fs/hmdfs/comm/device_node.c index 0f2585de61fe..796e5cd09a72 100644 --- a/fs/hmdfs/comm/device_node.c +++ b/fs/hmdfs/comm/device_node.c @@ -47,6 +47,11 @@ static void ctrl_cmd_update_socket_handler(const char *buf, size_t len, goto out; } memcpy(&cmd, buf, sizeof(cmd)); + if (cmd.status != CONNECT_STAT_WAIT_REQUEST && + cmd.status != CONNECT_STAT_WAIT_RESPONSE) { + hmdfs_err("invalid status"); + goto out; + } node = hmdfs_get_peer(sbi, cmd.cid, cmd.devsl); if (unlikely(!node)) { @@ -253,12 +258,12 @@ static ssize_t sbi_status_show(struct kobject *kobj, struct sbi_attribute *attr, struct tcp_handle *tcp = NULL; sbi = to_sbi(kobj); - size += sprintf(buf + size, "peers version status\n"); + size += sprintf(buf + size, "peers status\n"); mutex_lock(&sbi->connections.node_lock); list_for_each_entry(peer, &sbi->connections.node_list, list) { - size += sprintf(buf + size, "%llu %d %d\n", peer->device_id, - peer->version, peer->status); + size += sprintf(buf + size, "%llu %d\n", peer->device_id, + peer->status); // connection information size += sprintf( buf + size, diff --git a/fs/hmdfs/comm/message_verify.c b/fs/hmdfs/comm/message_verify.c index fd76658ef16d..6c4117713fac 100644 --- a/fs/hmdfs/comm/message_verify.c +++ b/fs/hmdfs/comm/message_verify.c @@ -934,45 +934,41 @@ int hmdfs_message_verify(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, goto handle_bad_msg; } - if (head->version != DFS_2_0) { - err = -EINVAL; + len = le32_to_cpu(head->data_len) - + sizeof(struct hmdfs_head_cmd); + min = message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX]; + if (head->operations.command == F_ITERATE && flag == C_RESPONSE) + max = sizeof(struct slice_descriptor) + PAGE_SIZE; + else + max = message_length[flag][cmd][HMDFS_MESSAGE_MAX_INDEX]; + len_type = + message_length[flag][cmd][HMDFS_MESSAGE_LEN_JUDGE_INDEX]; + + if (len_type == MESSAGE_LEN_JUDGE_RANGE) { + if (len < min || len > max) { + hmdfs_err( + "cmd %d -> %d message verify fail, len = 
%zu", + cmd, flag, len); + err = -EINVAL; + goto handle_bad_msg; + } } else { - len = le32_to_cpu(head->data_len) - - sizeof(struct hmdfs_head_cmd); - min = message_length[flag][cmd][HMDFS_MESSAGE_MIN_INDEX]; - if (head->operations.command == F_ITERATE && flag == C_RESPONSE) - max = sizeof(struct slice_descriptor) + PAGE_SIZE; - else - max = message_length[flag][cmd][HMDFS_MESSAGE_MAX_INDEX]; - len_type = - message_length[flag][cmd][HMDFS_MESSAGE_LEN_JUDGE_INDEX]; - - if (len_type == MESSAGE_LEN_JUDGE_RANGE) { - if (len < min || len > max) { - hmdfs_err( - "cmd %d -> %d message verify fail, len = %zu", - cmd, flag, len); - err = -EINVAL; - goto handle_bad_msg; - } - } else { - if (len != min && len != max) { - hmdfs_err( - "cmd %d -> %d message verify fail, len = %zu", - cmd, flag, len); - err = -EINVAL; - goto handle_bad_msg; - } + if (len != min && len != max) { + hmdfs_err( + "cmd %d -> %d message verify fail, len = %zu", + cmd, flag, len); + err = -EINVAL; + goto handle_bad_msg; } + } - if (message_verify[cmd]) - err = message_verify[cmd](flag, len, data); + if (message_verify[cmd]) + err = message_verify[cmd](flag, len, data); - if (err) - goto handle_bad_msg; + if (err) + goto handle_bad_msg; - return err; - } + return err; handle_bad_msg: handle_bad_message(con, head, &err); diff --git a/fs/hmdfs/comm/node_cb.c b/fs/hmdfs/comm/node_cb.c index 21b84d2fff82..991ebde1d8cf 100644 --- a/fs/hmdfs/comm/node_cb.c +++ b/fs/hmdfs/comm/node_cb.c @@ -68,9 +68,6 @@ void hmdfs_node_call_evt_cb(struct hmdfs_peer *conn, int evt, bool sync, return; list_for_each_entry(desc, &cb_head[evt][sync], list) { - if (conn->version < desc->min_version) - continue; - desc->fn(conn, evt, seq); } } diff --git a/fs/hmdfs/comm/node_cb.h b/fs/hmdfs/comm/node_cb.h index fe53b946f668..8bd6a4bbc08e 100644 --- a/fs/hmdfs/comm/node_cb.h +++ b/fs/hmdfs/comm/node_cb.h @@ -29,7 +29,6 @@ typedef void (*hmdfs_node_evt_cb)(struct hmdfs_peer *conn, struct hmdfs_node_cb_desc { int evt; bool sync; - unsigned char min_version; hmdfs_node_evt_cb fn; struct list_head list; }; diff --git a/fs/hmdfs/comm/protocol.h b/fs/hmdfs/comm/protocol.h index a873143f20d7..b4b6e0f6aa03 100644 --- a/fs/hmdfs/comm/protocol.h +++ b/fs/hmdfs/comm/protocol.h @@ -144,17 +144,10 @@ struct slice_descriptor { } __packed; enum DFS_VERSION { - INVALID_VERSION = 0, - DFS_1_0, - - USERSPACE_MAX_VER = 0x3F, - DFS_2_0, - + HMDFS_VERSION = 0x40, MAX_VERSION = 0xFF }; -enum CONN_OPERATIONS_VERSION { USERDFS_VERSION, PROTOCOL_VERSION }; - enum CMD_FLAG { C_REQUEST = 0, C_RESPONSE = 1, C_FLAG_SIZE }; enum FILE_CMD { diff --git a/fs/hmdfs/comm/socket_adapter.c b/fs/hmdfs/comm/socket_adapter.c index 0404c2a79d3a..e98f16e05678 100644 --- a/fs/hmdfs/comm/socket_adapter.c +++ b/fs/hmdfs/comm/socket_adapter.c @@ -193,12 +193,10 @@ int hmdfs_sendmessage(struct hmdfs_peer *node, struct hmdfs_send_data *msg) if (!ret) statistic_con_sb_dirty(node, &head->operations); out: - if (node->version == DFS_2_0 && - head->operations.cmd_flag == C_REQUEST) + if (head->operations.cmd_flag == C_REQUEST) hmdfs_client_snd_statis(node->sbi, head->operations.command, ret); - else if (node->version == DFS_2_0 && - head->operations.cmd_flag == C_RESPONSE) + else if (head->operations.cmd_flag == C_RESPONSE) hmdfs_server_snd_statis(node->sbi, head->operations.command, ret); out_err: @@ -214,7 +212,7 @@ int hmdfs_sendmessage_response(struct hmdfs_peer *con, struct hmdfs_head_cmd head; head.magic = HMDFS_MSG_MAGIC; - head.version = DFS_2_0; + head.version = HMDFS_VERSION; head.operations = 
cmd->operations; head.operations.cmd_flag = C_RESPONSE; head.data_len = cpu_to_le32(data_len + sizeof(struct hmdfs_head_cmd)); @@ -337,7 +335,7 @@ int hmdfs_send_async_request(struct hmdfs_peer *peer, if (IS_ERR(mp)) return PTR_ERR(mp); head.magic = HMDFS_MSG_MAGIC; - head.version = DFS_2_0; + head.version = HMDFS_VERSION; head.data_len = cpu_to_le32(msg_len); head.operations = mp->req.operations; head.msg_id = cpu_to_le32(mp->head.msg_id); @@ -429,7 +427,7 @@ int hmdfs_sendmessage_request(struct hmdfs_peer *con, sm->out_buf = NULL; head->magic = HMDFS_MSG_MAGIC; - head->version = DFS_2_0; + head->version = HMDFS_VERSION; head->operations = sm->operations; head->data_len = cpu_to_le32(outlen); head->ret_code = cpu_to_le32(sm->ret_code); @@ -528,7 +526,7 @@ static int hmdfs_send_slice(struct hmdfs_peer *con, struct hmdfs_head_cmd *cmd, sizeof(struct slice_descriptor); head.magic = HMDFS_MSG_MAGIC; - head.version = DFS_2_0; + head.version = HMDFS_VERSION; head.operations = cmd->operations; head.operations.cmd_flag = C_RESPONSE; head.data_len = cpu_to_le32(msg_len); @@ -623,9 +621,8 @@ void hmdfs_recv_page_work_fn(struct work_struct *ptr) struct hmdfs_async_work *async_work = container_of(ptr, struct hmdfs_async_work, d_work.work); - if (async_work->head.peer->version >= DFS_2_0) - hmdfs_client_resp_statis(async_work->head.peer->sbi, - F_READPAGE, HMDFS_RESP_TIMEOUT, 0, 0); + hmdfs_client_resp_statis(async_work->head.peer->sbi, + F_READPAGE, HMDFS_RESP_TIMEOUT, 0, 0); hmdfs_err_ratelimited("timeout and release page, msg_id:%u", async_work->head.msg_id); asw_done(async_work); @@ -659,7 +656,7 @@ int hmdfs_sendpage_request(struct hmdfs_peer *con, memset(&head, 0, sizeof(head)); head.magic = HMDFS_MSG_MAGIC; - head.version = DFS_2_0; + head.version = HMDFS_VERSION; head.operations = sm->operations; head.data_len = cpu_to_le32(outlen); head.ret_code = cpu_to_le32(sm->ret_code); @@ -1067,7 +1064,7 @@ static int hmdfs_response_recv(struct hmdfs_peer *con, } } -static void hmdfs_recv_mesg_callback(struct hmdfs_peer *con, void *head, +void hmdfs_recv_mesg_callback(struct hmdfs_peer *con, void *head, void *buf) { struct hmdfs_head_cmd *hmdfs_head = (struct hmdfs_head_cmd *)head; @@ -1101,43 +1098,6 @@ static void hmdfs_recv_mesg_callback(struct hmdfs_peer *con, void *head, kfree(buf); } -static inline void hmdfs_recv_page_callback(struct hmdfs_peer *con, - struct hmdfs_head_cmd *head, - int err, void *data) -{ - if (head->operations.command == F_READPAGE) - hmdfs_client_recv_readpage(head, err, data); -} - -static const struct connection_operations conn_operations[] = { - [PROTOCOL_VERSION] = { - .recvmsg = hmdfs_recv_mesg_callback, - .recvpage = hmdfs_recv_page_callback, - /* remote device operations */ - .remote_file_fops = - &hmdfs_dev_file_fops_remote, - .remote_file_iops = - &hmdfs_dev_file_iops_remote, - .remote_file_aops = - &hmdfs_dev_file_aops_remote, - .remote_unlink = - hmdfs_dev_unlink_from_con, - .remote_readdir = - hmdfs_dev_readdir_from_con, - } -}; - -const struct connection_operations *hmdfs_get_peer_operation(__u8 version) -{ - if (version <= INVALID_VERSION || version >= MAX_VERSION) - return NULL; - - if (version <= USERSPACE_MAX_VER) - return &(conn_operations[USERDFS_VERSION]); - else - return &(conn_operations[PROTOCOL_VERSION]); -} - void hmdfs_wakeup_parasite(struct hmdfs_msg_parasite *mp) { hmdfs_wait_mp_wfired(mp); diff --git a/fs/hmdfs/comm/socket_adapter.h b/fs/hmdfs/comm/socket_adapter.h index ba4c672d7bcc..35275ccac2a6 100644 --- a/fs/hmdfs/comm/socket_adapter.h +++ 
b/fs/hmdfs/comm/socket_adapter.h @@ -23,21 +23,6 @@ #define HMDFS_IDR_RESCHED_COUNT 512 -struct connection_operations { - void (*recvmsg)(struct hmdfs_peer *con, void *head, void *buf); - void (*recvpage)(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, - int err, void *data); - const struct file_operations *remote_file_fops; - const struct inode_operations *remote_file_iops; - const struct address_space_operations *remote_file_aops; - int (*remote_unlink)(struct hmdfs_peer *con, struct dentry *dentry); - int (*remote_readdir)(struct hmdfs_peer *con, struct file *file, - struct dir_context *ctx); - struct hmdfs_lookup_ret *(*remote_lookup)(struct hmdfs_peer *con, - const char *relative_path, - const char *d_name); -}; - /***************************************************************************** * connections(TCP, UDP, .etc) adapter for RPC *****************************************************************************/ @@ -75,7 +60,6 @@ int hmdfs_sendmessage_response(struct hmdfs_peer *con, void *buf, __u32 ret_code); int hmdfs_readfile_response(struct hmdfs_peer *con, struct hmdfs_head_cmd *head, struct file *filp); -const struct connection_operations *hmdfs_get_peer_operation(__u8 version); void hmdfs_recv_page_work_fn(struct work_struct *ptr); @@ -168,6 +152,8 @@ static inline void set_cmd_timeout(struct hmdfs_sb_info *sbi, enum FILE_CMD cmd, sbi->s_cmd_timeout[cmd] = value; } +void hmdfs_recv_mesg_callback(struct hmdfs_peer *con, void *head, void *buf); + void hmdfs_response_wakeup(struct sendmsg_wait_queue *msg_info, __u32 ret_code, __u32 data_len, void *buf); diff --git a/fs/hmdfs/comm/transport.c b/fs/hmdfs/comm/transport.c index c9eaaa7dca37..c0e893c609fc 100644 --- a/fs/hmdfs/comm/transport.c +++ b/fs/hmdfs/comm/transport.c @@ -250,7 +250,7 @@ static int tcp_recvpage_tls(struct connection *connect, if (ret) rd_err = ret; } - node->conn_operations->recvpage(node, recv, rd_err, async_work); + hmdfs_client_recv_readpage(recv, rd_err, async_work); asw_put(async_work); return ret; @@ -519,7 +519,7 @@ static int tcp_receive_from_sock(struct tcp_handle *tcp) tcp->connect->stat.recv_bytes += sizeof(struct hmdfs_head_cmd); tcp->connect->stat.recv_message_count++; - if (recv->magic != HMDFS_MSG_MAGIC) { + if (recv->magic != HMDFS_MSG_MAGIC || recv->version != HMDFS_VERSION) { hmdfs_info_ratelimited("tcp recv fd %d wrong magic. 
drop message", tcp->fd); goto out; @@ -533,16 +533,14 @@ static int tcp_receive_from_sock(struct tcp_handle *tcp) goto out; } - if (recv->version > USERSPACE_MAX_VER && - tcp->connect->status == CONNECT_STAT_WORKING && + if (tcp->connect->status == CONNECT_STAT_WORKING && recv->operations.command == F_READPAGE && recv->operations.cmd_flag == C_RESPONSE) { ret = tcp_recvpage_tls(tcp->connect, recv); goto out; } - if (tcp->connect->status == CONNECT_STAT_WORKING && - recv->version > USERSPACE_MAX_VER) + if (tcp->connect->status == CONNECT_STAT_WORKING) ret = tcp_recvbuffer_tls(tcp->connect, recv); else ret = tcp_recvbuffer_cipher(tcp->connect, recv); @@ -773,7 +771,7 @@ int tcp_send_rekey_request(struct connection *connect) rekey_request_param->update_request = cpu_to_le32(UPDATE_NOT_REQUESTED); head->magic = HMDFS_MSG_MAGIC; - head->version = DFS_2_0; + head->version = HMDFS_VERSION; head->operations = operations; head->data_len = cpu_to_le32(sizeof(*head) + sizeof(*rekey_request_param)); @@ -822,11 +820,9 @@ static int tcp_send_message(struct connection *connect, trace_hmdfs_tcp_send_message(msg->head); - if (connect->status == CONNECT_STAT_WORKING && - connect->node->version > USERSPACE_MAX_VER) + if (connect->status == CONNECT_STAT_WORKING) ret = tcp_send_message_sock_tls(tcp, msg); else - // Handshake status or version HMDFS1.0 ret = tcp_send_message_sock_cipher(tcp, msg); if (ret != 0) { @@ -835,8 +831,7 @@ static int tcp_send_message(struct connection *connect, } #ifdef CONFIG_HMDFS_FS_ENCRYPTION if (nowtime - connect->stat.rekey_time >= REKEY_LIFETIME && - connect->status == CONNECT_STAT_WORKING && - connect->node->version >= DFS_2_0) { + connect->status == CONNECT_STAT_WORKING) { hmdfs_info("send rekey message to devid %llu", connect->node->device_id); ret = tcp_send_rekey_request(connect); diff --git a/fs/hmdfs/file_remote.c b/fs/hmdfs/file_remote.c index f9a77ddf4dcc..f483ad5e030b 100644 --- a/fs/hmdfs/file_remote.c +++ b/fs/hmdfs/file_remote.c @@ -994,7 +994,7 @@ static int hmdfs_iterate_remote(struct file *file, struct dir_context *ctx) con = hmdfs_lookup_from_devid(file->f_inode->i_sb->s_fs_info, dev_id); if (con) { // ctx->pos = 0; - err = con->conn_operations->remote_readdir(con, file, ctx); + err = hmdfs_dev_readdir_from_con(con, file, ctx); if (unlikely(!con)) { hmdfs_err("con is null"); goto done; @@ -1018,9 +1018,7 @@ int hmdfs_dir_open_remote(struct inode *inode, struct file *file) struct hmdfs_inode_info *info = hmdfs_i(inode); struct clearcache_item *cache_item = NULL; - if (info->conn && info->conn->version <= USERSPACE_MAX_VER) { - return 0; - } else if (info->conn) { + if (info->conn) { if (!hmdfs_cache_revalidate(READ_ONCE(info->conn->conn_time), info->conn->device_id, file->f_path.dentry)) diff --git a/fs/hmdfs/hmdfs_client.c b/fs/hmdfs/hmdfs_client.c index 31c1a6d38e8f..9b7981085707 100644 --- a/fs/hmdfs/hmdfs_client.c +++ b/fs/hmdfs/hmdfs_client.c @@ -1086,7 +1086,6 @@ static struct hmdfs_node_cb_desc client_cb[] = { { .evt = NODE_EVT_OFFLINE, .sync = true, - .min_version = DFS_1_0, .fn = hmdfs_client_offline_notify, }, }; diff --git a/fs/hmdfs/hmdfs_server.c b/fs/hmdfs/hmdfs_server.c index fc0274233970..d711edd948c4 100644 --- a/fs/hmdfs/hmdfs_server.c +++ b/fs/hmdfs/hmdfs_server.c @@ -147,7 +147,6 @@ static struct hmdfs_node_cb_desc server_cb[] = { { .evt = NODE_EVT_OFFLINE, .sync = true, - .min_version = DFS_2_0, .fn = hmdfs_server_offline_notify }, }; diff --git a/fs/hmdfs/inode_remote.c b/fs/hmdfs/inode_remote.c index 6a19e6e6b135..65eddedb299a 100644 --- 
a/fs/hmdfs/inode_remote.c +++ b/fs/hmdfs/inode_remote.c @@ -175,48 +175,40 @@ struct hmdfs_lookup_ret *hmdfs_lookup_by_con(struct hmdfs_peer *con, { struct hmdfs_lookup_ret *result = NULL; - if (con->version > USERSPACE_MAX_VER) { + /* + * LOOKUP_REVAL means we found stale info from dentry file, thus + * we need to use remote getattr. + */ + if (flags & LOOKUP_REVAL) { /* - * LOOKUP_REVAL means we found stale info from dentry file, thus - * we need to use remote getattr. - */ - if (flags & LOOKUP_REVAL) { - /* - * HMDFS_LOOKUP_REVAL means we need to skip dentry cache - * in lookup, because dentry cache in server might have - * stale data. - */ - result = get_remote_inode_info(con, dentry, - HMDFS_LOOKUP_REVAL); - get_remote_dentry_file_in_wq(dentry->d_parent, con); - return result; - } - - /* If cache file is still valid */ - if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time), - con->device_id, dentry->d_parent)) { - result = lookup_remote_dentry(dentry, qstr, - con->device_id); - /* - * If lookup from cache file failed, use getattr to see - * if remote have created the file. - */ - if (!(flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) && - !result) - result = get_remote_inode_info(con, dentry, 0); - /* If cache file expired, use getattr directly - * except create and rename opt - */ - } else { + * HMDFS_LOOKUP_REVAL means we need to skip dentry cache + * in lookup, because dentry cache in server might have + * stale data. + */ + result = get_remote_inode_info(con, dentry, + HMDFS_LOOKUP_REVAL); + get_remote_dentry_file_in_wq(dentry->d_parent, con); + return result; + } + + /* If cache file is still valid */ + if (hmdfs_cache_revalidate(READ_ONCE(con->conn_time), + con->device_id, dentry->d_parent)) { + result = lookup_remote_dentry(dentry, qstr, + con->device_id); + /* + * If lookup from cache file failed, use getattr to see + * if remote have created the file. 
+ */ + if (!(flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) && + !result) result = get_remote_inode_info(con, dentry, 0); - get_remote_dentry_file_in_wq(dentry->d_parent, con); - } + /* If cache file expired, use getattr directly + * except create and rename opt + */ } else { - if (!relative_path) - return NULL; - - result = con->conn_operations->remote_lookup( - con, relative_path, dentry->d_name.name); + result = get_remote_inode_info(con, dentry, 0); + get_remote_dentry_file_in_wq(dentry->d_parent, con); } return result; @@ -352,17 +344,16 @@ struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con, info = hmdfs_i(inode); info->inode_type = HMDFS_LAYER_OTHER_REMOTE; - if (con->version > USERSPACE_MAX_VER) { - /* the inode was found in cache */ - if (!(inode->i_state & I_NEW)) { - hmdfs_fill_inode_remote(inode, dir, mode); - hmdfs_update_inode(inode, res); - return inode; - } - hmdfs_remote_init_stash_status(con, inode, mode); + /* the inode was found in cache */ + if (!(inode->i_state & I_NEW)) { + hmdfs_fill_inode_remote(inode, dir, mode); + hmdfs_update_inode(inode, res); + return inode; } + hmdfs_remote_init_stash_status(con, inode, mode); + inode->i_ctime.tv_sec = 0; inode->i_ctime.tv_nsec = 0; inode->i_mtime.tv_sec = res->i_mtime; @@ -380,9 +371,9 @@ struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con, goto bad_inode; } - if (S_ISREG(mode)) { - inode->i_op = con->conn_operations->remote_file_iops; - inode->i_fop = con->conn_operations->remote_file_fops; + if (S_ISREG(mode) || S_ISLNK(mode)) { + inode->i_op = &hmdfs_dev_file_iops_remote; + inode->i_fop = &hmdfs_dev_file_fops_remote; inode->i_size = res->i_size; set_nlink(inode, 1); } else if (S_ISDIR(mode)) { @@ -394,7 +385,7 @@ struct inode *fill_inode_remote(struct super_block *sb, struct hmdfs_peer *con, goto bad_inode; } - inode->i_mapping->a_ops = con->conn_operations->remote_file_aops; + inode->i_mapping->a_ops = &hmdfs_dev_file_aops_remote; hmdfs_fill_inode_remote(inode, dir, mode); unlock_new_inode(inode); @@ -584,10 +575,7 @@ int hmdfs_mkdir_remote(struct inode *dir, struct dentry *dentry, umode_t mode) hmdfs_warning("qpb_debug: con is null!"); goto out; } - if (con->version <= USERSPACE_MAX_VER) { - err = -EPERM; - goto out; - } + err = hmdfs_mkdir_remote_dentry(con, dentry, mode); if (!err) create_in_cache_file(con->device_id, dentry); @@ -656,10 +644,7 @@ int hmdfs_create_remote(struct inode *dir, struct dentry *dentry, umode_t mode, hmdfs_warning("qpb_debug: con is null!"); goto out; } - if (con->version <= USERSPACE_MAX_VER) { - err = -EPERM; - goto out; - } + err = hmdfs_create_remote_dentry(con, dentry, mode, want_excl); if (!err) create_in_cache_file(con->device_id, dentry); @@ -705,10 +690,7 @@ int hmdfs_rmdir_remote(struct inode *dir, struct dentry *dentry) err = -EACCES; goto out; } - if (con->version <= USERSPACE_MAX_VER) { - err = -EPERM; - goto out; - } + err = hmdfs_rmdir_remote_dentry(con, dentry); /* drop dentry even remote failed * it maybe cause that one remote devices disconnect @@ -757,7 +739,7 @@ int hmdfs_unlink_remote(struct inode *dir, struct dentry *dentry) if (conn->status != NODE_STAT_ONLINE) return 0; - return conn->conn_operations->remote_unlink(conn, dentry); + return hmdfs_dev_unlink_from_con(conn, dentry); } /* rename dentry in cache file */ @@ -819,20 +801,17 @@ int hmdfs_rename_remote(struct inode *old_dir, struct dentry *old_dentry, goto rename_out; } if (S_ISREG(old_dentry->d_inode->i_mode)) { - if (con->version > USERSPACE_MAX_VER) { - 
hmdfs_debug("send MSG to remote devID %llu", - con->device_id); - err = hmdfs_client_start_rename( - con, relative_old_dir_path, old_dentry_d_name, - relative_new_dir_path, new_dentry_d_name, - flags); - if (!err) - rename_in_cache_file(con->device_id, old_dentry, - new_dentry); - } + hmdfs_debug("send MSG to remote devID %llu", + con->device_id); + err = hmdfs_client_start_rename( + con, relative_old_dir_path, old_dentry_d_name, + relative_new_dir_path, new_dentry_d_name, + flags); + if (!err) + rename_in_cache_file(con->device_id, old_dentry, + new_dentry); } else if (S_ISDIR(old_dentry->d_inode->i_mode)) { - if ((con->status == NODE_STAT_ONLINE) && - (con->version > USERSPACE_MAX_VER)) { + if ((con->status == NODE_STAT_ONLINE)) { ret = hmdfs_client_start_rename( con, relative_old_dir_path, old_dentry_d_name, relative_new_dir_path, new_dentry_d_name, diff --git a/fs/hmdfs/main.c b/fs/hmdfs/main.c index f57d67562486..78c25ffe7024 100644 --- a/fs/hmdfs/main.c +++ b/fs/hmdfs/main.c @@ -326,8 +326,7 @@ static int hmdfs_remote_statfs(struct dentry *dentry, struct kstatfs *buf) } mutex_lock(&sbi->connections.node_lock); list_for_each_entry(con, &sbi->connections.node_list, list) { - if (con->status == NODE_STAT_ONLINE && - con->version > USERSPACE_MAX_VER) { + if (con->status == NODE_STAT_ONLINE) { peer_get(con); mutex_unlock(&sbi->connections.node_lock); hmdfs_debug("send MSG to remote devID %llu", @@ -839,6 +838,9 @@ static int hmdfs_fill_super(struct super_block *sb, void *data, int silent) char ctrl_path[CTRL_PATH_MAX_LEN]; uint64_t ctrl_hash; + if (!raw_data) + return -EINVAL; + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) { err = -ENOMEM; diff --git a/fs/hmdfs/stash.c b/fs/hmdfs/stash.c index 413720404cc7..094fc15f9fb8 100644 --- a/fs/hmdfs/stash.c +++ b/fs/hmdfs/stash.c @@ -2205,16 +2205,13 @@ static struct hmdfs_node_cb_desc stash_cb[] = { { .evt = NODE_EVT_OFFLINE, .sync = true, - .min_version = DFS_2_0, .fn = hmdfs_stash_offline_prepare, }, { .evt = NODE_EVT_OFFLINE, .sync = false, - .min_version = DFS_2_0, .fn = hmdfs_stash_offline_do_stash, }, - /* Don't known peer version yet, so min_version is 0 */ { .evt = NODE_EVT_ADD, .sync = true, @@ -2223,19 +2220,16 @@ static struct hmdfs_node_cb_desc stash_cb[] = { { .evt = NODE_EVT_ONLINE, .sync = false, - .min_version = DFS_2_0, .fn = hmdfs_stash_online_prepare, }, { .evt = NODE_EVT_ONLINE, .sync = false, - .min_version = DFS_2_0, .fn = hmdfs_stash_online_do_restore, }, { .evt = NODE_EVT_DEL, .sync = true, - .min_version = DFS_2_0, .fn = hmdfs_stash_del_do_cleanup, }, }; -- Gitee From 4cc3ff3a60c06ea40634bc73523068010e5f4335 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 15 Dec 2023 19:59:14 -0300 Subject: [PATCH 239/243] smb: client: fix OOB in smbCalcSize() stable inclusion from stable-v5.10.206 commit 0c54b79d1d9b25f5a406bcf1969f956e14c4704d category: bugfix issue: NA CVE: CVE-2023-6606 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit b35858b3786ddbb56e1c35138ba25d6adf8d0bef ] Validate @smb->WordCount to avoid reading off the end of @smb and thus causing the following KASAN splat: BUG: KASAN: slab-out-of-bounds in smbCalcSize+0x32/0x40 [cifs] Read of size 2 at addr ffff88801c024ec5 by task cifsd/1328 CPU: 1 PID: 1328 Comm: cifsd Not tainted 6.7.0-rc5 #9 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.2-3-gd478f380-rebuilt.opensuse.org 04/01/2014 Call Trace: dump_stack_lvl+0x4a/0x80 print_report+0xcf/0x650 ? srso_alias_return_thunk+0x5/0xfbef5 ? 
srso_alias_return_thunk+0x5/0xfbef5 ? __phys_addr+0x46/0x90 kasan_report+0xd8/0x110 ? smbCalcSize+0x32/0x40 [cifs] ? smbCalcSize+0x32/0x40 [cifs] kasan_check_range+0x105/0x1b0 smbCalcSize+0x32/0x40 [cifs] checkSMB+0x162/0x370 [cifs] ? __pfx_checkSMB+0x10/0x10 [cifs] cifs_handle_standard+0xbc/0x2f0 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 cifs_demultiplex_thread+0xed1/0x1360 [cifs] ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] ? srso_alias_return_thunk+0x5/0xfbef5 ? lockdep_hardirqs_on_prepare+0x136/0x210 ? __pfx_lock_release+0x10/0x10 ? srso_alias_return_thunk+0x5/0xfbef5 ? mark_held_locks+0x1a/0x90 ? lockdep_hardirqs_on_prepare+0x136/0x210 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? __kthread_parkme+0xce/0xf0 ? __pfx_cifs_demultiplex_thread+0x10/0x10 [cifs] kthread+0x18d/0x1d0 ? kthread+0xdb/0x1d0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x34/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 This fixes CVE-2023-6606. Reported-by: j51569436@gmail.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218218 Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing Signed-off-by: Ywenrui44091 --- fs/cifs/misc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1c14cf01dbef..ab4b91a3e8bb 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -353,6 +353,10 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) cifs_dbg(VFS, "Length less than smb header size\n"); } return -EIO; + } else if (total_read < sizeof(*smb) + 2 * smb->WordCount) { + cifs_dbg(VFS, "%s: can't read BCC due to invalid WordCount(%u)\n", + __func__, smb->WordCount); + return -EIO; } /* otherwise, there is enough to get to the BCC */ -- Gitee From 20ac5e88e1684edb0571d022599d510d1c2212a8 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Fri, 10 Feb 2023 15:17:30 +0800 Subject: [PATCH 240/243] netfilter: ctnetlink: fix possible refcount leak in ctnetlink_create_conntrack() stable inclusion from stable-5.10.173 commit 1ff0b87df98b93e10ced45773aa7d35377355421 category: bugfix issue: #I8UVN2 CVE: NA Signed-off-by: huzhaodong --------------------------------------- [ Upstream commit ac4893980bbe79ce383daf9a0885666a30fe4c83 ] nf_ct_put() needs to be called to put the refcount got by nf_conntrack_find_get() to avoid refcount leak when nf_conntrack_hash_check_insert() fails. 
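The diff below adds an err3 label so the reference taken on ct->master earlier in the setup path is dropped when the hash insert fails. A minimal sketch of that get/put unwind pattern, using invented userspace types rather than the conntrack structures (illustrative only, under the assumption that the failing step is the last one before publication):

```
/*
 * Hypothetical userspace sketch, not the netfilter code: a reference taken
 * on "master" during setup must be dropped on the error path when the final
 * insert step fails, otherwise the refcount leaks.
 */
#include <stdio.h>
#include <stdlib.h>

struct obj {
	int refcnt;
};

static struct obj *obj_get(struct obj *o)        /* ~ nf_conntrack_find_get() */
{
	if (o)
		o->refcnt++;
	return o;
}

static void obj_put(struct obj *o)               /* ~ nf_ct_put()             */
{
	if (o && --o->refcnt == 0)
		free(o);
}

/* Final step that may fail, standing in for nf_conntrack_hash_check_insert(). */
static int hash_insert(struct obj *o)
{
	(void)o;
	return -1;                               /* simulate failure          */
}

static int create_with_master(struct obj *master, struct obj **out)
{
	struct obj *ct = calloc(1, sizeof(*ct));
	struct obj *held;
	int err;

	if (!ct)
		return -1;
	ct->refcnt = 1;

	held = obj_get(master);                  /* setup takes a reference   */

	err = hash_insert(ct);
	if (err)
		goto err_put_master;             /* ~ the new err3 label      */

	*out = ct;
	return 0;

err_put_master:
	obj_put(held);                           /* drop the master reference */
	obj_put(ct);                             /* and the unpublished object */
	return err;
}

int main(void)
{
	struct obj master = { .refcnt = 1 };
	struct obj *ct = NULL;

	if (create_with_master(&master, &ct))
		printf("insert failed, master refcnt back to %d\n", master.refcnt);
	return 0;
}
```

The design point is that the put is attached to the error label for the step that follows the get, so earlier failure paths (which never took the reference) are unaffected.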
Fixes: 7d367e06688d ("netfilter: ctnetlink: fix soft lockup when netlink adds new entries (v2)") Signed-off-by: Hangyu Hua Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: huzhaodong Signed-off-by: Ywenrui44091 --- net/netfilter/nf_conntrack_netlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c6bcc28ae338..1487b7d08014 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2363,12 +2363,15 @@ ctnetlink_create_conntrack(struct net *net, err = nf_conntrack_hash_check_insert(ct); if (err < 0) - goto err2; + goto err3; rcu_read_unlock(); return ct; +err3: + if (ct->master) + nf_ct_put(ct->master); err2: rcu_read_unlock(); err1: -- Gitee From 4ec6f873c4699b00e67a0a9e50f01c7660c8ae5a Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 16 Feb 2022 15:55:38 +0100 Subject: [PATCH 241/243] netfilter: nf_tables: Reject tables of unsupported family stable inclusion from stable-v5.10.208 commit 25d1e7be85cf0a5afca5555f90e4609c40480ece category: bugfix issue: #I8YJUQ CVE: CVE-2023-6040 Signed-off-by: wanxiaoqing --------------------------------------- commit f1082dd31fe461d482d69da2a8eccfeb7bf07ac2 upstream. An nftables family is merely a hollow container, its family just a number and such not reliant on compile-time options other than nftables support itself. Add an artificial check so attempts at using a family the kernel can't support fail as early as possible. This helps user space detect kernels which lack e.g. NFPROTO_INET. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing Signed-off-by: Ywenrui44091 --- net/netfilter/nf_tables_api.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 09642d2ed2e6..7be340ea0b2b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -986,6 +986,30 @@ static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg, return strcmp(obj->key.name, k->name); } +static bool nft_supported_family(u8 family) +{ + return false +#ifdef CONFIG_NF_TABLES_INET + || family == NFPROTO_INET +#endif +#ifdef CONFIG_NF_TABLES_IPV4 + || family == NFPROTO_IPV4 +#endif +#ifdef CONFIG_NF_TABLES_ARP + || family == NFPROTO_ARP +#endif +#ifdef CONFIG_NF_TABLES_NETDEV + || family == NFPROTO_NETDEV +#endif +#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) + || family == NFPROTO_BRIDGE +#endif +#ifdef CONFIG_NF_TABLES_IPV6 + || family == NFPROTO_IPV6 +#endif + ; +} + static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -1000,6 +1024,9 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, u32 flags = 0; int err; + if (!nft_supported_family(family)) + return -EOPNOTSUPP; + lockdep_assert_held(&net->nft.commit_mutex); attr = nla[NFTA_TABLE_NAME]; table = nft_table_lookup(net, attr, family, genmask); -- Gitee From 059fe5fc58bf8027b9f7327483abdd11eb93ba5a Mon Sep 17 00:00:00 2001 From: Prathu Baronia Date: Mon, 22 May 2023 14:20:19 +0530 Subject: [PATCH 242/243] vhost: use kzalloc() instead of kmalloc() followed by memset() mainline inclusion from mainline-v6.4-rc6 commit 4d8df0f5f79f747d75a7d356d9b9ea40a4e4c8a9 category: bugfix issue: #NA CVE: CVE-2024-0340 Signed-off-by: wanxiaoqing 
--------------------------------------- Use kzalloc() to allocate new zeroed out msg node instead of memsetting a node allocated with kmalloc(). Signed-off-by: Prathu Baronia Message-Id: <20230522085019.42914-1-prathubaronia2011@gmail.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Signed-off-by: wanxiaoqing Signed-off-by: Ywenrui44091 --- drivers/vhost/vhost.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index f41463ab4031..fc989667b405 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2568,12 +2568,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify); /* Create a new message. */ struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) { - struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL); + /* Make sure all padding within the structure is initialized. */ + struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; - /* Make sure all padding within the structure is initialized. */ - memset(&node->msg, 0, sizeof node->msg); node->vq = vq; node->msg.type = type; return node; -- Gitee From c5d7e821033f549ffe48a247a59eca14063b21ff Mon Sep 17 00:00:00 2001 From: liuzerun Date: Mon, 5 Feb 2024 15:10:42 +0000 Subject: [PATCH 243/243] status Signed-off-by: liuzerun --- fs/hmdfs/comm/device_node.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/hmdfs/comm/device_node.c b/fs/hmdfs/comm/device_node.c index 796e5cd09a72..bd42fc124efa 100644 --- a/fs/hmdfs/comm/device_node.c +++ b/fs/hmdfs/comm/device_node.c @@ -258,12 +258,12 @@ static ssize_t sbi_status_show(struct kobject *kobj, struct sbi_attribute *attr, struct tcp_handle *tcp = NULL; sbi = to_sbi(kobj); - size += sprintf(buf + size, "peers status\n"); + size += snprintf(buf + size, PAGE_SIZE - size, "peers status\n"); mutex_lock(&sbi->connections.node_lock); list_for_each_entry(peer, &sbi->connections.node_list, list) { - size += sprintf(buf + size, "%llu %d\n", peer->device_id, - peer->status); + size += snprintf(buf + size, PAGE_SIZE - size, "%llu %d\n", + peer->device_id, peer->status); // connection information size += sprintf( buf + size, @@ -271,7 +271,8 @@ static ssize_t sbi_status_show(struct kobject *kobj, struct sbi_attribute *attr, mutex_lock(&peer->conn_impl_list_lock); list_for_each_entry(conn_impl, &peer->conn_impl_list, list) { tcp = conn_impl->connect_handle; - size += sprintf(buf + size, "\t %d \t%d \t%d \t%p \t%ld\n", + size += snprintf(buf + size, PAGE_SIZE - size, + "\t %d \t%d \t%d \t%p \t%ld\n", tcp->fd, conn_impl->status, tcp->sock->state, tcp->sock, file_count(tcp->sock->file)); } -- Gitee