From cf575406f9324bfa2c9e940dabb50634d569289b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jun 2022 10:09:03 -0400 Subject: [PATCH 1/3] KVM: x86: do not report a vCPU as preempted outside instruction boundaries mainline inclusion from mainline-5.19-rc2 commit 6cd88243c7e03845a450795e134b488fc2afb736 category: bugfix issue: I5QXCH CVE: CVE-2022-39189 Signed-off-by: gaochao --------------------------------------- If a vCPU is outside guest mode and is scheduled out, it might be in the process of making a memory access. A problem occurs if another vCPU uses the PV TLB flush feature during the period when the vCPU is scheduled out, and a virtual address has already been translated but has not yet been accessed, because this is equivalent to using a stale TLB entry. To avoid this, only report a vCPU as preempted if sure that the guest is at an instruction boundary. A rescheduling request will be delivered to the host physical CPU as an external interrupt, so for simplicity consider any vmexit *not* an instruction boundary except for external interrupts. It would in principle be okay to report the vCPU as preempted also if it is sleeping in kvm_vcpu_block(): a TLB flush IPI will incur the vmentry/vmexit overhead unnecessarily, and optimistic spinning is also unlikely to succeed. However, leave it for later because right now kvm_vcpu_check_block() is doing memory accesses. Even though the TLB flush issue only applies to virtual memory addresses, it's very much preferable to be conservative. 
Reported-by: Jann Horn Signed-off-by: Paolo Bonzini conflict: arch/x86/include/asm/kvm_host.h arch/x86/kvm/x86.c Signed-off-by: gaochao --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/vmx/vmx.c | 1 + arch/x86/kvm/x86.c | 22 ++++++++++++++++++++++ 4 files changed, 28 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b1cd8334db11..df4a54f34e7b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -551,6 +551,7 @@ struct kvm_vcpu_arch { u64 ia32_misc_enable_msr; u64 smbase; u64 smi_count; + bool at_instruction_boundary; bool tpr_access_reporting; bool xsaves_enabled; u64 ia32_xss; @@ -1059,6 +1060,8 @@ struct kvm_vcpu_stat { u64 req_event; u64 halt_poll_success_ns; u64 halt_poll_fail_ns; + u64 preemption_reported; + u64 preemption_other; }; struct x86_instruction_info; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5e1d7396a6b8..7d388520851b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3992,6 +3992,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) { + if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR) + vcpu->arch.at_instruction_boundary = true; } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6f6376c7464b..620ac38eadda 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6449,6 +6449,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) return; handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc)); + vcpu->arch.at_instruction_boundary = true; } static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 26b51a6dc376..ea24395c19a2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -231,6 +231,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { 
VCPU_STAT("l1d_flush", l1d_flush), VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VCPU_STAT("preemption_reported", preemption_reported), + VCPU_STAT("preemption_other", preemption_other), VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped), VM_STAT("mmu_pte_write", mmu_pte_write), VM_STAT("mmu_pde_zapped", mmu_pde_zapped), @@ -4022,6 +4024,19 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) struct kvm_host_map map; struct kvm_steal_time *st; + /* + * The vCPU can be marked preempted if and only if the VM-Exit was on + * an instruction boundary and will not trigger guest emulation of any + * kind (see vcpu_run). Vendor specific code controls (conservatively) + * when this is true, for example allowing the vCPU to be marked + * preempted if and only if the VM-Exit was due to a host interrupt. + */ + if (!vcpu->arch.at_instruction_boundary) { + vcpu->stat.preemption_other++; + return; + } + + vcpu->stat.preemption_reported++; if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -9215,6 +9230,13 @@ static int vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.l1tf_flush_l1d = true; for (;;) { + /* + * If another guest vCPU requests a PV TLB flush in the middle + * of instruction emulation, the rest of the emulation could + * use a stale page translation. Assume that any code after + * this point can start executing an instruction. 
+ */ + vcpu->arch.at_instruction_boundary = false; if (kvm_vcpu_running(vcpu)) { r = vcpu_enter_guest(vcpu); } else { -- Gitee From 913bbeb317f25edcc72c52816e5b32ed7f36b19c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 22 Aug 2022 11:06:39 +0200 Subject: [PATCH 2/3] netfilter: nf_tables: disallow binding to already bound chain stable inclusion from stable-5.10.140 commit c08a104a8bce832f6e7a4e8d9ac091777b9982ea category: bugfix issue: I5QQND CVE: CVE-2022-39190 Signed-off-by: gaochao --------------------------------------- [ Upstream commit e02f0d3970404bfea385b6edb86f2d936db0ea2b ] Update nft_data_init() to report EINVAL if chain is already bound. Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING") Reported-by: Gwangun Jung Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin conflict: net/netfilter/nf_tables_api.c Signed-off-by: gaochao --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 98644731a122..25f18c93fa46 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8648,6 +8648,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, return PTR_ERR(chain); if (nft_is_base_chain(chain)) return -EOPNOTSUPP; + if (nft_chain_is_bound(chain)) + return -EINVAL; chain->use++; data->verdict.chain = chain; -- Gitee From 4ecc66f2c7b047a8e24e63de8ec610baf58dbe48 Mon Sep 17 00:00:00 2001 From: David Leadbeater Date: Fri, 26 Aug 2022 14:56:57 +1000 Subject: [PATCH 3/3] netfilter: nf_conntrack_irc: Tighten matching on DCC message mainline inclusion from mainline-v6.0-rc6 commit e8d5dfd1d8747b56077d02664a8838c71ced948e category: bugfix issue: I5QQN0 CVE: CVE-2022-2663 Signed-off-by: gaochao --------------------------------------- CTCP messages should only be at the start of an IRC message, not anywhere within it. 
While the helper only decodes packets in the ORIGINAL direction, it's possible to make a client send a CTCP message back by embedding one into a PING request. As-is, that's enough to make the helper believe that it saw a CTCP message. Fixes: 869f37d8e48f ("[NETFILTER]: nf_conntrack/nf_nat: add IRC helper port") Signed-off-by: David Leadbeater Signed-off-by: Florian Westphal conflict: net/netfilter/nf_conntrack_irc.c Signed-off-by: gaochao --- net/netfilter/nf_conntrack_irc.c | 34 ++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index e40988a2f22f..c745dd19d42f 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -148,15 +148,37 @@ static int help(struct sk_buff *skb, unsigned int protoff, data = ib_ptr; data_limit = ib_ptr + skb->len - dataoff; - /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 - * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ - while (data < data_limit - (19 + MINMATCHLEN)) { - if (memcmp(data, "\1DCC ", 5)) { + /* Skip any whitespace */ + while (data < data_limit - 10) { + if (*data == ' ' || *data == '\r' || *data == '\n') + data++; + else + break; + } + + /* strlen("PRIVMSG x ")=10 */ + if (data < data_limit - 10) { + if (strncasecmp("PRIVMSG ", data, 8)) + goto out; + data += 8; + } + + /* strlen(" :\1DCC SENT t AAAAAAAA P\1\n")=26 + * 7+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=26 + */ + while (data < data_limit - (21 + MINMATCHLEN)) { + /* Find first " :", the start of message */ + if (memcmp(data, " :", 2)) { data++; continue; } + data += 2; + + /* then check that place only for the DCC command */ + if (memcmp(data, "\1DCC ", 5)) + goto out; data += 5; - /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ + /* we have at least (21+MINMATCHLEN)-(2+5) bytes valid data left */ iph = ip_hdr(skb); pr_debug("DCC found in master %pI4:%u %pI4:%u\n", @@ -172,7 +194,7 @@ static int help(struct sk_buff 
*skb, unsigned int protoff, pr_debug("DCC %s detected\n", dccprotos[i]); /* we have at least - * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid + * (21+MINMATCHLEN)-7-dccprotos[i].matchlen bytes valid * data left (== 14/13 bytes) */ if (parse_dcc(data, data_limit, &dcc_ip, &dcc_port, &addr_beg_p, &addr_end_p)) { -- Gitee