diff --git a/memory-assert-memoryregionops-callbacks-defined.patch b/memory-assert-memoryregionops-callbacks-defined.patch
new file mode 100644
index 0000000000000000000000000000000000000000..c95238bfaecb5b4765bb5aab9eaf162a8e1516ff
--- /dev/null
+++ b/memory-assert-memoryregionops-callbacks-defined.patch
@@ -0,0 +1,48 @@
+memory: assert and define MemoryRegionOps callbacks
+
+From: Prasad J Pandit
+
+When registering a MemoryRegionOps object, assert that its
+read/write callback methods are defined. This avoids a potential
+guest crash via a NULL pointer dereference.
+
+Suggested-by: Peter Maydell
+Reviewed-by: Li Qiang
+Signed-off-by: Prasad J Pandit
+---
+ memory.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+Update v3: Add Reviewed-by: ...
+  -> https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg09467.html
+
+diff --git a/memory.c b/memory.c
+index 9200b20130..5e22bce326 100644
+--- a/memory.c
++++ b/memory.c
+@@ -1485,7 +1485,13 @@ void memory_region_init_io(MemoryRegion *mr,
+                            uint64_t size)
+ {
+     memory_region_init(mr, owner, name, size);
+-    mr->ops = ops ? ops : &unassigned_mem_ops;
++    if (ops) {
++        assert(ops->read || ops->read_with_attrs);
++        assert(ops->write || ops->write_with_attrs);
++        mr->ops = ops;
++    } else {
++        mr->ops = &unassigned_mem_ops;
++    }
+     mr->opaque = opaque;
+     mr->terminates = true;
+ }
+@@ -1663,6 +1669,8 @@ void memory_region_init_rom_device_nomigrate(MemoryRegion *mr,
+ {
+     Error *err = NULL;
+     assert(ops);
++    assert(ops->read || ops->read_with_attrs);
++    assert(ops->write || ops->write_with_attrs);
+     memory_region_init(mr, owner, name, size);
+     mr->ops = ops;
+     mr->opaque = opaque;
+--
+2.26.2
diff --git a/qemu.spec b/qemu.spec
index 61d4cf47b7835920a47e87aae298315a36cadefd..e5ef19f1d71a9bc923ac28336cf72c5036b9c01c 100644
--- a/qemu.spec
+++ b/qemu.spec
@@ -183,6 +183,7 @@ Patch0170: megasas-use-unsigned-type-for-reply_queue_head-and-c.patch
 Patch0171: megasas-avoid-NULL-pointer-dereference.patch
 Patch0172: megasas-use-unsigned-type-for-positive-numeric-field.patch
 Patch0173: hw-scsi-megasas-Fix-possible-out-of-bounds-array-acc.patch
+Patch0174: using-bottom-half-to-send-packets.patch
 
 BuildRequires: flex
 BuildRequires: bison
@@ -528,6 +529,12 @@ getent passwd qemu >/dev/null || \
 %endif
 
 %changelog
+* Tue Aug 11 2020 Huawei Technologies Co., Ltd
+- memory: assert and define MemoryRegionOps callbacks
+
+* Tue Aug 11 2020 Huawei Technologies Co., Ltd
+- e1000e: using bottom half to send packets
+
 * Thu Aug 6 2020 Huawei Technologies Co., Ltd
 - es1370: check total frame count against current frame
 - exec: set map length to zero when returning NULL
diff --git a/using-bottom-half-to-send-packets.patch b/using-bottom-half-to-send-packets.patch
new file mode 100644
index 0000000000000000000000000000000000000000..8bbdfd2dc3f191e1556a5bbfcd15436c5ab4f6fb
--- /dev/null
+++ b/using-bottom-half-to-send-packets.patch
@@ -0,0 +1,302 @@
+e1000e: using bottom half to send packets
+
+Alexander Bulekov reported a UAF bug related to e1000e packet send.
+
+-->https://bugs.launchpad.net/qemu/+bug/1886362
+
+This happens because the guest can trigger an e1000e packet send and
+set the data's address to e1000e's own MMIO address. So when the
+e1000e does DMA, it writes the MMIO region again, which triggers
+re-entrancy and finally causes this UAF.
+
+Paolo suggested using a bottom half whenever MMIO is doing complicated
+things, here:
+-->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html
+
+Reference here:
+'The easiest solution is to delay processing of descriptors to a bottom
+half whenever MMIO is doing something complicated. This is also better
+for latency because it will free the vCPU thread more quickly and leave
+the work to the I/O thread.'
+
+This patch fixes the UAF.
+
+Reported-by: Alexander Bulekov
+Signed-off-by: Li Qiang
+---
+Changes since v2:
+1. Add comments for scheduling the tx bh when the VM resumes
+2. Leave the set ics code in 'e1000e_start_xmit'
+3. Cancel the tx bh and reset tx_waiting in e1000e_core_reset
+
+Changes since v1:
+Per Jason's review here:
+-- https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg05368.html
+1. Cancel and schedule the tx bh when the VM is stopped or resumed
+2. Add a tx_burst e1000e property to throttle the bh execution
+3. Add a tx_waiting flag to record whether the bh is pending or not
+Don't take the BQL in the tx_bh handler, as the BQL is already held
+when tx_bh is executed.
+
+ hw/net/e1000e.c      |   6 +++
+ hw/net/e1000e_core.c | 107 +++++++++++++++++++++++++++++++++++--------
+ hw/net/e1000e_core.h |   8 ++++
+ 3 files changed, 101 insertions(+), 20 deletions(-)
+
+diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
+index fda34518c9..24e35a78bf 100644
+--- a/hw/net/e1000e.c
++++ b/hw/net/e1000e.c
+@@ -77,10 +77,14 @@ typedef struct E1000EState {
+ 
+     bool disable_vnet;
+ 
++    int32_t tx_burst;
++
+     E1000ECore core;
+ 
+ } E1000EState;
+ 
++#define TX_BURST 256
++
+ #define E1000E_MMIO_IDX     0
+ #define E1000E_FLASH_IDX    1
+ #define E1000E_IO_IDX       2
+@@ -263,6 +267,7 @@ static void e1000e_core_realize(E1000EState *s)
+ {
+     s->core.owner = &s->parent_obj;
+     s->core.owner_nic = s->nic;
++    s->core.tx_burst = s->tx_burst;
+ }
+ 
+ static void
+@@ -665,6 +670,7 @@ static Property e1000e_properties[] = {
+                         e1000e_prop_subsys_ven, uint16_t),
+     DEFINE_PROP_SIGNED("subsys", E1000EState, subsys, 0,
+                         e1000e_prop_subsys, uint16_t),
++    DEFINE_PROP_INT32("x-txburst", E1000EState, tx_burst, TX_BURST),
+     DEFINE_PROP_END_OF_LIST(),
+ };
+ 
+diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
+index bcd186cac5..2fdfc23204 100644
+--- a/hw/net/e1000e_core.c
++++ b/hw/net/e1000e_core.c
+@@ -910,18 +910,17 @@ e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx)
+ }
+ 
+ static void
+-e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
++e1000e_start_xmit(struct e1000e_tx *q)
+ {
++    E1000ECore *core = q->core;
+     dma_addr_t base;
+     struct e1000_tx_desc desc;
+-    bool ide = false;
+-    const E1000E_RingInfo *txi = txr->i;
+-    uint32_t cause = E1000_ICS_TXQE;
++    const E1000E_RingInfo *txi;
++    E1000E_TxRing txr;
++    int32_t num_packets = 0;
+ 
+-    if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+-        trace_e1000e_tx_disabled();
+-        return;
+-    }
++    e1000e_tx_ring_init(core, &txr, q - &core->tx[0]);
++    txi = txr.i;
+ 
+     while (!e1000e_ring_empty(core, txi)) {
+         base = e1000e_ring_head_descr(core, txi);
+@@ -931,14 +930,24 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
+         trace_e1000e_tx_descr((void *)(intptr_t)desc.buffer_addr,
+                               desc.lower.data, desc.upper.data);
+ 
+-        e1000e_process_tx_desc(core, txr->tx, &desc, txi->idx);
+-        cause |= e1000e_txdesc_writeback(core, base, &desc, &ide, txi->idx);
++        e1000e_process_tx_desc(core, txr.tx, &desc, txi->idx);
++        q->cause |= e1000e_txdesc_writeback(core, base, &desc,
++                                            &q->ide, txi->idx);
+ 
+         e1000e_ring_advance(core, txi, 1);
++        if (++num_packets >= core->tx_burst) {
++            break;
++        }
++    }
++
++    if (num_packets >= core->tx_burst) {
++        qemu_bh_schedule(q->tx_bh);
++        q->tx_waiting = 1;
++        return;
+     }
+ 
+-    if (!ide || !e1000e_intrmgr_delay_tx_causes(core, &cause)) {
+-        e1000e_set_interrupt_cause(core, cause);
++    if (!q->ide || !e1000e_intrmgr_delay_tx_causes(core, &q->cause)) {
++        e1000e_set_interrupt_cause(core, q->cause);
+     }
+ }
+ 
+@@ -2423,32 +2432,41 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t val)
+ static void
+ e1000e_set_tctl(E1000ECore *core, int index, uint32_t val)
+ {
+-    E1000E_TxRing txr;
+     core->mac[index] = val;
+ 
+     if (core->mac[TARC0] & E1000_TARC_ENABLE) {
+-        e1000e_tx_ring_init(core, &txr, 0);
+-        e1000e_start_xmit(core, &txr);
++        if (core->tx[0].tx_waiting) {
++            return;
++        }
++        core->tx[0].tx_waiting = 1;
++        if (!core->vm_running) {
++            return;
++        }
++        qemu_bh_schedule(core->tx[0].tx_bh);
+     }
+ 
+     if (core->mac[TARC1] & E1000_TARC_ENABLE) {
+-        e1000e_tx_ring_init(core, &txr, 1);
+-        e1000e_start_xmit(core, &txr);
++        if (core->tx[1].tx_waiting) {
++            return;
++        }
++        core->tx[1].tx_waiting = 1;
++        if (!core->vm_running) {
++            return;
++        }
++        qemu_bh_schedule(core->tx[1].tx_bh);
+     }
+ }
+ 
+ static void
+ e1000e_set_tdt(E1000ECore *core, int index, uint32_t val)
+ {
+-    E1000E_TxRing txr;
+     int qidx = e1000e_mq_queue_idx(TDT, index);
+     uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1;
+ 
+     core->mac[index] = val & 0xffff;
+ 
+     if (core->mac[tarc_reg] & E1000_TARC_ENABLE) {
+-        e1000e_tx_ring_init(core, &txr, qidx);
+-        e1000e_start_xmit(core, &txr);
++        qemu_bh_schedule(core->tx[qidx].tx_bh);
+     }
+ }
+ 
+@@ -3315,11 +3333,52 @@ e1000e_vm_state_change(void *opaque, int running, RunState state)
+         trace_e1000e_vm_state_running();
+         e1000e_intrmgr_resume(core);
+         e1000e_autoneg_resume(core);
++        core->vm_running = 1;
++
++        for (int i = 0; i < E1000E_NUM_QUEUES; i++) {
++            /*
++             * Schedule the tx bh unconditionally to make sure
++             * tx works after live migration, since we don't
++             * migrate tx_waiting.
++             */
++            qemu_bh_schedule(core->tx[i].tx_bh);
++        }
++
+     } else {
+         trace_e1000e_vm_state_stopped();
++
++        for (int i = 0; i < E1000E_NUM_QUEUES; i++) {
++            qemu_bh_cancel(core->tx[i].tx_bh);
++        }
++
+         e1000e_autoneg_pause(core);
+         e1000e_intrmgr_pause(core);
++        core->vm_running = 0;
++    }
++}
++
++
++static void e1000e_core_tx_bh(void *opaque)
++{
++    struct e1000e_tx *q = opaque;
++    E1000ECore *core = q->core;
++
++    if (!core->vm_running) {
++        assert(q->tx_waiting);
++        return;
++    }
++
++    q->tx_waiting = 0;
++
++    if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
++        trace_e1000e_tx_disabled();
++        return;
+     }
++
++    q->cause = E1000_ICS_TXQE;
++    q->ide = false;
++
++    e1000e_start_xmit(q);
+ }
+ 
+ void
+@@ -3334,12 +3393,15 @@ e1000e_core_pci_realize(E1000ECore *core,
+                                        e1000e_autoneg_timer, core);
+     e1000e_intrmgr_pci_realize(core);
+ 
++    core->vm_running = runstate_is_running();
+     core->vmstate =
+         qemu_add_vm_change_state_handler(e1000e_vm_state_change, core);
+ 
+     for (i = 0; i < E1000E_NUM_QUEUES; i++) {
+         net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner,
+                         E1000E_MAX_TX_FRAGS, core->has_vnet);
++        core->tx[i].core = core;
++        core->tx[i].tx_bh = qemu_bh_new(e1000e_core_tx_bh, &core->tx[i]);
+     }
+ 
+     net_rx_pkt_init(&core->rx_pkt, core->has_vnet);
+@@ -3367,6 +3429,9 @@ e1000e_core_pci_uninit(E1000ECore *core)
+     for (i = 0; i < E1000E_NUM_QUEUES; i++) {
+         net_tx_pkt_reset(core->tx[i].tx_pkt);
+         net_tx_pkt_uninit(core->tx[i].tx_pkt);
++        qemu_bh_cancel(core->tx[i].tx_bh);
++        qemu_bh_delete(core->tx[i].tx_bh);
++        core->tx[i].tx_bh = NULL;
+     }
+ 
+     net_rx_pkt_uninit(core->rx_pkt);
+@@ -3480,6 +3545,8 @@ e1000e_core_reset(E1000ECore *core)
+         net_tx_pkt_reset(core->tx[i].tx_pkt);
+         memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
+         core->tx[i].skip_cp = false;
++        qemu_bh_cancel(core->tx[i].tx_bh);
++        core->tx[i].tx_waiting = 0;
+     }
+ }
+ 
+diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
+index aee32f7e48..0c16dce3a6 100644
+--- a/hw/net/e1000e_core.h
++++ b/hw/net/e1000e_core.h
+@@ -77,10 +77,18 @@ struct E1000Core {
+         unsigned char sum_needed;
+         bool cptse;
+         struct NetTxPkt *tx_pkt;
++        QEMUBH *tx_bh;
++        uint32_t tx_waiting;
++        uint32_t cause;
++        bool ide;
++        E1000ECore *core;
+     } tx[E1000E_NUM_QUEUES];
+ 
+     struct NetRxPkt *rx_pkt;
+ 
++    int32_t tx_burst;
++
++    bool vm_running;
+     bool has_vnet;
+     int max_queue_num;
+ 
+--
+2.17.1