diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b1cd8334db11a7789465f31aeb4e4a9d6ee3cdee..6ef0074e530601e73dce22988ddb899f080ae864 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1272,6 +1272,7 @@ struct kvm_x86_ops {
 	int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
 	int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
 	int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+	void (*guest_memory_reclaimed)(struct kvm *kvm);
 
 	int (*get_msr_feature)(struct kvm_msr_entry *entry);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 01547bdbfb061c867b1a7c7ec22fb0a482168c17..783ef0eb2b49f444c919cdd5c201b0fd2d7f657d 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1177,6 +1177,14 @@ void sev_hardware_teardown(void)
 	sev_flush_asids();
 }
 
+void sev_guest_memory_reclaimed(struct kvm *kvm)
+{
+	if (!sev_guest(kvm))
+		return;
+
+	wbinvd_on_all_cpus();
+}
+
 void pre_sev_run(struct vcpu_svm *svm, int cpu)
 {
 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1c9226cd6cdecf7d29e4f08092786ff33bf74f14..cc9f950c206dd3285e8666de534bc19f15a0c9bd 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4315,6 +4315,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.mem_enc_op = svm_mem_enc_op,
 	.mem_enc_reg_region = svm_register_enc_region,
 	.mem_enc_unreg_region = svm_unregister_enc_region,
+	.guest_memory_reclaimed = sev_guest_memory_reclaimed,
 
 	.can_emulate_instruction = svm_can_emulate_instruction,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index be74e22b82ea7332d80670f355c94cd8b1a7ccc5..76e3b95343be9ec28295e0118f889b88024c8511 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -491,6 +491,8 @@ int svm_register_enc_region(struct kvm *kvm,
 			    struct kvm_enc_region *range);
 int svm_unregister_enc_region(struct kvm *kvm,
 			      struct kvm_enc_region *range);
+void sev_guest_memory_reclaimed(struct kvm *kvm);
+
 void pre_sev_run(struct vcpu_svm *svm, int cpu);
 int __init sev_hardware_setup(void);
 void sev_hardware_teardown(void);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ef02d23a81146cb22d794c0b3e41eb71c1cd10e2..5243e00871c887b61b0b0b6de9364d3a24ea0072 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8819,6 +8819,14 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 	kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
 }
 
+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+	if (!kvm_x86_ops.guest_memory_reclaimed)
+		return;
+
+	kvm_x86_ops.guest_memory_reclaimed(kvm);
+}
+
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 {
 	if (!lapic_in_kernel(vcpu))
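The hunks above follow the usual kvm_x86_ops pattern for optional vendor callbacks: generic x86 code NULL-checks the member before calling it, and only SVM installs a hook. SEV guests encrypt memory with a per-ASID key, so the CPU can hold dirty, differently tagged cache lines for pages the host is about to reclaim; WBINVD on all CPUs is what pushes those lines out before reuse. The user-space sketch below is illustrative only (struct and function names are hypothetical, not part of the patch) and shows the optional-hook dispatch shape:

    #include <stdio.h>

    struct vm { int id; };

    struct vm_ops {
        /* Optional: stays NULL when the vendor has nothing to flush. */
        void (*guest_memory_reclaimed)(struct vm *vm);
    };

    static void sev_reclaimed(struct vm *vm)
    {
        /* Stand-in for wbinvd_on_all_cpus(): write back dirty cache
         * lines before the host reuses the encrypted pages. */
        printf("vm%d: flushing caches before reclaim\n", vm->id);
    }

    static const struct vm_ops svm_ops = {
        .guest_memory_reclaimed = sev_reclaimed,
    };
    static const struct vm_ops vmx_ops = { 0 };    /* no hook installed */

    static void arch_guest_memory_reclaimed(const struct vm_ops *ops,
                                            struct vm *vm)
    {
        if (!ops->guest_memory_reclaimed)    /* same NULL check as x86.c */
            return;
        ops->guest_memory_reclaimed(vm);
    }

    int main(void)
    {
        struct vm vm = { .id = 0 };

        arch_guest_memory_reclaimed(&svm_ops, &vm);    /* flushes */
        arch_guest_memory_reclaimed(&vmx_ops, &vm);    /* no-op */
        return 0;
    }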
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 6b303e507a205bb6d8cd501cdd04cf593683f999..3093da10b700a210a8c6a8415f339562707166f4 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -509,8 +509,8 @@ static unsigned long fdc_busy;
 static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
 static DECLARE_WAIT_QUEUE_HEAD(command_done);
 
-/* Errors during formatting are counted here. */
-static int format_errors;
+/* errors encountered on the current (or last) request */
+static int floppy_errors;
 
 /* Format request descriptor. */
 static struct format_descr format_req;
@@ -530,7 +530,6 @@ static struct format_descr format_req;
 static char *floppy_track_buffer;
 static int max_buffer_sectors;
 
-static int *errors;
 typedef void (*done_f)(int);
 static const struct cont_t {
 	void (*interrupt)(void);
@@ -1455,7 +1454,7 @@ static int interpret_errors(void)
 			if (drive_params[current_drive].flags & FTD_MSG)
 				DPRINT("Over/Underrun - retrying\n");
 			bad = 0;
-		} else if (*errors >= drive_params[current_drive].max_errors.reporting) {
+		} else if (floppy_errors >= drive_params[current_drive].max_errors.reporting) {
 			print_errors();
 		}
 		if (reply_buffer[ST2] & ST2_WC || reply_buffer[ST2] & ST2_BC)
@@ -2095,7 +2094,7 @@ static void bad_flp_intr(void)
 		if (!next_valid_format(current_drive))
 			return;
 	}
-	err_count = ++(*errors);
+	err_count = ++floppy_errors;
 	INFBOUND(write_errors[current_drive].badness, err_count);
 	if (err_count > drive_params[current_drive].max_errors.abort)
 		cont->done(0);
@@ -2240,9 +2239,8 @@ static int do_format(int drive, struct format_descr *tmp_format_req)
 		return -EINVAL;
 	}
 	format_req = *tmp_format_req;
-	format_errors = 0;
 	cont = &format_cont;
-	errors = &format_errors;
+	floppy_errors = 0;
 	ret = wait_til_done(redo_format, true);
 	if (ret == -EINTR)
 		return -EINTR;
@@ -2721,7 +2719,7 @@ static int make_raw_rw_request(void)
 	 */
 	if (!direct ||
 	    (indirect * 2 > direct * 3 &&
-	     *errors < drive_params[current_drive].max_errors.read_track &&
+	     floppy_errors < drive_params[current_drive].max_errors.read_track &&
 	     ((!probing ||
 	       (drive_params[current_drive].read_track & (1 << drive_state[current_drive].probed_format)))))) {
 		max_size = blk_rq_sectors(current_req);
@@ -2846,10 +2844,11 @@ static int set_next_request(void)
 	current_req = list_first_entry_or_null(&floppy_reqs, struct request,
 					       queuelist);
 	if (current_req) {
-		current_req->error_count = 0;
+		floppy_errors = 0;
 		list_del_init(&current_req->queuelist);
+		return 1;
 	}
-	return current_req != NULL;
+	return 0;
 }
 
 /* Starts or continues processing request. Will automatically unlock the
@@ -2908,7 +2907,6 @@ static void redo_fd_request(void)
 			_floppy = floppy_type + drive_params[current_drive].autodetect[drive_state[current_drive].probed_format];
 		} else
 			probing = 0;
-		errors = &(current_req->error_count);
 		tmp = make_raw_rw_request();
 		if (tmp < 2) {
 			request_done(tmp);
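The floppy change removes the global `int *errors` that used to point at `current_req->error_count`: the request can complete (and be freed) while interrupt-driven retry logic still dereferences the pointer, so the fix keeps one static counter that is reset whenever a new request is picked up. A user-space sketch of the hazard and the shape of the fix (the struct is a stand-in, not the kernel's struct request):

    #include <stdio.h>
    #include <stdlib.h>

    struct request { int error_count; };

    /* After the fix: one static counter, no pointer tied to a request's
     * lifetime. */
    static int floppy_errors;

    static void bad_intr(void)
    {
        /* Interrupt path bumps the static counter; nothing can dangle. */
        floppy_errors++;
    }

    int main(void)
    {
        struct request *req = malloc(sizeof(*req));

        /* Old scheme, shown only as a comment:
         *     errors = &req->error_count;
         * If req completes and is freed before the next interrupt,
         * ++(*errors) writes through a dangling pointer.
         */
        floppy_errors = 0;    /* reset when the request is dequeued */
        bad_intr();
        printf("errors on current request: %d\n", floppy_errors);
        free(req);
        return 0;
    }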
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a5b5e1e6ca2618f8a93cb6712e7e6473097c896d..01e1c87435af30dc29419a27c9b515d7a5214471 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -151,6 +151,10 @@ static unsigned int xen_blkif_max_ring_order;
 module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
 MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 
+static bool __read_mostly xen_blkif_trusted = true;
+module_param_named(trusted, xen_blkif_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
 #define BLK_RING_SIZE(info)	\
 	__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
 
@@ -208,6 +212,7 @@ struct blkfront_info
 	unsigned int feature_discard:1;
 	unsigned int feature_secdiscard:1;
 	unsigned int feature_persistent:1;
+	unsigned int bounce:1;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	/* Number of 4KB segments handled */
@@ -310,8 +315,8 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
 		if (!gnt_list_entry)
 			goto out_of_memory;
 
-		if (info->feature_persistent) {
-			granted_page = alloc_page(GFP_NOIO);
+		if (info->bounce) {
+			granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
 			if (!granted_page) {
 				kfree(gnt_list_entry);
 				goto out_of_memory;
@@ -330,7 +335,7 @@ static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
 	list_for_each_entry_safe(gnt_list_entry, n,
 				 &rinfo->grants, node) {
 		list_del(&gnt_list_entry->node);
-		if (info->feature_persistent)
+		if (info->bounce)
 			__free_page(gnt_list_entry->page);
 		kfree(gnt_list_entry);
 		i--;
@@ -376,7 +381,7 @@ static struct grant *get_grant(grant_ref_t *gref_head,
 	/* Assign a gref to this page */
 	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
 	BUG_ON(gnt_list_entry->gref == -ENOSPC);
-	if (info->feature_persistent)
+	if (info->bounce)
 		grant_foreign_access(gnt_list_entry, info);
 	else {
 		/* Grant access to the GFN passed by the caller */
@@ -400,7 +405,7 @@ static struct grant *get_indirect_grant(grant_ref_t *gref_head,
 	/* Assign a gref to this page */
 	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
 	BUG_ON(gnt_list_entry->gref == -ENOSPC);
-	if (!info->feature_persistent) {
+	if (!info->bounce) {
 		struct page *indirect_page;
 
 		/* Fetch a pre-allocated page to use for indirect grefs */
@@ -715,7 +720,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *rinfo)
 		.grant_idx = 0,
 		.segments = NULL,
 		.rinfo = rinfo,
-		.need_copy = rq_data_dir(req) && info->feature_persistent,
+		.need_copy = rq_data_dir(req) && info->bounce,
 	};
 
 	/*
@@ -1035,11 +1040,12 @@ static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
 			      info->feature_fua ? true : false);
-	pr_info("blkfront: %s: %s %s %s %s %s\n",
+	pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
 		info->gd->disk_name, flush_info(info),
 		"persistent grants:", info->feature_persistent ?
 		"enabled;" : "disabled;", "indirect descriptors:",
-		info->max_indirect_segments ? "enabled;" : "disabled;");
+		info->max_indirect_segments ? "enabled;" : "disabled;",
+		"bounce buffer:", info->bounce ? "enabled" : "disabled;");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -1273,7 +1279,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
 	if (!list_empty(&rinfo->indirect_pages)) {
 		struct page *indirect_page, *n;
 
-		BUG_ON(info->feature_persistent);
+		BUG_ON(info->bounce);
 		list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
 			list_del(&indirect_page->lru);
 			__free_page(indirect_page);
@@ -1290,7 +1296,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
 					  0, 0UL);
 			rinfo->persistent_gnts_c--;
 		}
-		if (info->feature_persistent)
+		if (info->bounce)
 			__free_page(persistent_gnt->page);
 		kfree(persistent_gnt);
 	}
@@ -1311,7 +1317,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo)
 		for (j = 0; j < segs; j++) {
 			persistent_gnt = rinfo->shadow[i].grants_used[j];
 			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
-			if (info->feature_persistent)
+			if (info->bounce)
 				__free_page(persistent_gnt->page);
 			kfree(persistent_gnt);
 		}
@@ -1501,7 +1507,7 @@ static int blkif_completion(unsigned long *id,
 	data.s = s;
 	num_sg = s->num_sg;
 
-	if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
+	if (bret->operation == BLKIF_OP_READ && info->bounce) {
 		for_each_sg(s->sg, sg, num_sg, i) {
 			BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
@@ -1560,7 +1566,7 @@ static int blkif_completion(unsigned long *id,
 			 * Add the used indirect page back to the list of
 			 * available pages for indirect grefs.
 			 */
-			if (!info->feature_persistent) {
+			if (!info->bounce) {
 				indirect_page = s->indirect_grants[i]->page;
 				list_add(&indirect_page->lru, &rinfo->indirect_pages);
 			}
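The bounce-buffer hunks above enforce one discipline: when the backend is untrusted, blkfront only ever grants pages it owns outright and that start out zeroed, so a grant can never expose stale neighbouring data and the backend can never keep a mapping of a page that carries anything else. A user-space sketch of that allocation discipline (the page size and helper name are assumptions of this sketch, not kernel API):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    /* Allocate a page-aligned, zeroed "bounce" page, analogous to
     * alloc_page(GFP_NOIO | __GFP_ZERO) in the patch: nothing but this
     * request's bytes ever becomes visible through the grant. */
    static void *alloc_bounce_page(void)
    {
        void *page = NULL;

        if (posix_memalign(&page, PAGE_SIZE, PAGE_SIZE))
            return NULL;
        memset(page, 0, PAGE_SIZE);    /* __GFP_ZERO equivalent */
        return page;
    }

    int main(void)
    {
        const char payload[] = "sector data";
        void *page = alloc_bounce_page();

        if (!page)
            return 1;
        /* Copy the I/O data in; the rest of the page stays zero. */
        memcpy(page, payload, sizeof(payload));
        printf("byte after payload: %d\n",
               ((char *)page)[sizeof(payload)]);    /* prints 0 */
        free(page);
        return 0;
    }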
"enabled" : "disabled;"); } static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset) @@ -1273,7 +1279,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) if (!list_empty(&rinfo->indirect_pages)) { struct page *indirect_page, *n; - BUG_ON(info->feature_persistent); + BUG_ON(info->bounce); list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) { list_del(&indirect_page->lru); __free_page(indirect_page); @@ -1290,7 +1296,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) 0, 0UL); rinfo->persistent_gnts_c--; } - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1311,7 +1317,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) for (j = 0; j < segs; j++) { persistent_gnt = rinfo->shadow[i].grants_used[j]; gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); - if (info->feature_persistent) + if (info->bounce) __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1501,7 +1507,7 @@ static int blkif_completion(unsigned long *id, data.s = s; num_sg = s->num_sg; - if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { + if (bret->operation == BLKIF_OP_READ && info->bounce) { for_each_sg(s->sg, sg, num_sg, i) { BUG_ON(sg->offset + sg->length > PAGE_SIZE); @@ -1560,7 +1566,7 @@ static int blkif_completion(unsigned long *id, * Add the used indirect page back to the list of * available pages for indirect grefs. */ - if (!info->feature_persistent) { + if (!info->bounce) { indirect_page = s->indirect_grants[i]->page; list_add(&indirect_page->lru, &rinfo->indirect_pages); } @@ -1581,9 +1587,12 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; struct blkfront_info *info = rinfo->dev_info; + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); return IRQ_HANDLED; + } spin_lock_irqsave(&rinfo->ring_lock, flags); again: @@ -1599,8 +1608,11 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) unsigned long id; unsigned int op; + eoiflag = 0; + bret = RING_GET_RESPONSE(&rinfo->ring, i); id = bret->id; + /* * The backend has messed up and given us an id that we would * never have given to it (we stamp it up to BLK_RING_SIZE - @@ -1718,6 +1730,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; err: @@ -1725,6 +1739,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); + /* No EOI in order to avoid further interrupts. 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index c5e0485f0d87b5e97cfec7011819ae8caab79be8..0c7630ed89fd7996213bc659021d7888e49fb775 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -66,6 +66,10 @@ module_param_named(max_queues, xennet_max_queues, uint, 0644);
 MODULE_PARM_DESC(max_queues,
 		 "Maximum number of queues per virtual interface");
 
+static bool __read_mostly xennet_trusted = true;
+module_param_named(trusted, xennet_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
 #define XENNET_TIMEOUT	(5 * HZ)
 
 static const struct ethtool_ops xennet_ethtool_ops;
@@ -148,6 +152,9 @@ struct netfront_queue {
 	grant_ref_t gref_rx_head;
 	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
 
+	unsigned int rx_rsp_unconsumed;
+	spinlock_t rx_cons_lock;
+
 	struct page_pool *page_pool;
 	struct xdp_rxq_info xdp_rxq;
 };
@@ -172,6 +179,9 @@ struct netfront_info {
 	/* Is device behaving sane? */
 	bool broken;
 
+	/* Should skbs be bounced into a zeroed buffer? */
+	bool bounce;
+
 	atomic_t rx_gso_checksum_fixup;
 };
 
@@ -270,7 +280,8 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
 	if (unlikely(!skb))
 		return NULL;
 
-	page = page_pool_dev_alloc_pages(queue->page_pool);
+	page = page_pool_alloc_pages(queue->page_pool,
+				     GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
 	if (unlikely(!page)) {
 		kfree_skb(skb);
 		return NULL;
@@ -376,12 +387,13 @@ static int xennet_open(struct net_device *dev)
 	return 0;
 }
 
-static void xennet_tx_buf_gc(struct netfront_queue *queue)
+static bool xennet_tx_buf_gc(struct netfront_queue *queue)
 {
 	RING_IDX cons, prod;
 	unsigned short id;
 	struct sk_buff *skb;
 	bool more_to_do;
+	bool work_done = false;
 	const struct device *dev = &queue->info->netdev->dev;
 
 	BUG_ON(!netif_carrier_ok(queue->info->netdev));
@@ -398,6 +410,8 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
 		for (cons = queue->tx.rsp_cons; cons != prod; cons++) {
 			struct xen_netif_tx_response *txrsp;
 
+			work_done = true;
+
 			txrsp = RING_GET_RESPONSE(&queue->tx, cons);
 			if (txrsp->status == XEN_NETIF_RSP_NULL)
 				continue;
@@ -439,11 +453,13 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue)
 
 	xennet_maybe_wake_tx(queue);
 
-	return;
+	return work_done;
 
  err:
 	queue->info->broken = true;
 	dev_alert(dev, "Disabled for further use\n");
+
+	return work_done;
 }
 
 struct xennet_gnttab_make_txreq {
@@ -667,6 +683,33 @@ static int xennet_xdp_xmit(struct net_device *dev, int n,
 	return n - drops;
 }
 
+static struct sk_buff *bounce_skb(const struct sk_buff *skb)
+{
+	unsigned int headerlen = skb_headroom(skb);
+	/* Align size to allocate full pages and avoid contiguous data leaks */
+	unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
+				  XEN_PAGE_SIZE);
+	struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
+
+	if (!n)
+		return NULL;
+
+	if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
+		WARN_ONCE(1, "misaligned skb allocated\n");
+		kfree_skb(n);
+		return NULL;
+	}
+
+	/* Set the data pointer */
+	skb_reserve(n, headerlen);
+	/* Set the tail pointer and length */
+	skb_put(n, skb->len);
+
+	BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
+
+	skb_copy_header(n, skb);
+	return n;
+}
+
 #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
 
@@ -719,9 +762,13 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* The first req should be at least ETH_HLEN size or the packet will be
 	 * dropped by netback.
+	 *
+	 * If the backend is not trusted bounce all data to zeroed pages to
+	 * avoid exposing contiguous data on the granted page not belonging to
+	 * the skb.
 	 */
-	if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
-		nskb = skb_copy(skb, GFP_ATOMIC);
+	if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
+		nskb = bounce_skb(skb);
 		if (!nskb)
 			goto drop;
 		dev_consume_skb_any(skb);
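bounce_skb() rounds the copy up to whole XEN_PAGE_SIZE units and insists that the new head is page-aligned, so every page granted to the backend contains nothing but this skb's bytes. A user-space check of the same arithmetic (the constants and the two inputs are stand-ins for skb_end_offset() and skb->data_len, and ALIGN() is re-defined here only so the sketch is self-contained):

    #include <stdio.h>
    #include <stdint.h>
    #include <stdlib.h>

    #define XEN_PAGE_SIZE 4096
    /* Kernel-style ALIGN(): round x up to a power-of-two boundary a. */
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

    int main(void)
    {
        unsigned int linear = 1700;    /* skb_end_offset() stand-in */
        unsigned int frags = 6000;     /* skb->data_len stand-in */
        unsigned int size = ALIGN(linear + frags, XEN_PAGE_SIZE);
        void *head;

        /* Full pages only: no unrelated data shares a granted page. */
        printf("payload %u -> bounce size %u (%u pages)\n",
               linear + frags, size, size / XEN_PAGE_SIZE);

        if (posix_memalign(&head, XEN_PAGE_SIZE, size))
            return 1;
        /* Mirrors the IS_ALIGNED() check bounce_skb() applies to n->head. */
        printf("head aligned: %s\n",
               ((uintptr_t)head % XEN_PAGE_SIZE) ? "no" : "yes");
        free(head);
        return 0;
    }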
@@ -836,6 +883,16 @@ static int xennet_close(struct net_device *dev)
 	return 0;
 }
 
+static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&queue->rx_cons_lock, flags);
+	queue->rx.rsp_cons = val;
+	queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
+	spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
+}
+
 static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb,
 				grant_ref_t ref)
 {
@@ -889,7 +946,7 @@ static int xennet_get_extras(struct netfront_queue *queue,
 		xennet_move_rx_slot(queue, skb, ref);
 	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
 
-	queue->rx.rsp_cons = cons;
+	xennet_set_rx_rsp_cons(queue, cons);
 	return err;
 }
 
@@ -1021,8 +1078,10 @@ static int xennet_get_responses(struct netfront_queue *queue,
 			}
 		}
 		rcu_read_unlock();
-next:
+
 		__skb_queue_tail(list, skb);
+
+next:
 		if (!(rx->flags & XEN_NETRXF_more_data))
 			break;
 
@@ -1046,7 +1105,7 @@ static int xennet_get_responses(struct netfront_queue *queue,
 	}
 
 	if (unlikely(err))
-		queue->rx.rsp_cons = cons + slots;
+		xennet_set_rx_rsp_cons(queue, cons + slots);
 
 	return err;
 }
@@ -1099,7 +1158,8 @@ static int xennet_fill_frags(struct netfront_queue *queue,
 			__pskb_pull_tail(skb, pull_to - skb_headlen(skb));
 		}
 		if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
-			queue->rx.rsp_cons = ++cons + skb_queue_len(list);
+			xennet_set_rx_rsp_cons(queue,
+					       ++cons + skb_queue_len(list));
 			kfree_skb(nskb);
 			return -ENOENT;
 		}
@@ -1112,7 +1172,7 @@ static int xennet_fill_frags(struct netfront_queue *queue,
 		kfree_skb(nskb);
 	}
 
-	queue->rx.rsp_cons = cons;
+	xennet_set_rx_rsp_cons(queue, cons);
 
 	return 0;
 }
@@ -1239,7 +1299,9 @@ static int xennet_poll(struct napi_struct *napi, int budget)
 
 		if (unlikely(xennet_set_skb_gso(skb, gso))) {
 			__skb_queue_head(&tmpq, skb);
-			queue->rx.rsp_cons += skb_queue_len(&tmpq);
+			xennet_set_rx_rsp_cons(queue,
+					       queue->rx.rsp_cons +
+					       skb_queue_len(&tmpq));
 			goto err;
 		}
 	}
@@ -1263,7 +1325,8 @@ static int xennet_poll(struct napi_struct *napi, int budget)
 
 		__skb_queue_tail(&rxq, skb);
 
-		i = ++queue->rx.rsp_cons;
+		i = queue->rx.rsp_cons + 1;
+		xennet_set_rx_rsp_cons(queue, i);
 		work_done++;
 	}
 	if (need_xdp_flush)
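Every update of rx.rsp_cons now funnels through xennet_set_rx_rsp_cons(), which recomputes, under rx_cons_lock, how many responses remain unconsumed; the RX interrupt handler later compares fresh ring state against this snapshot. A condensed user-space model of the helper (ring types simplified; the real code holds rx_cons_lock around both stores):

    #include <stdio.h>

    /* Simplified ring indices: prod written by backend, cons by frontend. */
    struct rx_ring {
        unsigned int rsp_prod;
        unsigned int rsp_cons;
        unsigned int rsp_unconsumed;    /* snapshot kept for the IRQ path */
    };

    /* Counterpart of xennet_set_rx_rsp_cons(), locking elided. */
    static void set_rx_rsp_cons(struct rx_ring *r, unsigned int val)
    {
        r->rsp_cons = val;
        r->rsp_unconsumed = r->rsp_prod - r->rsp_cons;
    }

    int main(void)
    {
        struct rx_ring r = { .rsp_prod = 10, .rsp_cons = 4 };

        set_rx_rsp_cons(&r, 7);    /* NAPI consumed three responses */
        printf("unconsumed: %u\n", r.rsp_unconsumed);    /* 3 */
        return 0;
    }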
@@ -1427,40 +1490,79 @@ static int xennet_set_features(struct net_device *dev,
 	return 0;
 }
 
-static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
+static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi)
 {
-	struct netfront_queue *queue = dev_id;
 	unsigned long flags;
 
-	if (queue->info->broken)
-		return IRQ_HANDLED;
+	if (unlikely(queue->info->broken))
+		return false;
 
 	spin_lock_irqsave(&queue->tx_lock, flags);
-	xennet_tx_buf_gc(queue);
+	if (xennet_tx_buf_gc(queue))
+		*eoi = 0;
 	spin_unlock_irqrestore(&queue->tx_lock, flags);
 
+	return true;
+}
+
+static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id)
+{
+	unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
+
+	if (likely(xennet_handle_tx(dev_id, &eoiflag)))
+		xen_irq_lateeoi(irq, eoiflag);
+
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
+static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi)
 {
-	struct netfront_queue *queue = dev_id;
-	struct net_device *dev = queue->info->netdev;
+	unsigned int work_queued;
+	unsigned long flags;
+
+	if (unlikely(queue->info->broken))
+		return false;
 
-	if (queue->info->broken)
-		return IRQ_HANDLED;
+	spin_lock_irqsave(&queue->rx_cons_lock, flags);
+	work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx);
+	if (work_queued > queue->rx_rsp_unconsumed) {
+		queue->rx_rsp_unconsumed = work_queued;
+		*eoi = 0;
+	} else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) {
+		const struct device *dev = &queue->info->netdev->dev;
 
-	if (likely(netif_carrier_ok(dev) &&
-		   RING_HAS_UNCONSUMED_RESPONSES(&queue->rx)))
+		spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
+		dev_alert(dev, "RX producer index going backwards\n");
+		dev_alert(dev, "Disabled for further use\n");
+		queue->info->broken = true;
+		return false;
+	}
+	spin_unlock_irqrestore(&queue->rx_cons_lock, flags);
+
+	if (likely(netif_carrier_ok(queue->info->netdev) && work_queued))
 		napi_schedule(&queue->napi);
 
+	return true;
+}
+
+static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id)
+{
+	unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
+
+	if (likely(xennet_handle_rx(dev_id, &eoiflag)))
+		xen_irq_lateeoi(irq, eoiflag);
+
 	return IRQ_HANDLED;
 }
 
 static irqreturn_t xennet_interrupt(int irq, void *dev_id)
 {
-	xennet_tx_interrupt(irq, dev_id);
-	xennet_rx_interrupt(irq, dev_id);
+	unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS;
+
+	if (xennet_handle_tx(dev_id, &eoiflag) &&
+	    xennet_handle_rx(dev_id, &eoiflag))
+		xen_irq_lateeoi(irq, eoiflag);
+
 	return IRQ_HANDLED;
 }
 
@@ -1774,9 +1876,10 @@ static int setup_netfront_single(struct netfront_queue *queue)
 	if (err < 0)
 		goto fail;
 
-	err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
-					xennet_interrupt,
-					0, queue->info->netdev->name, queue);
+	err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
+						xennet_interrupt, 0,
+						queue->info->netdev->name,
+						queue);
 	if (err < 0)
 		goto bind_fail;
 	queue->rx_evtchn = queue->tx_evtchn;
@@ -1804,18 +1907,18 @@ static int setup_netfront_split(struct netfront_queue *queue)
 
 	snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
 		 "%s-tx", queue->name);
-	err = bind_evtchn_to_irqhandler(queue->tx_evtchn,
-					xennet_tx_interrupt,
-					0, queue->tx_irq_name, queue);
+	err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn,
+						xennet_tx_interrupt, 0,
+						queue->tx_irq_name, queue);
 	if (err < 0)
 		goto bind_tx_fail;
 	queue->tx_irq = err;
 
 	snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
 		 "%s-rx", queue->name);
-	err = bind_evtchn_to_irqhandler(queue->rx_evtchn,
-					xennet_rx_interrupt,
-					0, queue->rx_irq_name, queue);
+	err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn,
+						xennet_rx_interrupt, 0,
+						queue->rx_irq_name, queue);
 	if (err < 0)
 		goto bind_rx_fail;
 	queue->rx_irq = err;
@@ -1923,6 +2026,7 @@ static int xennet_init_queue(struct netfront_queue *queue)
 
 	spin_lock_init(&queue->tx_lock);
 	spin_lock_init(&queue->rx_lock);
+	spin_lock_init(&queue->rx_cons_lock);
 
 	timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0);
 
@@ -2176,6 +2280,10 @@ static int talk_to_netback(struct xenbus_device *dev,
 
 	info->netdev->irq = 0;
 
+	/* Check if backend is trusted. */
+	info->bounce = !xennet_trusted ||
+		       !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
 	/* Check if backend supports multiple queues */
 	max_queues = xenbus_read_unsigned(info->xbdev->otherend,
 					  "multi-queue-max-queues", 1);
@@ -2342,6 +2450,9 @@ static int xennet_connect(struct net_device *dev)
 		return err;
 	if (np->netback_has_xdp_headroom)
 		pr_info("backend supports XDP headroom\n");
+	if (np->bounce)
+		dev_info(&np->xbdev->dev,
+			 "bouncing transmitted data to zeroed pages\n");
 
 	/* talk_to_netback() sets the correct number of queues */
 	num_queues = dev->real_num_tx_queues;
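xennet_handle_rx() trusts the backend only as far as monotonic progress: more unconsumed responses than the last snapshot means genuine work (EOI normally), the same amount is possibly spurious, and fewer can only mean the producer index ran backwards, which marks the device broken and leaves the channel without an EOI. A sketch of that three-way decision (locking and ring details elided; the flag value is illustrative):

    #include <stdio.h>
    #include <stdbool.h>

    #define XEN_EOI_FLAG_SPURIOUS 1    /* illustrative value */

    struct rx_state {
        unsigned int unconsumed;    /* last snapshot */
        bool broken;
    };

    static bool handle_rx(struct rx_state *s, unsigned int work_queued,
                          unsigned int *eoi)
    {
        if (work_queued > s->unconsumed) {
            s->unconsumed = work_queued;    /* forward progress */
            *eoi = 0;
        } else if (work_queued < s->unconsumed) {
            /* Producer index went backwards: backend is malicious or
             * insane; stop trusting the device entirely. */
            s->broken = true;
            return false;    /* and no EOI, silencing the IRQ */
        }
        return true;
    }

    int main(void)
    {
        struct rx_state s = { .unconsumed = 2 };
        unsigned int eoi = XEN_EOI_FLAG_SPURIOUS;

        printf("grew:   ok=%d eoi=%u\n", handle_rx(&s, 5, &eoi), eoi);
        eoi = XEN_EOI_FLAG_SPURIOUS;
        printf("same:   ok=%d eoi=%u\n", handle_rx(&s, 5, &eoi), eoi);
        printf("shrank: ok=%d broken=%d\n", handle_rx(&s, 3, &eoi), s.broken);
        return 0;
    }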
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 92c9a476defc998f5cf8dfd6185127cf68b479b5..b8a7669cbaffd4adc071c1767ed58f36063b461c 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -37,6 +37,8 @@ struct xencons_info {
 	struct xenbus_device *xbdev;
 	struct xencons_interface *intf;
 	unsigned int evtchn;
+	XENCONS_RING_IDX out_cons;
+	unsigned int out_cons_same;
 	struct hvc_struct *hvc;
 	int irq;
 	int vtermno;
@@ -131,6 +133,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
 	XENCONS_RING_IDX cons, prod;
 	int recv = 0;
 	struct xencons_info *xencons = vtermno_to_xencons(vtermno);
+	unsigned int eoiflag = 0;
+
 	if (xencons == NULL)
 		return -EINVAL;
 	intf = xencons->intf;
@@ -146,7 +150,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len)
 	mb();			/* read ring before consuming */
 	intf->in_cons = cons;
 
-	notify_daemon(xencons);
+	/*
+	 * When to mark interrupt having been spurious:
+	 * - there was no new data to be read, and
+	 * - the backend did not consume some output bytes, and
+	 * - the previous round with no read data didn't see consumed bytes
+	 *   (we might have a race with an interrupt being in flight while
+	 *   updating xencons->out_cons, so account for that by allowing one
+	 *   round without any visible reason)
+	 */
+	if (intf->out_cons != xencons->out_cons) {
+		xencons->out_cons = intf->out_cons;
+		xencons->out_cons_same = 0;
+	}
+	if (recv) {
+		notify_daemon(xencons);
+	} else if (xencons->out_cons_same++ > 1) {
+		eoiflag = XEN_EOI_FLAG_SPURIOUS;
+	}
+
+	xen_irq_lateeoi(xencons->irq, eoiflag);
+
 	return recv;
 }
@@ -375,7 +399,7 @@ static int xencons_connect_backend(struct xenbus_device *dev,
 	if (ret)
 		return ret;
 	info->evtchn = evtchn;
-	irq = bind_evtchn_to_irq(evtchn);
+	irq = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn);
 	if (irq < 0)
 		return irq;
 	info->irq = irq;
@@ -539,7 +563,7 @@ static int __init xen_hvc_init(void)
 			return r;
 
 		info = vtermno_to_xencons(HVC_COOKIE);
-		info->irq = bind_evtchn_to_irq(info->evtchn);
+		info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn);
 	}
 	if (info->irq < 0)
 		info->irq = 0; /* NO_IRQ */
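The console has no per-event work flag to inspect, so it counts rounds in which neither input arrived nor output was consumed; one such round is forgiven (an interrupt may have been in flight while out_cons was updated), and from the second onward the event is flagged spurious. A compact model of that heuristic (flag value illustrative, state struct simplified):

    #include <stdio.h>

    #define XEN_EOI_FLAG_SPURIOUS 1    /* illustrative value */

    struct cons_state {
        unsigned int out_cons;         /* last backend consumer index seen */
        unsigned int out_cons_same;    /* rounds without visible progress */
    };

    static unsigned int read_console(struct cons_state *s,
                                     unsigned int backend_out_cons, int recv)
    {
        unsigned int eoiflag = 0;

        if (backend_out_cons != s->out_cons) {
            s->out_cons = backend_out_cons;
            s->out_cons_same = 0;       /* output made progress */
        }
        /* Same condition as the patch: only idle rounds increment,
         * and only the third idle round in a row is flagged. */
        if (!recv && s->out_cons_same++ > 1)
            eoiflag = XEN_EOI_FLAG_SPURIOUS;

        return eoiflag;    /* handed to xen_irq_lateeoi() */
    }

    int main(void)
    {
        struct cons_state s = { 0 };
        int round;

        /* No input, no output progress: prints 0, 0, then 1. */
        for (round = 1; round <= 3; round++)
            printf("round %d: eoiflag=%u\n", round,
                   read_console(&s, 0, 0));
        return 0;
    }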
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 545b1d8f15dbc90eb53cb4a4539fcec8e4be31b9..4f532ce5d0b4165f03668448916fe3da7ee81bcc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1553,6 +1553,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
 
 static void io_flush_timeouts(struct io_ring_ctx *ctx)
 {
+	struct io_kiocb *req, *tmp;
 	u32 seq;
 
 	if (list_empty(&ctx->timeout_list))
@@ -1560,10 +1561,8 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
 
 	seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
 
-	do {
+	list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
 		u32 events_needed, events_got;
-		struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
-							struct io_kiocb, timeout.list);
 
 		if (io_is_timeout_noseq(req))
 			break;
@@ -1580,9 +1579,8 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
 		if (events_got < events_needed)
 			break;
 
-		list_del_init(&req->timeout.list);
 		io_kill_timeout(req, 0);
-	} while (!list_empty(&ctx->timeout_list));
+	}
 
 	ctx->cq_last_tm_flush = seq;
 }
@@ -5586,6 +5584,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	else
 		data->mode = HRTIMER_MODE_REL;
 
+	INIT_LIST_HEAD(&req->timeout.list);
 	hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
 	return 0;
 }
@@ -6234,6 +6233,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 		prev = NULL;
 	}
 
+	list_del(&req->timeout.list);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	if (prev) {
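The old loop re-read the list head with list_first_entry() while entries could be unlinked underneath it; the fix walks with list_for_each_entry_safe(), which caches the successor before the current node may be removed, and the INIT_LIST_HEAD()/list_del() pair makes a linked timeout's node always safe to delete. A self-contained model of traversal-with-removal in that style (a plain singly linked list stands in for <linux/list.h>):

    #include <stdio.h>
    #include <stdlib.h>

    struct timeout {
        int seq;
        struct timeout *next;
    };

    /* Like list_for_each_entry_safe(): grab the successor before the
     * current node may be freed. Removal stops at the first entry whose
     * sequence has not fired, matching io_flush_timeouts(). */
    static struct timeout *flush(struct timeout *head, int now)
    {
        struct timeout *req = head, *tmp;

        while (req) {
            tmp = req->next;            /* cached before any free */
            if (req->seq > now)
                break;                  /* not expired yet */
            printf("killing timeout %d\n", req->seq);
            free(req);                  /* safe: tmp already saved */
            head = req = tmp;
        }
        return head;
    }

    int main(void)
    {
        int seqs[] = { 1, 2, 9 };
        struct timeout *head = NULL, **tail = &head;

        for (int i = 0; i < 3; i++) {
            struct timeout *t = calloc(1, sizeof(*t));
            t->seq = seqs[i];
            *tail = t;
            tail = &t->next;
        }
        head = flush(head, 5);    /* kills 1 and 2, keeps 9 */
        printf("first remaining: %d\n", head ? head->seq : -1);
        while (head) {            /* free whatever remains */
            struct timeout *next = head->next;
            free(head);
            head = next;
        }
        return 0;
    }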
diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c
index 90d255fbdd9b34d366bd6b6fe64f6c760595ea8f..c84d87f558cb62eeb312ad9f74abc59aa257f9a1 100644
--- a/fs/kernel_read_file.c
+++ b/fs/kernel_read_file.c
@@ -178,7 +178,7 @@ int kernel_read_file_from_fd(int fd, loff_t offset, void **buf,
 	struct fd f = fdget(fd);
 	int ret = -EBADF;
 
-	if (!f.file)
+	if (!f.file || !(f.file->f_mode & FMODE_READ))
 		goto out;
 
 	ret = kernel_read_file(f.file, offset, buf, buf_size, file_size, id);
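kernel_read_file_from_fd() now rejects descriptors that were opened without read permission up front, instead of relying on a later read to fail. The same property is visible from user space, where a descriptor's access mode is fixed at open time and read() on a write-only fd fails with EBADF; the demonstration below assumes a writable path such as /dev/null exists:

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        char byte;
        int fd = open("/dev/null", O_WRONLY);  /* no FMODE_READ analogue */

        if (fd < 0)
            return 1;

        /* Access mode is fixed at open time; read() must fail (EBADF). */
        if (read(fd, &byte, 1) < 0)
            printf("read on O_WRONLY fd failed: %s\n", strerror(errno));

        /* F_GETFL exposes the mode up front, like the f_mode check. */
        if ((fcntl(fd, F_GETFL) & O_ACCMODE) == O_WRONLY)
            printf("descriptor is write-only, rejecting early\n");

        close(fd);
        return 0;
    }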
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c66c702a4f07974a138bac5339faf03e5a7249be..4d3d918f0049a02831bcfd878b03304a62978ca5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1463,6 +1463,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
 void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 					    unsigned long start, unsigned long end);
 
+void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
+
 #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
 int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
 #else
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 9b5adeaa47fc819c4c9fc6396871db2848770505..3188bc80b1b815974c2d2e547867b1771bfc76a3 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -1797,6 +1797,10 @@ bool ima_appraise_signature(enum kernel_read_file_id id)
 	if (id >= READING_MAX_ID)
 		return false;
 
+	if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE)
+	    && security_locked_down(LOCKDOWN_KEXEC))
+		return false;
+
 	func = read_idmap[id] ?: FILE_CHECK;
 
 	rcu_read_lock();
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2809a69418a657052e6c84b572d2964e4b369088..7b2d4318ccb6bdb1261c3daabff6703475f4dcd6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -157,6 +157,10 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 {
 }
 
+__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+}
+
 bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
 {
 	/*
@@ -333,6 +337,12 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 #endif
 
+static void kvm_flush_shadow_all(struct kvm *kvm)
+{
+	kvm_arch_flush_shadow_all(kvm);
+	kvm_arch_guest_memory_reclaimed(kvm);
+}
+
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
 	kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
@@ -487,6 +497,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 		kvm_flush_remote_tlbs(kvm);
 
 	spin_unlock(&kvm->mmu_lock);
+	kvm_arch_guest_memory_reclaimed(kvm);
 	srcu_read_unlock(&kvm->srcu, idx);
 
 	return 0;
@@ -590,7 +601,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 	int idx;
 
 	idx = srcu_read_lock(&kvm->srcu);
-	kvm_arch_flush_shadow_all(kvm);
+	kvm_flush_shadow_all(kvm);
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
@@ -870,7 +881,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
-	kvm_arch_flush_shadow_all(kvm);
+	kvm_flush_shadow_all(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
@@ -1211,6 +1222,7 @@ static int kvm_set_memslot(struct kvm *kvm,
 	 * - kvm_is_visible_gfn (mmu_check_root)
 	 */
 	kvm_arch_flush_shadow_memslot(kvm, slot);
+	kvm_arch_guest_memory_reclaimed(kvm);
 	}
 
 	r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
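The kvm_main.c hunks pair every full shadow flush with the reclaim notification by routing both through kvm_flush_shadow_all(), so no teardown path can drop guest mappings while forgetting the cache flush. A user-space sketch of that single-choke-point pairing (names mirror the patch, bodies are stubs, nothing here is kernel API):

    #include <stdio.h>

    struct kvm { int id; };

    /* Stubs for the two operations the patch insists travel together. */
    static void kvm_arch_flush_shadow_all(struct kvm *kvm)
    {
        printf("vm%d: drop all shadow/NPT mappings\n", kvm->id);
    }

    static void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
    {
        printf("vm%d: arch hook (SEV: wbinvd_on_all_cpus)\n", kvm->id);
    }

    /* Single choke point, as in the patch: callers can no longer flush
     * the shadow pages while skipping the reclaim notification. */
    static void kvm_flush_shadow_all(struct kvm *kvm)
    {
        kvm_arch_flush_shadow_all(kvm);
        kvm_arch_guest_memory_reclaimed(kvm);
    }

    int main(void)
    {
        struct kvm vm = { .id = 0 };

        kvm_flush_shadow_all(&vm);    /* e.g. from VM destruction */
        return 0;
    }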