From 88c646e68d3e46d2b23c2c6c62a495c0ca2419a7 Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:54 +0800 Subject: [PATCH 01/13] scsi-bus: Refactor the code that retries requests Move the code that retries requests from scsi_dma_restart_bh() to its own, non-static, function. This will allow us to call it from the retry_request_cb() of scsi-disk in a future patch. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang --- ...actor-the-code-that-retries-requests.patch | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 scsi-bus-Refactor-the-code-that-retries-requests.patch diff --git a/scsi-bus-Refactor-the-code-that-retries-requests.patch b/scsi-bus-Refactor-the-code-that-retries-requests.patch new file mode 100644 index 00000000..eae42b85 --- /dev/null +++ b/scsi-bus-Refactor-the-code-that-retries-requests.patch @@ -0,0 +1,68 @@ +From eb55d7c4f6e0adae2aab8bd750dccf9cd7a8c784 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:54 +0800 +Subject: [PATCH] scsi-bus: Refactor the code that retries requests + +Move the code that retries requests from scsi_dma_restart_bh() to its own, +non-static, function. This will allow us to call it from the +retry_request_cb() of scsi-disk in a future patch. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + hw/scsi/scsi-bus.c | 16 +++++++++++----- + include/hw/scsi/scsi.h | 1 + + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index fdc3a0e4e0..9dc09b5f3e 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -99,14 +99,10 @@ void scsi_bus_new(SCSIBus *bus, size_t bus_size, DeviceState *host, + qbus_set_bus_hotplug_handler(BUS(bus), &error_abort); + } + +-static void scsi_dma_restart_bh(void *opaque) ++void scsi_retry_requests(SCSIDevice *s) + { +- SCSIDevice *s = opaque; + SCSIRequest *req, *next; + +- qemu_bh_delete(s->bh); +- s->bh = NULL; +- + aio_context_acquire(blk_get_aio_context(s->conf.blk)); + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { + scsi_req_ref(req); +@@ -128,6 +124,16 @@ static void scsi_dma_restart_bh(void *opaque) + aio_context_release(blk_get_aio_context(s->conf.blk)); + } + ++static void scsi_dma_restart_bh(void *opaque) ++{ ++ SCSIDevice *s = opaque; ++ ++ qemu_bh_delete(s->bh); ++ s->bh = NULL; ++ ++ scsi_retry_requests(s); ++} ++ + void scsi_req_retry(SCSIRequest *req) + { + /* No need to save a reference, because scsi_dma_restart_bh just +diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h +index 426566a5c6..1231d30b35 100644 +--- a/include/hw/scsi/scsi.h ++++ b/include/hw/scsi/scsi.h +@@ -184,6 +184,7 @@ void scsi_req_cancel_complete(SCSIRequest *req); + void scsi_req_cancel(SCSIRequest *req); + void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier); + void scsi_req_retry(SCSIRequest *req); ++void scsi_retry_requests(SCSIDevice *s); + void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense); + void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense); + void scsi_device_report_change(SCSIDevice *dev, SCSISense sense); +-- +2.27.0 + -- Gitee From 06391f3674c91cd2e054a4dc206ba55968087101 Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:55 +0800 Subject: [PATCH 02/13] scsi-disk: Add support for retry on errors Mark failed requests as to be retried and implement retry_request_cb to handle these requests. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang --- ...disk-Add-support-for-retry-on-errors.patch | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 scsi-disk-Add-support-for-retry-on-errors.patch diff --git a/scsi-disk-Add-support-for-retry-on-errors.patch b/scsi-disk-Add-support-for-retry-on-errors.patch new file mode 100644 index 00000000..e0bd91ec --- /dev/null +++ b/scsi-disk-Add-support-for-retry-on-errors.patch @@ -0,0 +1,76 @@ +From 34f1552a6d7e05f2f2146ebc6d50deb2de7e5fd4 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:55 +0800 +Subject: [PATCH] scsi-disk: Add support for retry on errors + +Mark failed requests as to be retried and implement retry_request_cb to +handle these requests. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + hw/scsi/scsi-disk.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index cd90cd780e..93fdd913fe 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -184,6 +184,8 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) + + static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + { ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); ++ + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + return true; +@@ -193,6 +195,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return scsi_handle_rw_error(r, -ret, acct_failed); + } + ++ blk_error_retry_reset_timeout(s->qdev.conf.blk); + return false; + } + +@@ -480,6 +483,10 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) + } + } + ++ if (action == BLOCK_ERROR_ACTION_RETRY) { ++ scsi_req_retry(&r->req); ++ } ++ + blk_error_action(s->qdev.conf.blk, action, is_read, error); + if (action == BLOCK_ERROR_ACTION_IGNORE) { + scsi_req_complete(&r->req, 0); +@@ -2252,6 +2259,13 @@ static void scsi_disk_resize_cb(void *opaque) + } + } + ++static void scsi_disk_retry_request(void *opaque) ++{ ++ SCSIDiskState *s = opaque; ++ ++ scsi_retry_requests(&s->qdev); ++} ++ + static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp) + { + SCSIDiskState *s = opaque; +@@ -2300,10 +2314,12 @@ static const BlockDevOps scsi_disk_removable_block_ops = { + .is_medium_locked = scsi_cd_is_medium_locked, + + .resize_cb = scsi_disk_resize_cb, ++ .retry_request_cb = scsi_disk_retry_request, + }; + + static const BlockDevOps scsi_disk_block_ops = { + .resize_cb = scsi_disk_resize_cb, ++ .retry_request_cb = scsi_disk_retry_request, + }; + + static void scsi_disk_unit_attention_reported(SCSIDevice *dev) +-- +2.27.0 + -- Gitee From ce6e9dbebe4b77a7c89a93ee03cba440aa173dc3 Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:45 +0800 Subject: [PATCH 03/13] qapi/block-core: Add retry option for error action Add a new error action 'retry' to support retry on errors. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang --- ...re-Add-retry-option-for-error-action.patch | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 qapi-block-core-Add-retry-option-for-error-action.patch diff --git a/qapi-block-core-Add-retry-option-for-error-action.patch b/qapi-block-core-Add-retry-option-for-error-action.patch new file mode 100644 index 00000000..817ff705 --- /dev/null +++ b/qapi-block-core-Add-retry-option-for-error-action.patch @@ -0,0 +1,52 @@ +From 9a95d75bdd469c9c7d44c7c72bc16d57ef2f65cc Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:45 +0800 +Subject: [PATCH] qapi/block-core: Add retry option for error action + +Add a new error action 'retry' to support retry on errors. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + blockdev.c | 2 ++ + qapi/block-core.json | 4 ++-- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index 4d141e9a1f..0f49fd290e 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -319,6 +319,8 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp) + return BLOCKDEV_ON_ERROR_STOP; + } else if (!strcmp(buf, "report")) { + return BLOCKDEV_ON_ERROR_REPORT; ++ } else if (!strcmp(buf, "retry")) { ++ return BLOCKDEV_ON_ERROR_RETRY; + } else { + error_setg(errp, "'%s' invalid %s error action", + buf, is_read ? "read" : "write"); +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 0d43d4f37c..db24f0dfe5 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1113,7 +1113,7 @@ + # Since: 1.3 + ## + { 'enum': 'BlockdevOnError', +- 'data': ['report', 'ignore', 'enospc', 'stop', 'auto'] } ++ 'data': ['report', 'ignore', 'enospc', 'stop', 'auto', 'retry'] } + + ## + # @MirrorSyncMode: +@@ -4894,7 +4894,7 @@ + # Since: 2.1 + ## + { 'enum': 'BlockErrorAction', +- 'data': [ 'ignore', 'report', 'stop' ] } ++ 'data': [ 'ignore', 'report', 'stop', 'retry' ] } + + + ## +-- +2.27.0 + -- Gitee From e15dae90d8ee0e4e93c768ac57f61e9f56fd183d Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:46 +0800 Subject: [PATCH 04/13] block-backend: Introduce retry timer Add a timer to regularly trigger retry on errors. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang --- block-backend-Introduce-retry-timer.patch | 69 +++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 block-backend-Introduce-retry-timer.patch diff --git a/block-backend-Introduce-retry-timer.patch b/block-backend-Introduce-retry-timer.patch new file mode 100644 index 00000000..e45aacc1 --- /dev/null +++ b/block-backend-Introduce-retry-timer.patch @@ -0,0 +1,69 @@ +From 805c2e121e1ad612f63bafec458284554e76d034 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:46 +0800 +Subject: [PATCH] block-backend: Introduce retry timer + +Add a timer to regularly trigger retry on errors. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 0056b526b8..a9a43b1440 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -31,6 +31,9 @@ + + static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); + ++/* block backend default retry interval */ ++#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 ++ + typedef struct BlockBackendAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); +@@ -88,6 +91,15 @@ struct BlockBackend { + * Accessed with atomic ops. + */ + unsigned int in_flight; ++ ++ /* Timer for retry on errors. */ ++ QEMUTimer *retry_timer; ++ /* Interval in ms to trigger next retry. */ ++ int64_t retry_interval; ++ /* Start time of the first error. Used to check timeout. */ ++ int64_t retry_start_time; ++ /* Retry timeout. 0 represents infinite retry. */ ++ int64_t retry_timeout; + }; + + typedef struct BlockBackendAIOCB { +@@ -337,6 +349,11 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) + blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT; + blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; + ++ blk->retry_timer = NULL; ++ blk->retry_interval = BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL; ++ blk->retry_start_time = 0; ++ blk->retry_timeout = 0; ++ + block_acct_init(&blk->stats); + + notifier_list_init(&blk->remove_bs_notifiers); +@@ -423,6 +440,10 @@ static void blk_delete(BlockBackend *blk) + QTAILQ_REMOVE(&block_backends, blk, link); + drive_info_del(blk->legacy_dinfo); + block_acct_cleanup(&blk->stats); ++ if (blk->retry_timer) { ++ timer_del(blk->retry_timer); ++ timer_free(blk->retry_timer); ++ } + g_free(blk); + } + +-- +2.27.0 + -- Gitee From 788fb23e33318deffb0c4e7021e367882baed50b Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:47 +0800 Subject: [PATCH 05/13] block-backend: Add device specific retry callback Add retry_request_cb in BlockDevOps to do device specific retry action. Backend's timer would be registered only when the backend is set 'retry' on errors and the device supports retry action. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang --- ...d-Add-device-specific-retry-callback.patch | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 block-backend-Add-device-specific-retry-callback.patch diff --git a/block-backend-Add-device-specific-retry-callback.patch b/block-backend-Add-device-specific-retry-callback.patch new file mode 100644 index 00000000..d50d6bc9 --- /dev/null +++ b/block-backend-Add-device-specific-retry-callback.patch @@ -0,0 +1,53 @@ +From f74edc7c8c85874691daf8801c159874ef45aae0 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:47 +0800 +Subject: [PATCH] block-backend: Add device specific retry callback + +Add retry_request_cb in BlockDevOps to do device specific retry action. +Backend's timer would be registered only when the backend is set 'retry' +on errors and the device supports retry action. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 8 ++++++++ + include/sysemu/block-backend.h | 4 ++++ + 2 files changed, 12 insertions(+) + +diff --git a/block/block-backend.c b/block/block-backend.c +index a9a43b1440..b8f535a5fd 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -958,6 +958,14 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, + blk->dev_ops = ops; + blk->dev_opaque = opaque; + ++ if ((blk->on_read_error == BLOCKDEV_ON_ERROR_RETRY || ++ blk->on_write_error == BLOCKDEV_ON_ERROR_RETRY) && ++ ops->retry_request_cb) { ++ blk->retry_timer = aio_timer_new(blk->ctx, QEMU_CLOCK_REALTIME, ++ SCALE_MS, ops->retry_request_cb, ++ opaque); ++ } ++ + /* Are we currently quiesced? Should we enforce this right now? */ + if (blk->quiesce_counter && ops->drained_begin) { + ops->drained_begin(opaque); +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index 733c4957eb..b58dc6bde8 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -66,6 +66,10 @@ typedef struct BlockDevOps { + * Runs when the backend's last drain request ends. + */ + void (*drained_end)(void *opaque); ++ /* ++ * Runs when retrying failed requests. ++ */ ++ void (*retry_request_cb)(void *opaque); + } BlockDevOps; + + /* This struct is embedded in (the private) BlockBackend struct and contains +-- +2.27.0 + -- Gitee From c5a58a3c03574338cc7e2ac6ce69a3881d640335 Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:48 +0800 Subject: [PATCH 06/13] block-backend: Enable retry action on errors Enable retry action when backend's retry timer is available. It would trigger the timer to do device specific retry action. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang --- ...ackend-Enable-retry-action-on-errors.patch | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 block-backend-Enable-retry-action-on-errors.patch diff --git a/block-backend-Enable-retry-action-on-errors.patch b/block-backend-Enable-retry-action-on-errors.patch new file mode 100644 index 00000000..241f945c --- /dev/null +++ b/block-backend-Enable-retry-action-on-errors.patch @@ -0,0 +1,42 @@ +From 8df36cddd1e5e2b3c3598c83a70e8cbb81c26cec Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:48 +0800 +Subject: [PATCH] block-backend: Enable retry action on errors + +Enable retry action when backend's retry timer is available. It would +trigger the timer to do device specific retry action. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/block/block-backend.c b/block/block-backend.c +index b8f535a5fd..11f8ff4301 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1660,6 +1660,9 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + return BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_IGNORE: + return BLOCK_ERROR_ACTION_IGNORE; ++ case BLOCKDEV_ON_ERROR_RETRY: ++ return (blk->retry_timer) ? ++ BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_AUTO: + default: + abort(); +@@ -1707,6 +1710,10 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, + qemu_system_vmstop_request_prepare(); + send_qmp_error_event(blk, action, is_read, error); + qemu_system_vmstop_request(RUN_STATE_IO_ERROR); ++ } else if (action == BLOCK_ERROR_ACTION_RETRY) { ++ timer_mod(blk->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + ++ blk->retry_interval); ++ send_qmp_error_event(blk, action, is_read, error); + } else { + send_qmp_error_event(blk, action, is_read, error); + } +-- +2.27.0 + -- Gitee From 7cdf5c37303bc7df55eb760201c2e7711e61fbf8 Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:49 +0800 Subject: [PATCH 07/13] block-backend: Add timeout support for retry Retry should only be triggered when timeout is not reached, so let's check timeout before retry. Device should also reset retry_start_time after successful retry. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang --- ...ackend-Add-timeout-support-for-retry.patch | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 block-backend-Add-timeout-support-for-retry.patch diff --git a/block-backend-Add-timeout-support-for-retry.patch b/block-backend-Add-timeout-support-for-retry.patch new file mode 100644 index 00000000..ac1bc662 --- /dev/null +++ b/block-backend-Add-timeout-support-for-retry.patch @@ -0,0 +1,74 @@ +From c58269c64af18bc2a22bbef8b92e489214272429 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:49 +0800 +Subject: [PATCH] block-backend: Add timeout support for retry + +Retry should only be triggered when timeout is not reached, so let's check +timeout before retry. Device should also reset retry_start_time after +successful retry. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 25 ++++++++++++++++++++++++- + include/sysemu/block-backend.h | 1 + + 2 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 11f8ff4301..0fe99ffe52 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1633,6 +1633,29 @@ void blk_drain_all(void) + bdrv_drain_all_end(); + } + ++static bool blk_error_retry_timeout(BlockBackend *blk) ++{ ++ /* No timeout set, infinite retries. */ ++ if (!blk->retry_timeout) { ++ return false; ++ } ++ ++ /* The first time an error occurs. */ ++ if (!blk->retry_start_time) { ++ blk->retry_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ return false; ++ } ++ ++ return qemu_clock_get_ms(QEMU_CLOCK_REALTIME) > (blk->retry_start_time + ++ blk->retry_timeout); ++} ++ ++void blk_error_retry_reset_timeout(BlockBackend *blk) ++{ ++ if (blk->retry_timer && blk->retry_start_time) ++ blk->retry_start_time = 0; ++} ++ + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error) + { +@@ -1661,7 +1684,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + case BLOCKDEV_ON_ERROR_IGNORE: + return BLOCK_ERROR_ACTION_IGNORE; + case BLOCKDEV_ON_ERROR_RETRY: +- return (blk->retry_timer) ? ++ return (blk->retry_timer && !blk_error_retry_timeout(blk)) ? + BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_AUTO: + default: +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index b58dc6bde8..58dde446ca 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -184,6 +184,7 @@ void blk_inc_in_flight(BlockBackend *blk); + void blk_dec_in_flight(BlockBackend *blk); + void blk_drain(BlockBackend *blk); + void blk_drain_all(void); ++void blk_error_retry_reset_timeout(BlockBackend *blk); + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); + BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); +-- +2.27.0 + -- Gitee From e2373c22c5c8a64d0b00a6c2c4b9dd1f64e1b7c1 Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:50 +0800 Subject: [PATCH 08/13] block: Add error retry param setting Add "retry_interval" and "retry_timeout" parameter for drive and device option. These parameter are valid only when werror/rerror=retry. eg. --drive file=image,rerror=retry,retry_interval=1000,retry_timeout=5000 Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang --- block-Add-error-retry-param-setting.patch | 226 ++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 block-Add-error-retry-param-setting.patch diff --git a/block-Add-error-retry-param-setting.patch b/block-Add-error-retry-param-setting.patch new file mode 100644 index 00000000..72f214b1 --- /dev/null +++ b/block-Add-error-retry-param-setting.patch @@ -0,0 +1,226 @@ +From 3464a135565d718d0fedadd67081a0f76d81a9c6 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:50 +0800 +Subject: [PATCH] block: Add error retry param setting + +Add "retry_interval" and "retry_timeout" parameter for drive and device +option. These parameter are valid only when werror/rerror=retry. + +eg. --drive file=image,rerror=retry,retry_interval=1000,retry_timeout=5000 + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + block/block-backend.c | 13 +++++++-- + blockdev.c | 50 ++++++++++++++++++++++++++++++++++ + hw/block/block.c | 10 +++++++ + include/hw/block/block.h | 7 ++++- + include/sysemu/block-backend.h | 5 ++++ + 5 files changed, 81 insertions(+), 4 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 0fe99ffe52..2d812e2254 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -31,9 +31,6 @@ + + static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); + +-/* block backend default retry interval */ +-#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 +- + typedef struct BlockBackendAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); +@@ -1633,6 +1630,16 @@ void blk_drain_all(void) + bdrv_drain_all_end(); + } + ++void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval) ++{ ++ blk->retry_interval = interval; ++} ++ ++void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout) ++{ ++ blk->retry_timeout = timeout; ++} ++ + static bool blk_error_retry_timeout(BlockBackend *blk) + { + /* No timeout set, infinite retries. */ +diff --git a/blockdev.c b/blockdev.c +index 0f49fd290e..99c92b96d2 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -470,6 +470,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + const char *buf; + int bdrv_flags = 0; + int on_read_error, on_write_error; ++ int64_t retry_interval, retry_timeout; + bool account_invalid, account_failed; + bool writethrough, read_only; + BlockBackend *blk; +@@ -565,6 +566,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + } + } + ++ retry_interval = qemu_opt_get_number(opts, "retry_interval", ++ BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL); ++ retry_timeout = qemu_opt_get_number(opts, "retry_timeout", 0); ++ + if (snapshot) { + bdrv_flags |= BDRV_O_SNAPSHOT; + } +@@ -629,6 +634,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + + blk_set_enable_write_cache(blk, !writethrough); + blk_set_on_error(blk, on_read_error, on_write_error); ++ if (on_read_error == BLOCKDEV_ON_ERROR_RETRY || ++ on_write_error == BLOCKDEV_ON_ERROR_RETRY) { ++ blk_set_on_error_retry_interval(blk, retry_interval); ++ blk_set_on_error_retry_timeout(blk, retry_timeout); ++ } + + if (!monitor_add_blk(blk, id, errp)) { + blk_unref(blk); +@@ -754,6 +764,14 @@ QemuOptsList qemu_legacy_drive_opts = { + .name = "werror", + .type = QEMU_OPT_STRING, + .help = "write error action", ++ },{ ++ .name = "retry_interval", ++ .type = QEMU_OPT_NUMBER, ++ .help = "interval for retry action in millisecond", ++ },{ ++ .name = "retry_timeout", ++ .type = QEMU_OPT_NUMBER, ++ .help = "timeout for retry action in millisecond", + },{ + .name = "copy-on-read", + .type = QEMU_OPT_BOOL, +@@ -776,6 +794,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, + BlockInterfaceType type; + int max_devs, bus_id, unit_id, index; + const char *werror, *rerror; ++ int64_t retry_interval, retry_timeout; + bool read_only = false; + bool copy_on_read; + const char *filename; +@@ -992,6 +1011,29 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, + qdict_put_str(bs_opts, "rerror", rerror); + } + ++ if (qemu_opt_find(legacy_opts, "retry_interval")) { ++ if ((werror == NULL || strcmp(werror, "retry")) && ++ (rerror == NULL || strcmp(rerror, "retry"))) { ++ error_setg(errp, "retry_interval is only supported " ++ "by werror/rerror=retry"); ++ goto fail; ++ } ++ retry_interval = qemu_opt_get_number(legacy_opts, "retry_interval", ++ BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL); ++ qdict_put_int(bs_opts, "retry_interval", retry_interval); ++ } ++ ++ if (qemu_opt_find(legacy_opts, "retry_timeout")) { ++ if ((werror == NULL || strcmp(werror, "retry")) && ++ (rerror == NULL || strcmp(rerror, "retry"))) { ++ error_setg(errp, "retry_timeout is only supported " ++ "by werror/rerror=retry"); ++ goto fail; ++ } ++ retry_timeout = qemu_opt_get_number(legacy_opts, "retry_timeout", 0); ++ qdict_put_int(bs_opts, "retry_timeout", retry_timeout); ++ } ++ + /* Actual block device init: Functionality shared with blockdev-add */ + blk = blockdev_init(filename, bs_opts, &local_err); + bs_opts = NULL; +@@ -4593,6 +4635,14 @@ QemuOptsList qemu_common_drive_opts = { + .name = "werror", + .type = QEMU_OPT_STRING, + .help = "write error action", ++ },{ ++ .name = "retry_interval", ++ .type = QEMU_OPT_NUMBER, ++ .help = "interval for retry action in millisecond", ++ },{ ++ .name = "retry_timeout", ++ .type = QEMU_OPT_NUMBER, ++ .help = "timeout for retry action in millisecond", + },{ + .name = BDRV_OPT_READ_ONLY, + .type = QEMU_OPT_BOOL, +diff --git a/hw/block/block.c b/hw/block/block.c +index bf56c7612b..56141940ca 100644 +--- a/hw/block/block.c ++++ b/hw/block/block.c +@@ -134,6 +134,16 @@ bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, + blk_set_enable_write_cache(blk, wce); + blk_set_on_error(blk, rerror, werror); + ++ if (rerror == BLOCKDEV_ON_ERROR_RETRY || ++ werror == BLOCKDEV_ON_ERROR_RETRY) { ++ if (conf->retry_interval >= 0) { ++ blk_set_on_error_retry_interval(blk, conf->retry_interval); ++ } ++ if (conf->retry_timeout >= 0) { ++ blk_set_on_error_retry_timeout(blk, conf->retry_timeout); ++ } ++ } ++ + return true; + } + +diff --git a/include/hw/block/block.h b/include/hw/block/block.h +index 607539057a..d12603aabd 100644 +--- a/include/hw/block/block.h ++++ b/include/hw/block/block.h +@@ -30,6 +30,8 @@ typedef struct BlockConf { + bool share_rw; + BlockdevOnError rerror; + BlockdevOnError werror; ++ int64_t retry_interval; ++ int64_t retry_timeout; + } BlockConf; + + static inline unsigned int get_physical_block_exp(BlockConf *conf) +@@ -71,7 +73,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) + DEFINE_PROP_BLOCKDEV_ON_ERROR("rerror", _state, _conf.rerror, \ + BLOCKDEV_ON_ERROR_AUTO), \ + DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror, \ +- BLOCKDEV_ON_ERROR_AUTO) ++ BLOCKDEV_ON_ERROR_AUTO), \ ++ DEFINE_PROP_INT64("retry_interval", _state, _conf.retry_interval, \ ++ -1), \ ++ DEFINE_PROP_INT64("retry_timeout", _state, _conf.retry_timeout, -1) + + /* Backend access helpers */ + +diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h +index 58dde446ca..dc10e507ae 100644 +--- a/include/sysemu/block-backend.h ++++ b/include/sysemu/block-backend.h +@@ -25,6 +25,9 @@ + */ + #include "block/block.h" + ++/* block backend default retry interval */ ++#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 ++ + /* Callbacks for block device models */ + typedef struct BlockDevOps { + /* +@@ -184,6 +187,8 @@ void blk_inc_in_flight(BlockBackend *blk); + void blk_dec_in_flight(BlockBackend *blk); + void blk_drain(BlockBackend *blk); + void blk_drain_all(void); ++void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval); ++void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout); + void blk_error_retry_reset_timeout(BlockBackend *blk); + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); +-- +2.27.0 + -- Gitee From 5e4dbf3e78150a8b9f06cc7e8982086e632739c9 Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:51 +0800 Subject: [PATCH 09/13] virtio-blk: Refactor the code that processes queued requests Move the code that processes queued requests from virtio_blk_dma_restart_bh() to its own, non-static, function. This will allow us to call it from the virtio_blk_data_plane_start() in a future patch. Signed-off-by: Sergio Lopez Message-Id: <20200603093240.40489-2-slp(a)redhat.com> Signed-off-by: Kevin Wolf --- ...tor-the-code-that-processes-queued-r.patch | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 virtio-blk-Refactor-the-code-that-processes-queued-r.patch diff --git a/virtio-blk-Refactor-the-code-that-processes-queued-r.patch b/virtio-blk-Refactor-the-code-that-processes-queued-r.patch new file mode 100644 index 00000000..2848fbd0 --- /dev/null +++ b/virtio-blk-Refactor-the-code-that-processes-queued-r.patch @@ -0,0 +1,70 @@ +From 21c5ffb363930dfe6213bb677c5811fede3bcee2 Mon Sep 17 00:00:00 2001 +From: Sergio Lopez +Date: Thu, 21 Jan 2021 15:46:51 +0800 +Subject: [PATCH] virtio-blk: Refactor the code that processes queued requests + +Move the code that processes queued requests from +virtio_blk_dma_restart_bh() to its own, non-static, function. This +will allow us to call it from the virtio_blk_data_plane_start() in a +future patch. + +Signed-off-by: Sergio Lopez +Message-Id: <20200603093240.40489-2-slp(a)redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/block/virtio-blk.c | 16 +++++++++++----- + include/hw/virtio/virtio-blk.h | 1 + + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 703ed4c93b..cee2c673a5 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -809,15 +809,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + virtio_blk_handle_output_do(s, vq); + } + +-static void virtio_blk_dma_restart_bh(void *opaque) ++void virtio_blk_process_queued_requests(VirtIOBlock *s) + { +- VirtIOBlock *s = opaque; + VirtIOBlockReq *req = s->rq; + MultiReqBuffer mrb = {}; + +- qemu_bh_delete(s->bh); +- s->bh = NULL; +- + s->rq = NULL; + + aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); +@@ -845,6 +841,16 @@ static void virtio_blk_dma_restart_bh(void *opaque) + aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + ++static void virtio_blk_dma_restart_bh(void *opaque) ++{ ++ VirtIOBlock *s = opaque; ++ ++ qemu_bh_delete(s->bh); ++ s->bh = NULL; ++ ++ virtio_blk_process_queued_requests(s); ++} ++ + static void virtio_blk_dma_restart_cb(void *opaque, int running, + RunState state) + { +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index cddcfbebe9..cf8eea2f58 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -84,5 +84,6 @@ typedef struct MultiReqBuffer { + } MultiReqBuffer; + + bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); ++void virtio_blk_process_queued_requests(VirtIOBlock *s); + + #endif +-- +2.27.0 + -- Gitee From 42eba74d76be15e5c5f5432caa9f49793d5acdbc Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:52 +0800 Subject: [PATCH 10/13] virtio-blk: On restart, process queued requests in the proper context On restart, we were scheduling a BH to process queued requests, which would run before starting up the data plane, leading to those requests being assigned and started on coroutines on the main context. This could cause requests to be wrongly processed in parallel from different threads (the main thread and the iothread managing the data plane), potentially leading to multiple issues. For example, stopping and resuming a VM multiple times while the guest is generating I/O on a virtio_blk device can trigger a crash with a stack tracing looking like this one: <------> Thread 2 (Thread 0x7ff736765700 (LWP 1062503)): #0 0x00005567a13b99d6 in iov_memset (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:69 #1 0x00005567a13bab73 in qemu_iovec_memset (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 #5 0x00005567a12f43d9 in qemu_laio_process_completions_and_submit (s=0x7ff7182e8420) at block/linux-aio.c:236 #6 0x00005567a12f44c2 in qemu_laio_poll_cb (opaque=0x7ff7182e8430) at block/linux-aio.c:267 #7 0x00005567a13aed83 in run_poll_handlers_once (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) at util/aio-posix.c:520 #8 0x00005567a13aee9f in run_poll_handlers (ctx=0x5567a2b58c70, max_ns=16000, timeout=0x7ff7367645f8) at util/aio-posix.c:562 #9 0x00005567a13aefde in try_poll_mode (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) at util/aio-posix.c:597 #10 0x00005567a13af115 in aio_poll (ctx=0x5567a2b58c70, blocking=true) at util/aio-posix.c:639 #11 0x00005567a109acca in iothread_run (opaque=0x5567a2b29760) at iothread.c:75 #12 0x00005567a13b2790 in qemu_thread_start (args=0x5567a2b694c0) at util/qemu-thread-posix.c:519 #13 0x00007ff73eedf2de in start_thread () at /lib64/libpthread.so.0 #14 0x00007ff73ec10e83 in clone () at /lib64/libc.so.6 Thread 1 (Thread 0x7ff743986f00 (LWP 1062500)): #0 0x00005567a13b99d6 in iov_memset (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:69 #1 0x00005567a13bab73 in qemu_iovec_memset (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 #5 0x00005567a12f4a2f in laio_do_submit (fd=19, laiocb=0x7ff5f4ff9ae0, offset=472363008, type=2) at block/linux-aio.c:375 #6 0x00005567a12f4af2 in laio_co_submit (bs=0x5567a2b8c460, s=0x7ff7182e8420, fd=19, offset=472363008, qiov=0x7ff5f4ff9ca0, type=2) at block/linux-aio.c:394 #7 0x00005567a12f1803 in raw_co_prw (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, type=2) at block/file-posix.c:1892 #8 0x00005567a12f1941 in raw_co_pwritev (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, flags=0) at block/file-posix.c:1925 #9 0x00005567a12fe3e1 in bdrv_driver_pwritev (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, qiov_offset=0, flags=0) at block/io.c:1183 #10 0x00005567a1300340 in bdrv_aligned_pwritev (child=0x5567a2b5b070, req=0x7ff5f4ff9db0, offset=472363008, bytes=20480, align=512, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:1980 #11 0x00005567a1300b29 in bdrv_co_pwritev_part (child=0x5567a2b5b070, offset=472363008, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:2137 #12 0x00005567a12baba1 in qcow2_co_pwritev_task (bs=0x5567a2b92740, file_cluster_offset=472317952, offset=487305216, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, l2meta=0x0) at block/qcow2.c:2444 #13 0x00005567a12bacdb in qcow2_co_pwritev_task_entry (task=0x5567a2b48540) at block/qcow2.c:2475 #14 0x00005567a13167d8 in aio_task_co (opaque=0x5567a2b48540) at block/aio_task.c:45 #15 0x00005567a13cf00c in coroutine_trampoline (i0=738245600, i1=32759) at util/coroutine-ucontext.c:115 #16 0x00007ff73eb622e0 in __start_context () at /lib64/libc.so.6 #17 0x00007ff6626f1350 in () #18 0x0000000000000000 in () <------> This is also known to cause crashes with this message (assertion failed): aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule' RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1812765 Signed-off-by: Sergio Lopez Message-Id: <20200603093240.40489-3-slp(a)redhat.com> Signed-off-by: Kevin Wolf --- ...start-process-queued-requests-in-the.patch | 191 ++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 virtio-blk-On-restart-process-queued-requests-in-the.patch diff --git a/virtio-blk-On-restart-process-queued-requests-in-the.patch b/virtio-blk-On-restart-process-queued-requests-in-the.patch new file mode 100644 index 00000000..5edb6fd9 --- /dev/null +++ b/virtio-blk-On-restart-process-queued-requests-in-the.patch @@ -0,0 +1,191 @@ +From 882897127955fbede44c73703ec297c8ae89775d Mon Sep 17 00:00:00 2001 +From: Sergio Lopez +Date: Thu, 21 Jan 2021 15:46:52 +0800 +Subject: [PATCH] virtio-blk: On restart, process queued requests in the proper + context + +On restart, we were scheduling a BH to process queued requests, which +would run before starting up the data plane, leading to those requests +being assigned and started on coroutines on the main context. + +This could cause requests to be wrongly processed in parallel from +different threads (the main thread and the iothread managing the data +plane), potentially leading to multiple issues. + +For example, stopping and resuming a VM multiple times while the guest +is generating I/O on a virtio_blk device can trigger a crash with a +stack tracing looking like this one: + +<------> + Thread 2 (Thread 0x7ff736765700 (LWP 1062503)): + #0 0x00005567a13b99d6 in iov_memset + (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) + at util/iov.c:69 + #1 0x00005567a13bab73 in qemu_iovec_memset + (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 + #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 + #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 + #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 + #5 0x00005567a12f43d9 in qemu_laio_process_completions_and_submit (s=0x7ff7182e8420) + at block/linux-aio.c:236 + #6 0x00005567a12f44c2 in qemu_laio_poll_cb (opaque=0x7ff7182e8430) at block/linux-aio.c:267 + #7 0x00005567a13aed83 in run_poll_handlers_once (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) + at util/aio-posix.c:520 + #8 0x00005567a13aee9f in run_poll_handlers (ctx=0x5567a2b58c70, max_ns=16000, timeout=0x7ff7367645f8) + at util/aio-posix.c:562 + #9 0x00005567a13aefde in try_poll_mode (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) + at util/aio-posix.c:597 + #10 0x00005567a13af115 in aio_poll (ctx=0x5567a2b58c70, blocking=true) at util/aio-posix.c:639 + #11 0x00005567a109acca in iothread_run (opaque=0x5567a2b29760) at iothread.c:75 + #12 0x00005567a13b2790 in qemu_thread_start (args=0x5567a2b694c0) at util/qemu-thread-posix.c:519 + #13 0x00007ff73eedf2de in start_thread () at /lib64/libpthread.so.0 + #14 0x00007ff73ec10e83 in clone () at /lib64/libc.so.6 + + Thread 1 (Thread 0x7ff743986f00 (LWP 1062500)): + #0 0x00005567a13b99d6 in iov_memset + (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) + at util/iov.c:69 + #1 0x00005567a13bab73 in qemu_iovec_memset + (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 + #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 + #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 + #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 + #5 0x00005567a12f4a2f in laio_do_submit (fd=19, laiocb=0x7ff5f4ff9ae0, offset=472363008, type=2) + at block/linux-aio.c:375 + #6 0x00005567a12f4af2 in laio_co_submit + (bs=0x5567a2b8c460, s=0x7ff7182e8420, fd=19, offset=472363008, qiov=0x7ff5f4ff9ca0, type=2) + at block/linux-aio.c:394 + #7 0x00005567a12f1803 in raw_co_prw + (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, type=2) + at block/file-posix.c:1892 + #8 0x00005567a12f1941 in raw_co_pwritev + (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, flags=0) + at block/file-posix.c:1925 + #9 0x00005567a12fe3e1 in bdrv_driver_pwritev + (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, qiov_offset=0, flags=0) + at block/io.c:1183 + #10 0x00005567a1300340 in bdrv_aligned_pwritev + (child=0x5567a2b5b070, req=0x7ff5f4ff9db0, offset=472363008, bytes=20480, align=512, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:1980 + #11 0x00005567a1300b29 in bdrv_co_pwritev_part + (child=0x5567a2b5b070, offset=472363008, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) + at block/io.c:2137 + #12 0x00005567a12baba1 in qcow2_co_pwritev_task + (bs=0x5567a2b92740, file_cluster_offset=472317952, offset=487305216, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, l2meta=0x0) at block/qcow2.c:2444 + #13 0x00005567a12bacdb in qcow2_co_pwritev_task_entry (task=0x5567a2b48540) at block/qcow2.c:2475 + #14 0x00005567a13167d8 in aio_task_co (opaque=0x5567a2b48540) at block/aio_task.c:45 + #15 0x00005567a13cf00c in coroutine_trampoline (i0=738245600, i1=32759) at util/coroutine-ucontext.c:115 + #16 0x00007ff73eb622e0 in __start_context () at /lib64/libc.so.6 + #17 0x00007ff6626f1350 in () + #18 0x0000000000000000 in () +<------> + +This is also known to cause crashes with this message (assertion +failed): + + aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule' + +RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1812765 +Signed-off-by: Sergio Lopez +Message-Id: <20200603093240.40489-3-slp(a)redhat.com> +Signed-off-by: Kevin Wolf +--- + hw/block/dataplane/virtio-blk.c | 8 ++++++++ + hw/block/virtio-blk.c | 18 ++++++++++++------ + include/hw/virtio/virtio-blk.h | 2 +- + 3 files changed, 21 insertions(+), 7 deletions(-) + +diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c +index 5fea76df85..4476f97960 100644 +--- a/hw/block/dataplane/virtio-blk.c ++++ b/hw/block/dataplane/virtio-blk.c +@@ -219,6 +219,9 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + goto fail_guest_notifiers; + } + ++ /* Process queued requests before the ones in vring */ ++ virtio_blk_process_queued_requests(vblk, false); ++ + /* Kick right away to begin processing requests already in vring */ + for (i = 0; i < nvqs; i++) { + VirtQueue *vq = virtio_get_queue(s->vdev, i); +@@ -238,6 +241,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) + return 0; + + fail_guest_notifiers: ++ /* ++ * If we failed to set up the guest notifiers queued requests will be ++ * processed on the main context. ++ */ ++ virtio_blk_process_queued_requests(vblk, false); + vblk->dataplane_disabled = true; + s->starting = false; + vblk->dataplane_started = true; +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index cee2c673a5..ddf525b9d7 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -809,7 +809,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + virtio_blk_handle_output_do(s, vq); + } + +-void virtio_blk_process_queued_requests(VirtIOBlock *s) ++void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) + { + VirtIOBlockReq *req = s->rq; + MultiReqBuffer mrb = {}; +@@ -837,7 +837,9 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s) + if (mrb.num_reqs) { + virtio_blk_submit_multireq(s->blk, &mrb); + } +- blk_dec_in_flight(s->conf.conf.blk); ++ if (is_bh) { ++ blk_dec_in_flight(s->conf.conf.blk); ++ } + aio_context_release(blk_get_aio_context(s->conf.conf.blk)); + } + +@@ -848,21 +850,25 @@ static void virtio_blk_dma_restart_bh(void *opaque) + qemu_bh_delete(s->bh); + s->bh = NULL; + +- virtio_blk_process_queued_requests(s); ++ virtio_blk_process_queued_requests(s, true); + } + + static void virtio_blk_dma_restart_cb(void *opaque, int running, + RunState state) + { + VirtIOBlock *s = opaque; ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); ++ VirtioBusState *bus = VIRTIO_BUS(qbus); + + if (!running) { + return; + } + +- if (!s->bh) { +- /* FIXME The data plane is not started yet, so these requests are +- * processed in the main thread. */ ++ /* ++ * If ioeventfd is enabled, don't schedule the BH here as queued ++ * requests will be processed while starting the data plane. ++ */ ++ if (!s->bh && !virtio_bus_ioeventfd_enabled(bus)) { + s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk), + virtio_blk_dma_restart_bh, s); + blk_inc_in_flight(s->conf.conf.blk); +diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h +index cf8eea2f58..e77f0db3b0 100644 +--- a/include/hw/virtio/virtio-blk.h ++++ b/include/hw/virtio/virtio-blk.h +@@ -84,6 +84,6 @@ typedef struct MultiReqBuffer { + } MultiReqBuffer; + + bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); +-void virtio_blk_process_queued_requests(VirtIOBlock *s); ++void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh); + + #endif +-- +2.27.0 + -- Gitee From 4b34648df69dc5bcc15a7bd23425003753c300a6 Mon Sep 17 00:00:00 2001 From: "Huawei Technologies Co., Ltd" Date: Thu, 21 Jan 2021 15:46:53 +0800 Subject: [PATCH 11/13] virtio_blk: Add support for retry on errors Insert failed requests into device's list for later retry and handle queued requests to implement retry_request_cb. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang --- ..._blk-Add-support-for-retry-on-errors.patch | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 virtio_blk-Add-support-for-retry-on-errors.patch diff --git a/virtio_blk-Add-support-for-retry-on-errors.patch b/virtio_blk-Add-support-for-retry-on-errors.patch new file mode 100644 index 00000000..e7d8efd7 --- /dev/null +++ b/virtio_blk-Add-support-for-retry-on-errors.patch @@ -0,0 +1,89 @@ +From f3158cc327d435939d87ecee23485d082ebf3ba2 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:53 +0800 +Subject: [PATCH] virtio_blk: Add support for retry on errors + +Insert failed requests into device's list for later retry and handle +queued requests to implement retry_request_cb. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +--- + hw/block/virtio-blk.c | 21 ++++++++++++++++++--- + 1 file changed, 18 insertions(+), 3 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index ddf525b9d7..2db9804cfe 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -101,6 +101,10 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, + block_acct_failed(blk_get_stats(s->blk), &req->acct); + } + virtio_blk_free_request(req); ++ } else if (action == BLOCK_ERROR_ACTION_RETRY) { ++ req->mr_next = NULL; ++ req->next = s->rq; ++ s->rq = req; + } + + blk_error_action(s->blk, action, is_read, error); +@@ -142,6 +146,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) + } + } + ++ blk_error_retry_reset_timeout(s->blk); + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); +@@ -161,6 +166,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret) + } + } + ++ blk_error_retry_reset_timeout(s->blk); + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); +@@ -183,6 +189,7 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) + } + } + ++ blk_error_retry_reset_timeout(s->blk); + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + if (is_write_zeroes) { + block_acct_done(blk_get_stats(s->blk), &req->acct); +@@ -811,12 +818,12 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) + + void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) + { +- VirtIOBlockReq *req = s->rq; ++ VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; + +- s->rq = NULL; +- + aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); ++ req = s->rq; ++ s->rq = NULL; + while (req) { + VirtIOBlockReq *next = req->next; + if (virtio_blk_handle_request(req, &mrb)) { +@@ -1101,8 +1108,16 @@ static void virtio_blk_resize(void *opaque) + virtio_notify_config(vdev); + } + ++static void virtio_blk_retry_request(void *opaque) ++{ ++ VirtIOBlock *s = VIRTIO_BLK(opaque); ++ ++ virtio_blk_process_queued_requests(s, false); ++} ++ + static const BlockDevOps virtio_block_ops = { + .resize_cb = virtio_blk_resize, ++ .retry_request_cb = virtio_blk_retry_request, + }; + + static void virtio_blk_device_realize(DeviceState *dev, Error **errp) +-- +2.27.0 + -- Gitee From 4bffc50ac385392c5d527551e8b324942cc0785f Mon Sep 17 00:00:00 2001 From: Euler Robot Date: Sat, 30 Jan 2021 16:26:59 +0800 Subject: [PATCH 12/13] spec: Update patch and changelog with !65 scsi-bus: Refactor the code that retries requests scsi-disk: Add support for retry on errors qapi/block-core: Add retry option for error action block-backend: Introduce retry timer block-backend: Add device specific retry callback block-backend: Enable retry action on errors block-backend: Add timeout support for retry block: Add error retry param setting virtio-blk: Refactor the code that processes queued requests virtio-blk: On restart, process queued requests in the proper context virtio_blk: Add support for retry on errors Signed-off-by: Alex Chen --- qemu.spec | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/qemu.spec b/qemu.spec index 15655bd5..542f47f3 100644 --- a/qemu.spec +++ b/qemu.spec @@ -288,6 +288,17 @@ Patch0275: hw-net-e1000e-advance-desc_offset-in-case-of-null-de.patch Patch0276: hostmem-Fix-up-free-host_nodes-list-right-after-visi.patch Patch0277: target-arm-Fix-write-redundant-values-to-kvm.patch Patch0278: memory-clamp-cached-translation-in-case-it-points-to.patch +Patch0279: scsi-bus-Refactor-the-code-that-retries-requests.patch +Patch0280: scsi-disk-Add-support-for-retry-on-errors.patch +Patch0281: qapi-block-core-Add-retry-option-for-error-action.patch +Patch0282: block-backend-Introduce-retry-timer.patch +Patch0283: block-backend-Add-device-specific-retry-callback.patch +Patch0284: block-backend-Enable-retry-action-on-errors.patch +Patch0285: block-backend-Add-timeout-support-for-retry.patch +Patch0286: block-Add-error-retry-param-setting.patch +Patch0287: virtio-blk-Refactor-the-code-that-processes-queued-r.patch +Patch0288: virtio-blk-On-restart-process-queued-requests-in-the.patch +Patch0289: virtio_blk-Add-support-for-retry-on-errors.patch BuildRequires: flex BuildRequires: bison @@ -667,6 +678,19 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Sat Jan 30 2021 Huawei Technologies Co., Ltd +- scsi-bus: Refactor the code that retries requests +- scsi-disk: Add support for retry on errors +- qapi/block-core: Add retry option for error action +- block-backend: Introduce retry timer +- block-backend: Add device specific retry callback +- block-backend: Enable retry action on errors +- block-backend: Add timeout support for retry +- block: Add error retry param setting +- virtio-blk: Refactor the code that processes queued requests +- virtio-blk: On restart, process queued requests in the proper context +- virtio_blk: Add support for retry on errors + * Mon Jan 18 2021 Huawei Technologies Co., Ltd - feature: enable spice protocol -- Gitee From 34a163501518a11661bba3bbb4442417007d71af Mon Sep 17 00:00:00 2001 From: Euler Robot Date: Sat, 30 Jan 2021 16:26:59 +0800 Subject: [PATCH 13/13] spec: Update release version with !65 increase release verison by one Signed-off-by: Euler Robot --- qemu.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu.spec b/qemu.spec index 542f47f3..f546abe7 100644 --- a/qemu.spec +++ b/qemu.spec @@ -1,6 +1,6 @@ Name: qemu Version: 4.1.0 -Release: 43 +Release: 44 Epoch: 2 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 -- Gitee