From 4c3d47e04886e072acc0e4fefdc49e9d1f6b4ad1 Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 21 Jan 2021 15:46:45 +0800 Subject: [PATCH 1/7] qapi/block-core: Add retry option for error action Add a new error action 'retry' to support retry on errors. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang Signed-off-by: Alex Chen --- blockdev.c | 2 ++ qapi/block-core.json | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/blockdev.c b/blockdev.c index b35072644e..6f1981635b 100644 --- a/blockdev.c +++ b/blockdev.c @@ -333,6 +333,8 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp) return BLOCKDEV_ON_ERROR_STOP; } else if (!strcmp(buf, "report")) { return BLOCKDEV_ON_ERROR_REPORT; + } else if (!strcmp(buf, "retry")) { + return BLOCKDEV_ON_ERROR_RETRY; } else { error_setg(errp, "'%s' invalid %s error action", buf, is_read ? "read" : "write"); diff --git a/qapi/block-core.json b/qapi/block-core.json index 1d3dd9cb48..804beabfb0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1146,7 +1146,7 @@ # Since: 1.3 ## { 'enum': 'BlockdevOnError', - 'data': ['report', 'ignore', 'enospc', 'stop', 'auto'] } + 'data': ['report', 'ignore', 'enospc', 'stop', 'auto', 'retry'] } ## # @MirrorSyncMode: @@ -4952,7 +4952,7 @@ # Since: 2.1 ## { 'enum': 'BlockErrorAction', - 'data': [ 'ignore', 'report', 'stop' ] } + 'data': [ 'ignore', 'report', 'stop', 'retry' ] } ## -- Gitee From 4dc180e87fb641f64fce7be3a0807488d0cc0a51 Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 21 Jan 2021 15:46:46 +0800 Subject: [PATCH 2/7] block-backend: Introduce retry timer Add a timer to regularly trigger retry on errors. 
Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang Signed-off-by: Alex Chen --- block/block-backend.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 12ef80ea17..257cd775c0 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -35,6 +35,9 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); +/* block backend default retry interval */ +#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 + typedef struct BlockBackendAioNotifier { void (*attached_aio_context)(AioContext *new_context, void *opaque); void (*detach_aio_context)(void *opaque); @@ -95,6 +98,15 @@ struct BlockBackend { * Accessed with atomic ops. */ unsigned int in_flight; + + /* Timer for retry on errors. */ + QEMUTimer *retry_timer; + /* Interval in ms to trigger next retry. */ + int64_t retry_interval; + /* Start time of the first error. Used to check timeout. */ + int64_t retry_start_time; + /* Retry timeout. 0 represents infinite retry. 
*/ + int64_t retry_timeout; }; typedef struct BlockBackendAIOCB { @@ -353,6 +365,11 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT; blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; + blk->retry_timer = NULL; + blk->retry_interval = BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL; + blk->retry_start_time = 0; + blk->retry_timeout = 0; + block_acct_init(&blk->stats); qemu_co_queue_init(&blk->queued_requests); @@ -471,6 +488,10 @@ static void blk_delete(BlockBackend *blk) QTAILQ_REMOVE(&block_backends, blk, link); drive_info_del(blk->legacy_dinfo); block_acct_cleanup(&blk->stats); + if (blk->retry_timer) { + timer_del(blk->retry_timer); + timer_free(blk->retry_timer); + } g_free(blk); } -- Gitee From dfda8c57de71f2f10b57cf21b1e36f18d4ed37a3 Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 21 Jan 2021 15:46:47 +0800 Subject: [PATCH 3/7] block-backend: Add device specific retry callback Add retry_request_cb in BlockDevOps to do device specific retry action. Backend's timer would be registered only when the backend is set 'retry' on errors and the device supports retry action. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang Signed-off-by: Alex Chen --- block/block-backend.c | 8 ++++++++ include/sysemu/block-backend.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 257cd775c0..24003adf0b 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1018,6 +1018,14 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, blk->dev_ops = ops; blk->dev_opaque = opaque; + if ((blk->on_read_error == BLOCKDEV_ON_ERROR_RETRY || + blk->on_write_error == BLOCKDEV_ON_ERROR_RETRY) && + ops->retry_request_cb) { + blk->retry_timer = aio_timer_new(blk->ctx, QEMU_CLOCK_REALTIME, + SCALE_MS, ops->retry_request_cb, + opaque); + } + /* Are we currently quiesced? Should we enforce this right now? 
*/ if (blk->quiesce_counter && ops->drained_begin) { ops->drained_begin(opaque); diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index e5e1524f06..a7a13d47de 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -70,6 +70,10 @@ typedef struct BlockDevOps { * Is the device still busy? */ bool (*drained_poll)(void *opaque); + /* + * Runs when retrying failed requests. + */ + void (*retry_request_cb)(void *opaque); } BlockDevOps; /* This struct is embedded in (the private) BlockBackend struct and contains -- Gitee From 2e1c75e5a0339d2bf417e5a4437d8e627a303286 Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 21 Jan 2021 15:46:48 +0800 Subject: [PATCH 4/7] block-backend: Enable retry action on errors Enable retry action when backend's retry timer is available. It would trigger the timer to do device specific retry action. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang Signed-off-by: Alex Chen --- block/block-backend.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 24003adf0b..5a016d32fa 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1793,6 +1793,9 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, return BLOCK_ERROR_ACTION_REPORT; case BLOCKDEV_ON_ERROR_IGNORE: return BLOCK_ERROR_ACTION_IGNORE; + case BLOCKDEV_ON_ERROR_RETRY: + return (blk->retry_timer) ? 
+ BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; case BLOCKDEV_ON_ERROR_AUTO: default: abort(); @@ -1840,6 +1843,10 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, qemu_system_vmstop_request_prepare(); send_qmp_error_event(blk, action, is_read, error); qemu_system_vmstop_request(RUN_STATE_IO_ERROR); + } else if (action == BLOCK_ERROR_ACTION_RETRY) { + timer_mod(blk->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + + blk->retry_interval); + send_qmp_error_event(blk, action, is_read, error); } else { send_qmp_error_event(blk, action, is_read, error); } -- Gitee From 953590f4854d75e6051237f668c9fb393235f471 Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 21 Jan 2021 15:46:49 +0800 Subject: [PATCH 5/7] block-backend: Add timeout support for retry Retry should only be triggered when timeout is not reached, so let's check timeout before retry. Device should also reset retry_start_time after successful retry. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang Signed-off-by: Alex Chen --- block/block-backend.c | 25 ++++++++++++++++++++++++- include/sysemu/block-backend.h | 1 + 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/block/block-backend.c b/block/block-backend.c index 5a016d32fa..37e21c473e 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1766,6 +1766,29 @@ void blk_drain_all(void) bdrv_drain_all_end(); } +static bool blk_error_retry_timeout(BlockBackend *blk) +{ + /* No timeout set, infinite retries. */ + if (!blk->retry_timeout) { + return false; + } + + /* The first time an error occurs. 
*/ + if (!blk->retry_start_time) { + blk->retry_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + return false; + } + + return qemu_clock_get_ms(QEMU_CLOCK_REALTIME) > (blk->retry_start_time + + blk->retry_timeout); +} + +void blk_error_retry_reset_timeout(BlockBackend *blk) +{ + if (blk->retry_timer && blk->retry_start_time) + blk->retry_start_time = 0; +} + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error) { @@ -1794,7 +1817,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, case BLOCKDEV_ON_ERROR_IGNORE: return BLOCK_ERROR_ACTION_IGNORE; case BLOCKDEV_ON_ERROR_RETRY: - return (blk->retry_timer) ? + return (blk->retry_timer && !blk_error_retry_timeout(blk)) ? BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; case BLOCKDEV_ON_ERROR_AUTO: default: diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index a7a13d47de..56a403883d 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -198,6 +198,7 @@ void blk_inc_in_flight(BlockBackend *blk); void blk_dec_in_flight(BlockBackend *blk); void blk_drain(BlockBackend *blk); void blk_drain_all(void); +void blk_error_retry_reset_timeout(BlockBackend *blk); void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error); BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); -- Gitee From a58fda7b158441c645e143bf658d12914ffbc7b8 Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 21 Jan 2021 15:46:50 +0800 Subject: [PATCH 6/7] block: Add error retry param setting Add "retry_interval" and "retry_timeout" parameters for drive and device options. These parameters are valid only when werror/rerror=retry. e.g.
--drive file=image,rerror=retry,retry_interval=1000,retry_timeout=5000 Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang Signed-off-by: Alex Chen --- block/block-backend.c | 13 +++++++-- blockdev.c | 50 ++++++++++++++++++++++++++++++++++ hw/block/block.c | 10 +++++++ include/hw/block/block.h | 7 ++++- include/sysemu/block-backend.h | 5 ++++ 5 files changed, 81 insertions(+), 4 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 37e21c473e..d3d90a95a5 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -35,9 +35,6 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); -/* block backend default retry interval */ -#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 - typedef struct BlockBackendAioNotifier { void (*attached_aio_context)(AioContext *new_context, void *opaque); void (*detach_aio_context)(void *opaque); @@ -1766,6 +1763,16 @@ void blk_drain_all(void) bdrv_drain_all_end(); } +void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval) +{ + blk->retry_interval = interval; +} + +void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout) +{ + blk->retry_timeout = timeout; +} + static bool blk_error_retry_timeout(BlockBackend *blk) { /* No timeout set, infinite retries. 
*/ diff --git a/blockdev.c b/blockdev.c index 6f1981635b..10a73fa423 100644 --- a/blockdev.c +++ b/blockdev.c @@ -480,6 +480,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, const char *buf; int bdrv_flags = 0; int on_read_error, on_write_error; + int64_t retry_interval, retry_timeout; bool account_invalid, account_failed; bool writethrough, read_only; BlockBackend *blk; @@ -572,6 +573,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, } } + retry_interval = qemu_opt_get_number(opts, "retry_interval", + BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL); + retry_timeout = qemu_opt_get_number(opts, "retry_timeout", 0); + if (snapshot) { bdrv_flags |= BDRV_O_SNAPSHOT; } @@ -635,6 +640,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, blk_set_enable_write_cache(blk, !writethrough); blk_set_on_error(blk, on_read_error, on_write_error); + if (on_read_error == BLOCKDEV_ON_ERROR_RETRY || + on_write_error == BLOCKDEV_ON_ERROR_RETRY) { + blk_set_on_error_retry_interval(blk, retry_interval); + blk_set_on_error_retry_timeout(blk, retry_timeout); + } if (!monitor_add_blk(blk, id, errp)) { blk_unref(blk); @@ -761,6 +771,14 @@ QemuOptsList qemu_legacy_drive_opts = { .name = "werror", .type = QEMU_OPT_STRING, .help = "write error action", + },{ + .name = "retry_interval", + .type = QEMU_OPT_NUMBER, + .help = "interval for retry action in millisecond", + },{ + .name = "retry_timeout", + .type = QEMU_OPT_NUMBER, + .help = "timeout for retry action in millisecond", },{ .name = "copy-on-read", .type = QEMU_OPT_BOOL, @@ -783,6 +801,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, BlockInterfaceType type; int max_devs, bus_id, unit_id, index; const char *werror, *rerror; + int64_t retry_interval, retry_timeout; bool read_only = false; bool copy_on_read; const char *filename; @@ -990,6 +1009,29 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, 
qdict_put_str(bs_opts, "rerror", rerror); } + if (qemu_opt_find(legacy_opts, "retry_interval")) { + if ((werror == NULL || strcmp(werror, "retry")) && + (rerror == NULL || strcmp(rerror, "retry"))) { + error_setg(errp, "retry_interval is only supported " + "by werror/rerror=retry"); + goto fail; + } + retry_interval = qemu_opt_get_number(legacy_opts, "retry_interval", + BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL); + qdict_put_int(bs_opts, "retry_interval", retry_interval); + } + + if (qemu_opt_find(legacy_opts, "retry_timeout")) { + if ((werror == NULL || strcmp(werror, "retry")) && + (rerror == NULL || strcmp(rerror, "retry"))) { + error_setg(errp, "retry_timeout is only supported " + "by werror/rerror=retry"); + goto fail; + } + retry_timeout = qemu_opt_get_number(legacy_opts, "retry_timeout", 0); + qdict_put_int(bs_opts, "retry_timeout", retry_timeout); + } + /* Actual block device init: Functionality shared with blockdev-add */ blk = blockdev_init(filename, bs_opts, errp); bs_opts = NULL; @@ -3806,6 +3848,14 @@ QemuOptsList qemu_common_drive_opts = { .name = "werror", .type = QEMU_OPT_STRING, .help = "write error action", + },{ + .name = "retry_interval", + .type = QEMU_OPT_NUMBER, + .help = "interval for retry action in millisecond", + },{ + .name = "retry_timeout", + .type = QEMU_OPT_NUMBER, + .help = "timeout for retry action in millisecond", },{ .name = BDRV_OPT_READ_ONLY, .type = QEMU_OPT_BOOL, diff --git a/hw/block/block.c b/hw/block/block.c index d47ebf005a..26c0767552 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -206,6 +206,16 @@ bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, blk_set_enable_write_cache(blk, wce); blk_set_on_error(blk, rerror, werror); + if (rerror == BLOCKDEV_ON_ERROR_RETRY || + werror == BLOCKDEV_ON_ERROR_RETRY) { + if (conf->retry_interval >= 0) { + blk_set_on_error_retry_interval(blk, conf->retry_interval); + } + if (conf->retry_timeout >= 0) { + blk_set_on_error_retry_timeout(blk, conf->retry_timeout); + } + 
} + return true; } diff --git a/include/hw/block/block.h b/include/hw/block/block.h index 5902c0440a..24fb7d77af 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -33,6 +33,8 @@ typedef struct BlockConf { bool share_rw; BlockdevOnError rerror; BlockdevOnError werror; + int64_t retry_interval; + int64_t retry_timeout; } BlockConf; static inline unsigned int get_physical_block_exp(BlockConf *conf) @@ -79,7 +81,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) DEFINE_PROP_BLOCKDEV_ON_ERROR("rerror", _state, _conf.rerror, \ BLOCKDEV_ON_ERROR_AUTO), \ DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror, \ - BLOCKDEV_ON_ERROR_AUTO) + BLOCKDEV_ON_ERROR_AUTO), \ + DEFINE_PROP_INT64("retry_interval", _state, _conf.retry_interval, \ + -1), \ + DEFINE_PROP_INT64("retry_timeout", _state, _conf.retry_timeout, -1) /* Backend access helpers */ diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 56a403883d..887c19ff5d 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -25,6 +25,9 @@ */ #include "block/block.h" +/* block backend default retry interval */ +#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 + /* Callbacks for block device models */ typedef struct BlockDevOps { /* @@ -198,6 +201,8 @@ void blk_inc_in_flight(BlockBackend *blk); void blk_dec_in_flight(BlockBackend *blk); void blk_drain(BlockBackend *blk); void blk_drain_all(void); +void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval); +void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout); void blk_error_retry_reset_timeout(BlockBackend *blk); void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error); -- Gitee From a81122e37595fe1cc9eaa2adbbfccbfdf8f988b8 Mon Sep 17 00:00:00 2001 From: Jiahui Cen Date: Thu, 21 Jan 2021 15:46:53 +0800 Subject: [PATCH 7/7] virtio_blk: Add support for retry on errors Insert failed requests 
into device's list for later retry and handle queued requests to implement retry_request_cb. Signed-off-by: Jiahui Cen Signed-off-by: Ying Fang Signed-off-by: Alex Chen --- hw/block/virtio-blk.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index f139cd7cc9..c8d94a3dfb 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -108,6 +108,10 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, block_acct_failed(blk_get_stats(s->blk), &req->acct); } virtio_blk_free_request(req); + } else if (action == BLOCK_ERROR_ACTION_RETRY) { + req->mr_next = NULL; + req->next = s->rq; + s->rq = req; } blk_error_action(s->blk, action, is_read, error); @@ -149,6 +153,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) } } + blk_error_retry_reset_timeout(s->blk); virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); block_acct_done(blk_get_stats(s->blk), &req->acct); virtio_blk_free_request(req); @@ -168,6 +173,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret) } } + blk_error_retry_reset_timeout(s->blk); virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); block_acct_done(blk_get_stats(s->blk), &req->acct); virtio_blk_free_request(req); @@ -190,6 +196,7 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) } } + blk_error_retry_reset_timeout(s->blk); virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); if (is_write_zeroes) { block_acct_done(blk_get_stats(s->blk), &req->acct); @@ -828,12 +835,12 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) { - VirtIOBlockReq *req = s->rq; + VirtIOBlockReq *req; MultiReqBuffer mrb = {}; - s->rq = NULL; - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + req = s->rq; + s->rq = NULL; while (req) { VirtIOBlockReq *next = req->next; if (virtio_blk_handle_request(req, &mrb)) { @@ -1138,8 +1145,16 @@ 
static void virtio_blk_resize(void *opaque) aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev); } +static void virtio_blk_retry_request(void *opaque) +{ + VirtIOBlock *s = VIRTIO_BLK(opaque); + + virtio_blk_process_queued_requests(s, false); +} + static const BlockDevOps virtio_block_ops = { .resize_cb = virtio_blk_resize, + .retry_request_cb = virtio_blk_retry_request, }; static void virtio_blk_device_realize(DeviceState *dev, Error **errp) -- Gitee