From 08801d190afd21f7d3db9a2cdce2b1528903ac2c Mon Sep 17 00:00:00 2001
From: liuxiangdong <liuxiangdong5@huawei.com>
Date: Tue, 8 Feb 2022 15:10:25 +0800
Subject: [PATCH 01/55] net/dump.c: Suppress spurious compiler warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Compiling with gcc version 11.2.0 (Ubuntu 11.2.0-13ubuntu1) results in
a (spurious) warning:

  In function ‘dump_receive_iov’,
      inlined from ‘filter_dump_receive_iov’ at ../net/dump.c:157:5:
  ../net/dump.c:89:9: error: ‘writev’ specified size 18446744073709551600
exceeds maximum object size 9223372036854775807 [-Werror=stringop-overflow=]
     89 |     if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) {
        |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  In file included from /home/ptomsich/qemu/include/qemu/osdep.h:108,
                   from ../net/dump.c:25:
  ../net/dump.c: In function ‘filter_dump_receive_iov’:
  /usr/include/x86_64-linux-gnu/sys/uio.h:52:16: note: in a call to function
‘writev’ declared with attribute ‘read_only (2, 3)’
     52 | extern ssize_t writev (int __fd, const struct iovec *__iovec, int
__count)
        |                ^~~~~~
  cc1: all warnings being treated as errors

This change helps that version of GCC to understand what is going on
and suppresses this warning.

Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
---
 net/dump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dump.c b/net/dump.c
index a07ba62401..c32d3bf4e6 100644
--- a/net/dump.c
+++ b/net/dump.c
@@ -86,7 +86,7 @@ static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt)
     dumpiov[0].iov_len = sizeof(hdr);
     cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, 0, caplen);
 
-    if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) {
+    if (writev(s->fd, &dumpiov[0], cnt + 1) != sizeof(hdr) + caplen) {
         error_report("network dump write error - stopping dump");
         close(s->fd);
         s->fd = -1;
-- 
Gitee


From 4c3d47e04886e072acc0e4fefdc49e9d1f6b4ad1 Mon Sep 17 00:00:00 2001
From: Jiahui Cen <cenjiahui@huawei.com>
Date: Thu, 21 Jan 2021 15:46:45 +0800
Subject: [PATCH 02/55] qapi/block-core: Add retry option for error action

Add a new error action 'retry' to support retry on errors.

Signed-off-by: Jiahui Cen <cenjiahui@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Alex Chen <alex.chen@huawei.com>
---
 blockdev.c           | 2 ++
 qapi/block-core.json | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index b35072644e..6f1981635b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -333,6 +333,8 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
         return BLOCKDEV_ON_ERROR_STOP;
     } else if (!strcmp(buf, "report")) {
         return BLOCKDEV_ON_ERROR_REPORT;
+    } else if (!strcmp(buf, "retry")) {
+        return BLOCKDEV_ON_ERROR_RETRY;
     } else {
         error_setg(errp, "'%s' invalid %s error action",
                    buf, is_read ? "read" : "write");
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1d3dd9cb48..804beabfb0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1146,7 +1146,7 @@
 # Since: 1.3
 ##
 { 'enum': 'BlockdevOnError',
-  'data': ['report', 'ignore', 'enospc', 'stop', 'auto'] }
+  'data': ['report', 'ignore', 'enospc', 'stop', 'auto', 'retry'] }
 
 ##
 # @MirrorSyncMode:
@@ -4952,7 +4952,7 @@
 # Since: 2.1
 ##
 { 'enum': 'BlockErrorAction',
-  'data': [ 'ignore', 'report', 'stop' ] }
+  'data': [ 'ignore', 'report', 'stop', 'retry' ] }
 
 
 ##
-- 
Gitee


From 4dc180e87fb641f64fce7be3a0807488d0cc0a51 Mon Sep 17 00:00:00 2001
From: Jiahui Cen <cenjiahui@huawei.com>
Date: Thu, 21 Jan 2021 15:46:46 +0800
Subject: [PATCH 03/55] block-backend: Introduce retry timer

Add a timer to regularly trigger retry on errors.

Signed-off-by: Jiahui Cen <cenjiahui@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Alex Chen <alex.chen@huawei.com>
---
 block/block-backend.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 12ef80ea17..257cd775c0 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -35,6 +35,9 @@
 
 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
 
+/* block backend default retry interval */
+#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL   1000
+
 typedef struct BlockBackendAioNotifier {
     void (*attached_aio_context)(AioContext *new_context, void *opaque);
     void (*detach_aio_context)(void *opaque);
@@ -95,6 +98,15 @@ struct BlockBackend {
      * Accessed with atomic ops.
      */
     unsigned int in_flight;
+
+    /* Timer for retry on errors. */
+    QEMUTimer *retry_timer;
+    /* Interval in ms to trigger next retry. */
+    int64_t retry_interval;
+    /* Start time of the first error. Used to check timeout. */
+    int64_t retry_start_time;
+    /* Retry timeout. 0 represents infinite retry. */
+    int64_t retry_timeout;
 };
 
 typedef struct BlockBackendAIOCB {
@@ -353,6 +365,11 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
     blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT;
     blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
 
+    blk->retry_timer = NULL;
+    blk->retry_interval = BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL;
+    blk->retry_start_time = 0;
+    blk->retry_timeout = 0;
+
     block_acct_init(&blk->stats);
 
     qemu_co_queue_init(&blk->queued_requests);
@@ -471,6 +488,10 @@ static void blk_delete(BlockBackend *blk)
     QTAILQ_REMOVE(&block_backends, blk, link);
     drive_info_del(blk->legacy_dinfo);
     block_acct_cleanup(&blk->stats);
+    if (blk->retry_timer) {
+        timer_del(blk->retry_timer);
+        timer_free(blk->retry_timer);
+    }
     g_free(blk);
 }
 
-- 
Gitee


From dfda8c57de71f2f10b57cf21b1e36f18d4ed37a3 Mon Sep 17 00:00:00 2001
From: Jiahui Cen <cenjiahui@huawei.com>
Date: Thu, 21 Jan 2021 15:46:47 +0800
Subject: [PATCH 04/55] block-backend: Add device specific retry callback

Add retry_request_cb to BlockDevOps to perform the device-specific retry
action. The backend's timer is registered only when the backend is set to
'retry' on errors and the device supports the retry action.

Signed-off-by: Jiahui Cen <cenjiahui@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Alex Chen <alex.chen@huawei.com>
---
 block/block-backend.c          | 8 ++++++++
 include/sysemu/block-backend.h | 4 ++++
 2 files changed, 12 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 257cd775c0..24003adf0b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1018,6 +1018,14 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
     blk->dev_ops = ops;
     blk->dev_opaque = opaque;
 
+    if ((blk->on_read_error == BLOCKDEV_ON_ERROR_RETRY ||
+         blk->on_write_error == BLOCKDEV_ON_ERROR_RETRY) &&
+        ops->retry_request_cb) {
+        blk->retry_timer = aio_timer_new(blk->ctx, QEMU_CLOCK_REALTIME,
+                                         SCALE_MS, ops->retry_request_cb,
+                                         opaque);
+    }
+
     /* Are we currently quiesced? Should we enforce this right now? */
     if (blk->quiesce_counter && ops->drained_begin) {
         ops->drained_begin(opaque);
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index e5e1524f06..a7a13d47de 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -70,6 +70,10 @@ typedef struct BlockDevOps {
      * Is the device still busy?
      */
     bool (*drained_poll)(void *opaque);
+    /*
+     * Runs when retrying failed requests.
+     */
+    void (*retry_request_cb)(void *opaque);
 } BlockDevOps;
 
 /* This struct is embedded in (the private) BlockBackend struct and contains
-- 
Gitee


From 2e1c75e5a0339d2bf417e5a4437d8e627a303286 Mon Sep 17 00:00:00 2001
From: Jiahui Cen <cenjiahui@huawei.com>
Date: Thu, 21 Jan 2021 15:46:48 +0800
Subject: [PATCH 05/55] block-backend: Enable retry action on errors

Enable the retry action when the backend's retry timer is available. The
action arms the timer, which then runs the device-specific retry callback.

Signed-off-by: Jiahui Cen <cenjiahui@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Alex Chen <alex.chen@huawei.com>
---
 block/block-backend.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 24003adf0b..5a016d32fa 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1793,6 +1793,9 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
         return BLOCK_ERROR_ACTION_REPORT;
     case BLOCKDEV_ON_ERROR_IGNORE:
         return BLOCK_ERROR_ACTION_IGNORE;
+    case BLOCKDEV_ON_ERROR_RETRY:
+        return (blk->retry_timer) ?
+               BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT;
     case BLOCKDEV_ON_ERROR_AUTO:
     default:
         abort();
@@ -1840,6 +1843,10 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
         qemu_system_vmstop_request_prepare();
         send_qmp_error_event(blk, action, is_read, error);
         qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
+    } else if (action == BLOCK_ERROR_ACTION_RETRY) {
+        timer_mod(blk->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) +
+                                    blk->retry_interval);
+        send_qmp_error_event(blk, action, is_read, error);
     } else {
         send_qmp_error_event(blk, action, is_read, error);
     }
-- 
Gitee


From 953590f4854d75e6051237f668c9fb393235f471 Mon Sep 17 00:00:00 2001
From: Jiahui Cen <cenjiahui@huawei.com>
Date: Thu, 21 Jan 2021 15:46:49 +0800
Subject: [PATCH 06/55] block-backend: Add timeout support for retry

A retry should only be triggered while the timeout has not been reached, so
check the timeout before retrying. The device should also reset
retry_start_time after a successful retry.

Signed-off-by: Jiahui Cen <cenjiahui@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Alex Chen <alex.chen@huawei.com>
---
 block/block-backend.c          | 25 ++++++++++++++++++++++++-
 include/sysemu/block-backend.h |  1 +
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 5a016d32fa..37e21c473e 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1766,6 +1766,29 @@ void blk_drain_all(void)
     bdrv_drain_all_end();
 }
 
+static bool blk_error_retry_timeout(BlockBackend *blk)
+{
+    /* No timeout set, infinite retries. */
+    if (!blk->retry_timeout) {
+        return false;
+    }
+
+    /* The first time an error occurs. */
+    if (!blk->retry_start_time) {
+        blk->retry_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+        return false;
+    }
+
+    return qemu_clock_get_ms(QEMU_CLOCK_REALTIME) > (blk->retry_start_time +
+                                                     blk->retry_timeout);
+}
+
+void blk_error_retry_reset_timeout(BlockBackend *blk)
+{
+    if (blk->retry_timer && blk->retry_start_time)
+        blk->retry_start_time = 0;
+}
+
 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
                       BlockdevOnError on_write_error)
 {
@@ -1794,7 +1817,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
     case BLOCKDEV_ON_ERROR_IGNORE:
         return BLOCK_ERROR_ACTION_IGNORE;
     case BLOCKDEV_ON_ERROR_RETRY:
-        return (blk->retry_timer) ?
+        return (blk->retry_timer && !blk_error_retry_timeout(blk)) ?
                BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT;
     case BLOCKDEV_ON_ERROR_AUTO:
     default:
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index a7a13d47de..56a403883d 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -198,6 +198,7 @@ void blk_inc_in_flight(BlockBackend *blk);
 void blk_dec_in_flight(BlockBackend *blk);
 void blk_drain(BlockBackend *blk);
 void blk_drain_all(void);
+void blk_error_retry_reset_timeout(BlockBackend *blk);
 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
                       BlockdevOnError on_write_error);
 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
-- 
Gitee


From a58fda7b158441c645e143bf658d12914ffbc7b8 Mon Sep 17 00:00:00 2001
From: Jiahui Cen <cenjiahui@huawei.com>
Date: Thu, 21 Jan 2021 15:46:50 +0800
Subject: [PATCH 07/55] block: Add error retry param setting

Add "retry_interval" and "retry_timeout" parameters for the drive and device
options. These parameters are valid only when werror/rerror=retry.

eg. --drive file=image,rerror=retry,retry_interval=1000,retry_timeout=5000

Signed-off-by: Jiahui Cen <cenjiahui@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Alex Chen <alex.chen@huawei.com>
---
 block/block-backend.c          | 13 +++++++--
 blockdev.c                     | 50 ++++++++++++++++++++++++++++++++++
 hw/block/block.c               | 10 +++++++
 include/hw/block/block.h       |  7 ++++-
 include/sysemu/block-backend.h |  5 ++++
 5 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 37e21c473e..d3d90a95a5 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -35,9 +35,6 @@
 
 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
 
-/* block backend default retry interval */
-#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL   1000
-
 typedef struct BlockBackendAioNotifier {
     void (*attached_aio_context)(AioContext *new_context, void *opaque);
     void (*detach_aio_context)(void *opaque);
@@ -1766,6 +1763,16 @@ void blk_drain_all(void)
     bdrv_drain_all_end();
 }
 
+void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval)
+{
+    blk->retry_interval = interval;
+}
+
+void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout)
+{
+    blk->retry_timeout = timeout;
+}
+
 static bool blk_error_retry_timeout(BlockBackend *blk)
 {
     /* No timeout set, infinite retries. */
diff --git a/blockdev.c b/blockdev.c
index 6f1981635b..10a73fa423 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -480,6 +480,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
     const char *buf;
     int bdrv_flags = 0;
     int on_read_error, on_write_error;
+    int64_t retry_interval, retry_timeout;
     bool account_invalid, account_failed;
     bool writethrough, read_only;
     BlockBackend *blk;
@@ -572,6 +573,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
         }
     }
 
+    retry_interval = qemu_opt_get_number(opts, "retry_interval",
+                                         BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL);
+    retry_timeout = qemu_opt_get_number(opts, "retry_timeout", 0);
+
     if (snapshot) {
         bdrv_flags |= BDRV_O_SNAPSHOT;
     }
@@ -635,6 +640,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
 
     blk_set_enable_write_cache(blk, !writethrough);
     blk_set_on_error(blk, on_read_error, on_write_error);
+    if (on_read_error == BLOCKDEV_ON_ERROR_RETRY ||
+        on_write_error == BLOCKDEV_ON_ERROR_RETRY) {
+        blk_set_on_error_retry_interval(blk, retry_interval);
+        blk_set_on_error_retry_timeout(blk, retry_timeout);
+    }
 
     if (!monitor_add_blk(blk, id, errp)) {
         blk_unref(blk);
@@ -761,6 +771,14 @@ QemuOptsList qemu_legacy_drive_opts = {
             .name = "werror",
             .type = QEMU_OPT_STRING,
             .help = "write error action",
+        },{
+            .name = "retry_interval",
+            .type = QEMU_OPT_NUMBER,
+            .help = "interval for retry action in millisecond",
+        },{
+            .name = "retry_timeout",
+            .type = QEMU_OPT_NUMBER,
+            .help = "timeout for retry action in millisecond",
         },{
             .name = "copy-on-read",
             .type = QEMU_OPT_BOOL,
@@ -783,6 +801,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
     BlockInterfaceType type;
     int max_devs, bus_id, unit_id, index;
     const char *werror, *rerror;
+    int64_t retry_interval, retry_timeout;
     bool read_only = false;
     bool copy_on_read;
     const char *filename;
@@ -990,6 +1009,29 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
         qdict_put_str(bs_opts, "rerror", rerror);
     }
 
+    if (qemu_opt_find(legacy_opts, "retry_interval")) {
+        if ((werror == NULL || strcmp(werror, "retry")) &&
+            (rerror == NULL || strcmp(rerror, "retry"))) {
+            error_setg(errp, "retry_interval is only supported "
+                             "by werror/rerror=retry");
+            goto fail;
+        }
+        retry_interval = qemu_opt_get_number(legacy_opts, "retry_interval",
+                             BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL);
+        qdict_put_int(bs_opts, "retry_interval", retry_interval);
+    }
+
+    if (qemu_opt_find(legacy_opts, "retry_timeout")) {
+        if ((werror == NULL || strcmp(werror, "retry")) &&
+            (rerror == NULL || strcmp(rerror, "retry"))) {
+            error_setg(errp, "retry_timeout is only supported "
+                             "by werror/rerror=retry");
+            goto fail;
+        }
+        retry_timeout = qemu_opt_get_number(legacy_opts, "retry_timeout", 0);
+        qdict_put_int(bs_opts, "retry_timeout", retry_timeout);
+    }
+
     /* Actual block device init: Functionality shared with blockdev-add */
     blk = blockdev_init(filename, bs_opts, errp);
     bs_opts = NULL;
@@ -3806,6 +3848,14 @@ QemuOptsList qemu_common_drive_opts = {
             .name = "werror",
             .type = QEMU_OPT_STRING,
             .help = "write error action",
+        },{
+            .name = "retry_interval",
+            .type = QEMU_OPT_NUMBER,
+            .help = "interval for retry action in millisecond",
+        },{
+            .name = "retry_timeout",
+            .type = QEMU_OPT_NUMBER,
+            .help = "timeout for retry action in millisecond",
         },{
             .name = BDRV_OPT_READ_ONLY,
             .type = QEMU_OPT_BOOL,
diff --git a/hw/block/block.c b/hw/block/block.c
index d47ebf005a..26c0767552 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -206,6 +206,16 @@ bool blkconf_apply_backend_options(BlockConf *conf, bool readonly,
     blk_set_enable_write_cache(blk, wce);
     blk_set_on_error(blk, rerror, werror);
 
+    if (rerror == BLOCKDEV_ON_ERROR_RETRY ||
+        werror == BLOCKDEV_ON_ERROR_RETRY) {
+        if (conf->retry_interval >= 0) {
+            blk_set_on_error_retry_interval(blk, conf->retry_interval);
+        }
+        if (conf->retry_timeout >= 0) {
+            blk_set_on_error_retry_timeout(blk, conf->retry_timeout);
+        }
+    }
+
     return true;
 }
 
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index 5902c0440a..24fb7d77af 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -33,6 +33,8 @@ typedef struct BlockConf {
     bool share_rw;
     BlockdevOnError rerror;
     BlockdevOnError werror;
+    int64_t retry_interval;
+    int64_t retry_timeout;
 } BlockConf;
 
 static inline unsigned int get_physical_block_exp(BlockConf *conf)
@@ -79,7 +81,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
     DEFINE_PROP_BLOCKDEV_ON_ERROR("rerror", _state, _conf.rerror,       \
                                   BLOCKDEV_ON_ERROR_AUTO),              \
     DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror,       \
-                                  BLOCKDEV_ON_ERROR_AUTO)
+                                  BLOCKDEV_ON_ERROR_AUTO),              \
+    DEFINE_PROP_INT64("retry_interval", _state, _conf.retry_interval,   \
+                      -1),                                              \
+    DEFINE_PROP_INT64("retry_timeout", _state, _conf.retry_timeout, -1)
 
 /* Backend access helpers */
 
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 56a403883d..887c19ff5d 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -25,6 +25,9 @@
  */
 #include "block/block.h"
 
+/* block backend default retry interval */
+#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL   1000
+
 /* Callbacks for block device models */
 typedef struct BlockDevOps {
     /*
@@ -198,6 +201,8 @@ void blk_inc_in_flight(BlockBackend *blk);
 void blk_dec_in_flight(BlockBackend *blk);
 void blk_drain(BlockBackend *blk);
 void blk_drain_all(void);
+void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval);
+void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout);
 void blk_error_retry_reset_timeout(BlockBackend *blk);
 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
                       BlockdevOnError on_write_error);
-- 
Gitee


From a81122e37595fe1cc9eaa2adbbfccbfdf8f988b8 Mon Sep 17 00:00:00 2001
From: Jiahui Cen <cenjiahui@huawei.com>
Date: Thu, 21 Jan 2021 15:46:53 +0800
Subject: [PATCH 08/55] virtio_blk: Add support for retry on errors

Insert failed requests into device's list for later retry and handle
queued requests to implement retry_request_cb.

Signed-off-by: Jiahui Cen <cenjiahui@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Alex Chen <alex.chen@huawei.com>
---
 hw/block/virtio-blk.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index f139cd7cc9..c8d94a3dfb 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -108,6 +108,10 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
             block_acct_failed(blk_get_stats(s->blk), &req->acct);
         }
         virtio_blk_free_request(req);
+    } else if (action == BLOCK_ERROR_ACTION_RETRY) {
+        req->mr_next = NULL;
+        req->next = s->rq;
+        s->rq = req;
     }
 
     blk_error_action(s->blk, action, is_read, error);
@@ -149,6 +153,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
             }
         }
 
+        blk_error_retry_reset_timeout(s->blk);
         virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
         block_acct_done(blk_get_stats(s->blk), &req->acct);
         virtio_blk_free_request(req);
@@ -168,6 +173,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret)
         }
     }
 
+    blk_error_retry_reset_timeout(s->blk);
     virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
     block_acct_done(blk_get_stats(s->blk), &req->acct);
     virtio_blk_free_request(req);
@@ -190,6 +196,7 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
         }
     }
 
+    blk_error_retry_reset_timeout(s->blk);
     virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
     if (is_write_zeroes) {
         block_acct_done(blk_get_stats(s->blk), &req->acct);
@@ -828,12 +835,12 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 
 void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh)
 {
-    VirtIOBlockReq *req = s->rq;
+    VirtIOBlockReq *req;
     MultiReqBuffer mrb = {};
 
-    s->rq = NULL;
-
     aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+    req = s->rq;
+    s->rq = NULL;
     while (req) {
         VirtIOBlockReq *next = req->next;
         if (virtio_blk_handle_request(req, &mrb)) {
@@ -1138,8 +1145,16 @@ static void virtio_blk_resize(void *opaque)
     aio_bh_schedule_oneshot(qemu_get_aio_context(), virtio_resize_cb, vdev);
 }
 
+static void virtio_blk_retry_request(void *opaque)
+{
+    VirtIOBlock *s = VIRTIO_BLK(opaque);
+
+    virtio_blk_process_queued_requests(s, false);
+}
+
 static const BlockDevOps virtio_block_ops = {
     .resize_cb = virtio_blk_resize,
+    .retry_request_cb = virtio_blk_retry_request,
 };
 
 static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
-- 
Gitee


From d41206e959717f68a31da4a2d875d33035baeb9f Mon Sep 17 00:00:00 2001
From: Chuan Zheng <zhengchuan@huawei.com>
Date: Mon, 29 Jul 2019 16:22:12 +0800
Subject: [PATCH 09/55] vhost: cancel migration when vhost-user restarted
 during migration

QEMU aborts if the vhost-user process is restarted during migration
while vhost_log_global_start/stop is being called. The reason is that
vhost_dev_set_log returns -1 because the network connection is
temporarily lost. Let's cancel the migration and report the error to the
user in this abnormal situation.

Signed-off-by: Ying Fang <fangying1@huawei.com>
Reviewed-by: Gonglei <arei.gonglei@huawei.com>
---
 hw/virtio/vhost.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 437347ad01..dafb23c481 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -25,6 +25,7 @@
 #include "hw/virtio/virtio-access.h"
 #include "migration/blocker.h"
 #include "migration/qemu-file-types.h"
+#include "migration/migration.h"
 #include "sysemu/dma.h"
 #include "sysemu/tcg.h"
 #include "trace.h"
@@ -947,20 +948,24 @@ check_dev_state:
 static void vhost_log_global_start(MemoryListener *listener)
 {
     int r;
+    Error *errp = NULL;
 
     r = vhost_migration_log(listener, true);
     if (r < 0) {
-        abort();
+        error_setg(&errp, "Failed to start vhost migration log");
+        migrate_fd_error(migrate_get_current(), errp);
     }
 }
 
 static void vhost_log_global_stop(MemoryListener *listener)
 {
     int r;
+    Error *errp = NULL;
 
     r = vhost_migration_log(listener, false);
     if (r < 0) {
-        abort();
+        error_setg(&errp, "Failed to stop vhost migration log");
+        migrate_fd_error(migrate_get_current(), errp);
     }
 }
 
-- 
Gitee


From 39d851b5d5517fbcecc8d16229ae72ca152899b7 Mon Sep 17 00:00:00 2001
From: Chuan Zheng <zhengchuan@huawei.com>
Date: Sat, 30 Jan 2021 14:57:54 +0800
Subject: [PATCH 10/55] migration: Add multi-thread compress method

A multi-thread compress method parameter is added to hold the method we
are going to use. By default the 'zlib' method is used to maintain the
compatibility as before.

Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
Signed-off-by: Zeyu Jin <jinzeyu@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
---
 hw/core/qdev-properties-system.c | 11 +++++++++++
 include/hw/qdev-properties.h     |  4 ++++
 migration/migration.c            | 11 +++++++++++
 monitor/hmp-cmds.c               | 13 +++++++++++++
 qapi/migration.json              | 26 +++++++++++++++++++++++++-
 5 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index a91f60567a..8c265bed6f 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -1119,3 +1119,14 @@ const PropertyInfo qdev_prop_uuid = {
     .set   = set_uuid,
     .set_default_value = set_default_uuid_auto,
 };
+
+/* --- CompressMethod --- */
+const PropertyInfo qdev_prop_compress_method = {
+    .name = "CompressMethod",
+    .description = "multi-thread compression method, "
+                   "zlib",
+    .enum_table = &CompressMethod_lookup,
+    .get = qdev_propinfo_get_enum,
+    .set = qdev_propinfo_set_enum,
+    .set_default_value = qdev_propinfo_set_default_value_enum,
+};
diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index f7925f67d0..ea129d65a6 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -58,6 +58,7 @@ extern const PropertyInfo qdev_prop_int64;
 extern const PropertyInfo qdev_prop_size;
 extern const PropertyInfo qdev_prop_string;
 extern const PropertyInfo qdev_prop_on_off_auto;
+extern const PropertyInfo qdev_prop_compress_method;
 extern const PropertyInfo qdev_prop_size32;
 extern const PropertyInfo qdev_prop_arraylen;
 extern const PropertyInfo qdev_prop_link;
@@ -161,6 +162,9 @@ extern const PropertyInfo qdev_prop_link;
     DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*)
 #define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \
     DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_on_off_auto, OnOffAuto)
+#define DEFINE_PROP_COMPRESS_METHOD(_n, _s, _f, _d) \
+    DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_compress_method, \
+                        CompressMethod)
 #define DEFINE_PROP_SIZE32(_n, _s, _f, _d)                       \
     DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size32, uint32_t)
 
diff --git a/migration/migration.c b/migration/migration.c
index abaf6f9e3d..fa3db87d75 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -83,6 +83,7 @@
 #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
 /*0: means nocompress, 1: best speed, ... 9: best compress ratio */
 #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
+#define DEFAULT_MIGRATE_COMPRESS_METHOD COMPRESS_METHOD_ZLIB
 /* Define default autoconverge cpu throttle migration parameters */
 #define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
 #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
@@ -855,6 +856,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
     params->compress_wait_thread = s->parameters.compress_wait_thread;
     params->has_decompress_threads = true;
     params->decompress_threads = s->parameters.decompress_threads;
+    params->has_compress_method = true;
+    params->compress_method = s->parameters.compress_method;
     params->has_throttle_trigger_threshold = true;
     params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
     params->has_cpu_throttle_initial = true;
@@ -1491,6 +1494,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
         dest->decompress_threads = params->decompress_threads;
     }
 
+    if (params->has_compress_method) {
+        dest->compress_method = params->compress_method;
+    }
+
     if (params->has_throttle_trigger_threshold) {
         dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
     }
@@ -4159,6 +4166,9 @@ static Property migration_properties[] = {
     DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
                       parameters.decompress_threads,
                       DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
+    DEFINE_PROP_COMPRESS_METHOD("compress-method", MigrationState,
+                      parameters.compress_method,
+                      DEFAULT_MIGRATE_COMPRESS_METHOD),
     DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
                       parameters.throttle_trigger_threshold,
                       DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
@@ -4275,6 +4285,7 @@ static void migration_instance_init(Object *obj)
     params->has_compress_level = true;
     params->has_compress_threads = true;
     params->has_decompress_threads = true;
+    params->has_compress_method = true;
     params->has_throttle_trigger_threshold = true;
     params->has_cpu_throttle_initial = true;
     params->has_cpu_throttle_increment = true;
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 9c91bf93e9..294652034e 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -45,6 +45,7 @@
 #include "qapi/qapi-visit-net.h"
 #include "qapi/qapi-visit-migration.h"
 #include "qapi/qmp/qdict.h"
+#include "qapi/qapi-visit-migration.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/string-input-visitor.h"
 #include "qapi/string-output-visitor.h"
@@ -429,6 +430,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
             MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS),
             params->decompress_threads);
         assert(params->has_throttle_trigger_threshold);
+        monitor_printf(mon, "%s: %s\n",
+            MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_METHOD),
+            CompressMethod_str(params->compress_method));
         monitor_printf(mon, "%s: %u\n",
             MigrationParameter_str(MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD),
             params->throttle_trigger_threshold);
@@ -1191,6 +1195,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
     MigrateSetParameters *p = g_new0(MigrateSetParameters, 1);
     uint64_t valuebw = 0;
     uint64_t cache_size;
+    CompressMethod compress_method;
     Error *err = NULL;
     int val, ret;
 
@@ -1216,6 +1221,14 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
         p->has_decompress_threads = true;
         visit_type_uint8(v, param, &p->decompress_threads, &err);
         break;
+    case MIGRATION_PARAMETER_COMPRESS_METHOD:
+        p->has_compress_method = true;
+        visit_type_CompressMethod(v, param, &compress_method, &err);
+        if (err) {
+            break;
+        }
+        p->compress_method = compress_method;
+        break;
     case MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD:
         p->has_throttle_trigger_threshold = true;
         visit_type_uint8(v, param, &p->throttle_trigger_threshold, &err);
diff --git a/qapi/migration.json b/qapi/migration.json
index bbfd48cf0b..3a76907ea9 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -596,6 +596,19 @@
       'bitmaps': [ 'BitmapMigrationBitmapAlias' ]
   } }
 
+##
+# @CompressMethod:
+#
+# An enumeration of multi-thread compression methods.
+#
+# @zlib: use zlib compression method.
+#
+# Since: 5.0
+#
+##
+{ 'enum': 'CompressMethod',
+  'data': [ 'zlib' ] }
+
 ##
 # @MigrationParameter:
 #
@@ -632,6 +645,9 @@
 #                      compression, so set the decompress-threads to the number about 1/4
 #                      of compress-threads is adequate.
 #
+# @compress-method: Which multi-thread compression method to use.
+#                   Defaults to none. (Since 5.0)
+#
 # @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
 #                              to trigger throttling. It is expressed as percentage.
 #                              The default value is 50. (Since 5.0)
@@ -758,7 +774,7 @@
   'data': ['announce-initial', 'announce-max',
            'announce-rounds', 'announce-step',
            'compress-level', 'compress-threads', 'decompress-threads',
-           'compress-wait-thread', 'throttle-trigger-threshold',
+           'compress-wait-thread', 'compress-method', 'throttle-trigger-threshold',
            'cpu-throttle-initial', 'cpu-throttle-increment',
            'cpu-throttle-tailslow',
            'tls-creds', 'tls-hostname', 'tls-authz', 'max-bandwidth',
@@ -797,6 +813,9 @@
 #
 # @decompress-threads: decompression thread count
 #
+# @compress-method: Set compression method to use in multi-thread compression.
+#                   Defaults to none. (Since 5.0)
+#
 # @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
 #                              to trigger throttling. It is expressed as percentage.
 #                              The default value is 50. (Since 5.0)
@@ -930,6 +949,7 @@
             '*compress-threads': 'uint8',
             '*compress-wait-thread': 'bool',
             '*decompress-threads': 'uint8',
+            '*compress-method': 'CompressMethod',
             '*throttle-trigger-threshold': 'uint8',
             '*cpu-throttle-initial': 'uint8',
             '*cpu-throttle-increment': 'uint8',
@@ -995,6 +1015,9 @@
 #
 # @decompress-threads: decompression thread count
 #
+# @compress-method: Which multi-thread compression method to use.
+#                   Defaults to none. (Since 5.0)
+#
 # @throttle-trigger-threshold: The ratio of bytes_dirty_period and bytes_xfer_period
 #                              to trigger throttling. It is expressed as percentage.
 #                              The default value is 50. (Since 5.0)
@@ -1128,6 +1151,7 @@
             '*compress-threads': 'uint8',
             '*compress-wait-thread': 'bool',
             '*decompress-threads': 'uint8',
+            '*compress-method': 'CompressMethod',
             '*throttle-trigger-threshold': 'uint8',
             '*cpu-throttle-initial': 'uint8',
             '*cpu-throttle-increment': 'uint8',
-- 
Gitee


From b871594aa1798ddcc7f5124e5b3e1c5d858c155c Mon Sep 17 00:00:00 2001
From: Chuan Zheng <zhengchuan@huawei.com>
Date: Sat, 30 Jan 2021 15:21:17 +0800
Subject: [PATCH 11/55] migration: Refactoring multi-thread compress migration

Code refactor for the compression procedure which includes:

1. Move qemu_compress_data and qemu_put_compression_data from qemu-file.c to
ram.c, because most of that code's logic has nothing to do with qemu-file.
Besides, the decompression code is located in ram.c only.

2. Simplify the function input arguments for compression and decompression.
Wrap the input into the param structure which already exists. This change also
makes the function much more flexible for other compression methods.

Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
Signed-off-by: Zeyu Jin <jinzeyu@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
---
 migration/qemu-file.c | 61 ++++++-------------------------
 migration/qemu-file.h |  4 +-
 migration/ram.c       | 85 +++++++++++++++++++++++++++++++------------
 3 files changed, 75 insertions(+), 75 deletions(-)

diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 6338d8e2ff..e07026da4f 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -745,55 +745,6 @@ uint64_t qemu_get_be64(QEMUFile *f)
     return v;
 }
 
-/* return the size after compression, or negative value on error */
-static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
-                              const uint8_t *source, size_t source_len)
-{
-    int err;
-
-    err = deflateReset(stream);
-    if (err != Z_OK) {
-        return -1;
-    }
-
-    stream->avail_in = source_len;
-    stream->next_in = (uint8_t *)source;
-    stream->avail_out = dest_len;
-    stream->next_out = dest;
-
-    err = deflate(stream, Z_FINISH);
-    if (err != Z_STREAM_END) {
-        return -1;
-    }
-
-    return stream->next_out - dest;
-}
-
-/* Compress size bytes of data start at p and store the compressed
- * data to the buffer of f.
- *
- * Since the file is dummy file with empty_ops, return -1 if f has no space to
- * save the compressed data.
- */
-ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
-                                  const uint8_t *p, size_t size)
-{
-    ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
-
-    if (blen < compressBound(size)) {
-        return -1;
-    }
-
-    blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t),
-                              blen, p, size);
-    if (blen < 0) {
-        return -1;
-    }
-
-    qemu_put_be32(f, blen);
-    add_buf_to_iovec(f, blen);
-    return blen + sizeof(int32_t);
-}
 
 /* Put the data in the buffer of f_src to the buffer of f_des, and
  * then reset the buf_index of f_src to 0.
@@ -866,3 +817,15 @@ QIOChannel *qemu_file_get_ioc(QEMUFile *file)
 {
     return file->has_ioc ? QIO_CHANNEL(file->opaque) : NULL;
 }
+
+ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr)
+{
+    *dest_ptr = f->buf + f->buf_index + sizeof(int32_t);
+    return IO_BUF_SIZE - f->buf_index - sizeof(int32_t);
+}
+
+void qemu_put_compress_end(QEMUFile *f, unsigned int v)
+{
+    qemu_put_be32(f, v);
+    add_buf_to_iovec(f, v);
+}
diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index 3f36d4dc8c..617a1373ad 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -139,8 +139,6 @@ bool qemu_file_is_writable(QEMUFile *f);
 
 size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset);
 size_t qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size);
-ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream,
-                                  const uint8_t *p, size_t size);
 int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src);
 
 /*
@@ -167,6 +165,8 @@ void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
 void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
 void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
 
+ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr);
+void qemu_put_compress_end(QEMUFile *f, unsigned int v);
 /* Whenever this is found in the data stream, the flags
  * will be passed to ram_control_load_hook in the incoming-migration
  * side. This lets before_ram_iterate/after_ram_iterate add
diff --git a/migration/ram.c b/migration/ram.c
index 863035d235..1176816fba 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -449,26 +449,22 @@ static QemuThread *decompress_threads;
 static QemuMutex decomp_done_lock;
 static QemuCond decomp_done_cond;
 
-static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
-                                 ram_addr_t offset, uint8_t *source_buf);
+static bool do_compress_ram_page(CompressParam *param, RAMBlock *block);
 
 static void *do_data_compress(void *opaque)
 {
     CompressParam *param = opaque;
     RAMBlock *block;
-    ram_addr_t offset;
     bool zero_page;
 
     qemu_mutex_lock(&param->mutex);
     while (!param->quit) {
         if (param->block) {
             block = param->block;
-            offset = param->offset;
             param->block = NULL;
             qemu_mutex_unlock(&param->mutex);
 
-            zero_page = do_compress_ram_page(param->file, &param->stream,
-                                             block, offset, param->originbuf);
+            zero_page = do_compress_ram_page(param, block);
 
             qemu_mutex_lock(&comp_done_lock);
             param->done = true;
@@ -1342,28 +1338,73 @@ static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
     return 1;
 }
 
-static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
-                                 ram_addr_t offset, uint8_t *source_buf)
+/*
+ * Compress size bytes of data start at p and store the compressed
+ * data to the buffer of f.
+ *
+ * Since the file is dummy file with empty_ops, return -1 if f has no space to
+ * save the compressed data.
+ */
+static ssize_t qemu_put_compression_data(CompressParam *param, size_t size)
+{
+    int err;
+    uint8_t *dest = NULL;
+    z_stream *stream = &param->stream;
+    uint8_t *p = param->originbuf;
+    QEMUFile *f = f = param->file;
+    ssize_t blen = qemu_put_compress_start(f, &dest);
+
+    if (blen < compressBound(size)) {
+        return -1;
+    }
+
+    err = deflateReset(stream);
+    if (err != Z_OK) {
+        return -1;
+    }
+
+    stream->avail_in = size;
+    stream->next_in = p;
+    stream->avail_out = blen;
+    stream->next_out = dest;
+
+    err = deflate(stream, Z_FINISH);
+    if (err != Z_STREAM_END) {
+        return -1;
+    }
+
+    blen = stream->next_out - dest;
+    if (blen < 0) {
+        return -1;
+    }
+
+    qemu_put_compress_end(f, blen);
+    return blen + sizeof(int32_t);
+}
+
+static bool do_compress_ram_page(CompressParam *param, RAMBlock *block)
 {
     RAMState *rs = ram_state;
+    ram_addr_t offset = param->offset;
     uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
     bool zero_page = false;
     int ret;
 
-    if (save_zero_page_to_file(rs, f, block, offset)) {
+    if (save_zero_page_to_file(rs, param->file, block, offset)) {
         zero_page = true;
         goto exit;
     }
 
-    save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
+    save_page_header(rs, param->file, block,
+                         offset | RAM_SAVE_FLAG_COMPRESS_PAGE);
 
     /*
      * copy it to a internal buffer to avoid it being modified by VM
      * so that we can catch up the error during compression and
      * decompression
      */
-    memcpy(source_buf, p, TARGET_PAGE_SIZE);
-    ret = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
+    memcpy(param->originbuf, p, TARGET_PAGE_SIZE);
+    ret = qemu_put_compression_data(param, TARGET_PAGE_SIZE);
     if (ret < 0) {
         qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
         error_report("compressed data failed!");
@@ -3374,19 +3415,20 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
 
 /* return the size after decompression, or negative value on error */
 static int
-qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
-                     const uint8_t *source, size_t source_len)
+qemu_uncompress_data(DecompressParam *param, uint8_t *dest, size_t pagesize)
 {
     int err;
 
+    z_stream *stream = &param->stream;
+
     err = inflateReset(stream);
     if (err != Z_OK) {
         return -1;
     }
 
-    stream->avail_in = source_len;
-    stream->next_in = (uint8_t *)source;
-    stream->avail_out = dest_len;
+    stream->avail_in = param->len;
+    stream->next_in = param->compbuf;
+    stream->avail_out = pagesize;
     stream->next_out = dest;
 
     err = inflate(stream, Z_NO_FLUSH);
@@ -3400,22 +3442,17 @@ qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
 static void *do_data_decompress(void *opaque)
 {
     DecompressParam *param = opaque;
-    unsigned long pagesize;
     uint8_t *des;
-    int len, ret;
+    int ret;
 
     qemu_mutex_lock(&param->mutex);
     while (!param->quit) {
         if (param->des) {
             des = param->des;
-            len = param->len;
             param->des = 0;
             qemu_mutex_unlock(&param->mutex);
 
-            pagesize = TARGET_PAGE_SIZE;
-
-            ret = qemu_uncompress_data(&param->stream, des, pagesize,
-                                       param->compbuf, len);
+            ret = qemu_uncompress_data(param, des, TARGET_PAGE_SIZE);
             if (ret < 0 && migrate_get_current()->decompress_error_check) {
                 error_report("decompress data failed");
                 qemu_file_set_error(decomp_file, ret);
-- 
Gitee


From 5e4bc7ceaf81a4932c92e479e9add947b698395b Mon Sep 17 00:00:00 2001
From: Chuan Zheng <zhengchuan@huawei.com>
Date: Sat, 30 Jan 2021 15:57:31 +0800
Subject: [PATCH 12/55] migration: Add multi-thread compress ops

Add the MigrationCompressOps and MigrationDecompressOps structures to make
the compression method configurable for multi-thread compression migration.

Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
Signed-off-by: Zeyu Jin <jinzeyu@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
---
 migration/migration.c |   9 ++
 migration/migration.h |   1 +
 migration/ram.c       | 269 ++++++++++++++++++++++++++++++------------
 3 files changed, 201 insertions(+), 78 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index fa3db87d75..07dc059251 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2456,6 +2456,15 @@ int migrate_decompress_threads(void)
     return s->parameters.decompress_threads;
 }
 
+CompressMethod migrate_compress_method(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->parameters.compress_method;
+}
+
 bool migrate_dirty_bitmaps(void)
 {
     MigrationState *s;
diff --git a/migration/migration.h b/migration/migration.h
index 8130b703eb..4ed4f555da 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -355,6 +355,7 @@ int migrate_compress_level(void);
 int migrate_compress_threads(void);
 int migrate_compress_wait_thread(void);
 int migrate_decompress_threads(void);
+CompressMethod migrate_compress_method(void);
 bool migrate_use_events(void);
 bool migrate_postcopy_blocktime(void);
 bool migrate_background_snapshot(void);
diff --git a/migration/ram.c b/migration/ram.c
index 1176816fba..069560e7f9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -417,6 +417,9 @@ struct CompressParam {
     /* internally used fields */
     z_stream stream;
     uint8_t *originbuf;
+
+    /* for zlib compression */
+    z_stream stream;
 };
 typedef struct CompressParam CompressParam;
 
@@ -428,12 +431,29 @@ struct DecompressParam {
     void *des;
     uint8_t *compbuf;
     int len;
+
+    /* for zlib compression */
     z_stream stream;
 };
 typedef struct DecompressParam DecompressParam;
 
+typedef struct {
+    int (*save_setup)(CompressParam *param);
+    void (*save_cleanup)(CompressParam *param);
+    ssize_t (*compress_data)(CompressParam *param, size_t size);
+} MigrationCompressOps;
+
+typedef struct {
+    int (*load_setup)(DecompressParam *param);
+    void (*load_cleanup)(DecompressParam *param);
+    int (*decompress_data)(DecompressParam *param, uint8_t *dest, size_t size);
+    int (*check_len)(int len);
+} MigrationDecompressOps;
+
 static CompressParam *comp_param;
 static QemuThread *compress_threads;
+static MigrationCompressOps *compress_ops;
+static MigrationDecompressOps *decompress_ops;
 /* comp_done_cond is used to wake up the migration thread when
  * one of the compression threads has finished the compression.
  * comp_done_lock is used to co-work with comp_done_cond.
@@ -451,6 +471,157 @@ static QemuCond decomp_done_cond;
 
 static bool do_compress_ram_page(CompressParam *param, RAMBlock *block);
 
+static int zlib_save_setup(CompressParam *param)
+{
+    if (deflateInit(&param->stream,
+                    migrate_compress_level()) != Z_OK) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static ssize_t zlib_compress_data(CompressParam *param, size_t size)
+
+    int err;
+    uint8_t *dest = NULL;
+    z_stream *stream = &param->stream;
+    uint8_t *p = param->originbuf;
+    QEMUFile *f = f = param->file;
+    ssize_t blen = qemu_put_compress_start(f, &dest);
+
+    if (blen < compressBound(size)) {
+       return -1;
+    }
+
+    err = deflateReset(stream);
+    if (err != Z_OK) {
+        return -1;
+    }
+
+    stream->avail_in = size;
+    stream->next_in = p;
+    stream->avail_out = blen;
+    stream->next_out = dest;
+
+    err = deflate(stream, Z_FINISH);
+    if (err != Z_STREAM_END) {
+        return -1;
+    }
+
+    blen = stream->next_out - dest;
+    if (blen < 0) {
+        return -1;
+    }
+
+    qemu_put_compress_end(f, blen);
+    return blen + sizeof(int32_t);
+}
+
+static void zlib_save_cleanup(CompressParam *param)
+{
+    deflateEnd(&param->stream);
+}
+
+static int zlib_load_setup(DecompressParam *param)
+{
+    if (inflateInit(&param->stream) != Z_OK) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static int
+zlib_decompress_data(DecompressParam *param, uint8_t *dest, size_t size)
+{
+    int err;
+
+    z_stream *stream = &param->stream;
+
+    err = inflateReset(stream);
+    if (err != Z_OK) {
+        return -1;
+    }
+
+    stream->avail_in = param->len;
+    stream->next_in = param->compbuf;
+    stream->avail_out = size;
+    stream->next_out = dest;
+
+    err = inflate(stream, Z_NO_FLUSH);
+    if (err != Z_STREAM_END) {
+        return -1;
+    }
+
+    return stream->total_out;
+}
+
+static void zlib_load_cleanup(DecompressParam *param)
+{
+    inflateEnd(&param->stream);
+}
+
+static int zlib_check_len(int len)
+{
+    return len < 0 || len > compressBound(TARGET_PAGE_SIZE);
+}
+
+static int set_compress_ops(void)
+{
+   compress_ops = g_new0(MigrationCompressOps, 1);
+
+    switch (migrate_compress_method()) {
+    case COMPRESS_METHOD_ZLIB:
+        compress_ops->save_setup = zlib_save_setup;
+        compress_ops->save_cleanup = zlib_save_cleanup;
+        compress_ops->compress_data = zlib_compress_data;
+        break;
+    default:
+        return -1;
+    }
+
+    return 0;
+}
+
+static int set_decompress_ops(void)
+{
+   decompress_ops = g_new0(MigrationDecompressOps, 1);
+
+    switch (migrate_compress_method()) {
+    case COMPRESS_METHOD_ZLIB:
+        decompress_ops->load_setup = zlib_load_setup;
+        decompress_ops->load_cleanup = zlib_load_cleanup;
+        decompress_ops->decompress_data = zlib_decompress_data;
+        decompress_ops->check_len = zlib_check_len;
+        break;
+    default:
+        return -1;
+   }
+
+   return 0;
+}
+
+static void clean_compress_ops(void)
+{
+    compress_ops->save_setup = NULL;
+    compress_ops->save_cleanup = NULL;
+    compress_ops->compress_data = NULL;
+
+    g_free(compress_ops);
+    compress_ops = NULL;
+}
+
+static void clean_decompress_ops(void)
+{
+    decompress_ops->load_setup = NULL;
+    decompress_ops->load_cleanup = NULL;
+    decompress_ops->decompress_data = NULL;
+
+    g_free(decompress_ops);
+    decompress_ops = NULL;
+}
+
 static void *do_data_compress(void *opaque)
 {
     CompressParam *param = opaque;
@@ -508,7 +679,7 @@ static void compress_threads_save_cleanup(void)
         qemu_thread_join(compress_threads + i);
         qemu_mutex_destroy(&comp_param[i].mutex);
         qemu_cond_destroy(&comp_param[i].cond);
-        deflateEnd(&comp_param[i].stream);
+        compress_ops->save_cleanup(&comp_param[i]);
         g_free(comp_param[i].originbuf);
         qemu_fclose(comp_param[i].file);
         comp_param[i].file = NULL;
@@ -519,6 +690,7 @@ static void compress_threads_save_cleanup(void)
     g_free(comp_param);
     compress_threads = NULL;
     comp_param = NULL;
+    clean_compress_ops();
 }
 
 static int compress_threads_save_setup(void)
@@ -528,6 +700,12 @@ static int compress_threads_save_setup(void)
     if (!migrate_use_compression()) {
         return 0;
     }
+
+    if (set_compress_ops() < 0) {
+        clean_compress_ops();
+        return -1;
+    }
+
     thread_count = migrate_compress_threads();
     compress_threads = g_new0(QemuThread, thread_count);
     comp_param = g_new0(CompressParam, thread_count);
@@ -539,8 +717,7 @@ static int compress_threads_save_setup(void)
             goto exit;
         }
 
-        if (deflateInit(&comp_param[i].stream,
-                        migrate_compress_level()) != Z_OK) {
+        if (compress_ops->save_setup(&comp_param[i]) < 0) {
             g_free(comp_param[i].originbuf);
             goto exit;
         }
@@ -1338,50 +1515,6 @@ static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
     return 1;
 }
 
-/*
- * Compress size bytes of data start at p and store the compressed
- * data to the buffer of f.
- *
- * Since the file is dummy file with empty_ops, return -1 if f has no space to
- * save the compressed data.
- */
-static ssize_t qemu_put_compression_data(CompressParam *param, size_t size)
-{
-    int err;
-    uint8_t *dest = NULL;
-    z_stream *stream = &param->stream;
-    uint8_t *p = param->originbuf;
-    QEMUFile *f = f = param->file;
-    ssize_t blen = qemu_put_compress_start(f, &dest);
-
-    if (blen < compressBound(size)) {
-        return -1;
-    }
-
-    err = deflateReset(stream);
-    if (err != Z_OK) {
-        return -1;
-    }
-
-    stream->avail_in = size;
-    stream->next_in = p;
-    stream->avail_out = blen;
-    stream->next_out = dest;
-
-    err = deflate(stream, Z_FINISH);
-    if (err != Z_STREAM_END) {
-        return -1;
-    }
-
-    blen = stream->next_out - dest;
-    if (blen < 0) {
-        return -1;
-    }
-
-    qemu_put_compress_end(f, blen);
-    return blen + sizeof(int32_t);
-}
-
 static bool do_compress_ram_page(CompressParam *param, RAMBlock *block)
 {
     RAMState *rs = ram_state;
@@ -1404,7 +1537,7 @@ static bool do_compress_ram_page(CompressParam *param, RAMBlock *block)
      * decompression
      */
     memcpy(param->originbuf, p, TARGET_PAGE_SIZE);
-    ret = qemu_put_compression_data(param, TARGET_PAGE_SIZE);
+    ret = compress_ops->compress_data(param, TARGET_PAGE_SIZE);
     if (ret < 0) {
         qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
         error_report("compressed data failed!");
@@ -3413,32 +3546,6 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
     }
 }
 
-/* return the size after decompression, or negative value on error */
-static int
-qemu_uncompress_data(DecompressParam *param, uint8_t *dest, size_t pagesize)
-{
-    int err;
-
-    z_stream *stream = &param->stream;
-
-    err = inflateReset(stream);
-    if (err != Z_OK) {
-        return -1;
-    }
-
-    stream->avail_in = param->len;
-    stream->next_in = param->compbuf;
-    stream->avail_out = pagesize;
-    stream->next_out = dest;
-
-    err = inflate(stream, Z_NO_FLUSH);
-    if (err != Z_STREAM_END) {
-        return -1;
-    }
-
-    return stream->total_out;
-}
-
 static void *do_data_decompress(void *opaque)
 {
     DecompressParam *param = opaque;
@@ -3452,7 +3559,7 @@ static void *do_data_decompress(void *opaque)
             param->des = 0;
             qemu_mutex_unlock(&param->mutex);
 
-            ret = qemu_uncompress_data(param, des, TARGET_PAGE_SIZE);
+            ret = decompress_ops->decompress_data(param, des, TARGET_PAGE_SIZE);
             if (ret < 0 && migrate_get_current()->decompress_error_check) {
                 error_report("decompress data failed");
                 qemu_file_set_error(decomp_file, ret);
@@ -3522,7 +3629,7 @@ static void compress_threads_load_cleanup(void)
         qemu_thread_join(decompress_threads + i);
         qemu_mutex_destroy(&decomp_param[i].mutex);
         qemu_cond_destroy(&decomp_param[i].cond);
-        inflateEnd(&decomp_param[i].stream);
+        decompress_ops->load_cleanup(&decomp_param[i]);
         g_free(decomp_param[i].compbuf);
         decomp_param[i].compbuf = NULL;
     }
@@ -3531,6 +3638,7 @@ static void compress_threads_load_cleanup(void)
     decompress_threads = NULL;
     decomp_param = NULL;
     decomp_file = NULL;
+    clean_decompress_ops();
 }
 
 static int compress_threads_load_setup(QEMUFile *f)
@@ -3541,6 +3649,11 @@ static int compress_threads_load_setup(QEMUFile *f)
         return 0;
     }
 
+    if (set_decompress_ops() < 0) {
+        clean_decompress_ops();
+        return -1;
+    }
+
     thread_count = migrate_decompress_threads();
     decompress_threads = g_new0(QemuThread, thread_count);
     decomp_param = g_new0(DecompressParam, thread_count);
@@ -3548,7 +3661,7 @@ static int compress_threads_load_setup(QEMUFile *f)
     qemu_cond_init(&decomp_done_cond);
     decomp_file = f;
     for (i = 0; i < thread_count; i++) {
-        if (inflateInit(&decomp_param[i].stream) != Z_OK) {
+        if (decompress_ops->load_setup(&decomp_param[i]) < 0) {
             goto exit;
         }
 
@@ -4156,7 +4269,7 @@ static int ram_load_precopy(QEMUFile *f)
 
         case RAM_SAVE_FLAG_COMPRESS_PAGE:
             len = qemu_get_be32(f);
-            if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
+            if (decompress_ops->check_len(len)) {
                 error_report("Invalid compressed data length: %d", len);
                 ret = -EINVAL;
                 break;
-- 
Gitee


From bafba05f7405ba31213120d99679cc4b6c1be68e Mon Sep 17 00:00:00 2001
From: Chuan Zheng <zhengchuan@huawei.com>
Date: Sat, 30 Jan 2021 16:15:10 +0800
Subject: [PATCH 13/55] migration: Add zstd support in multi-thread compression

This patch enables zstd option in multi-thread compression.

Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
Signed-off-by: Zeyu Jin <jinzeyu@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
---
 hw/core/qdev-properties-system.c |   2 +-
 migration/ram.c                  | 131 ++++++++++++++++++++++++++++++-
 qapi/migration.json              |   3 +-
 3 files changed, 132 insertions(+), 4 deletions(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 8c265bed6f..6a6ff03be7 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -1124,7 +1124,7 @@ const PropertyInfo qdev_prop_uuid = {
 const PropertyInfo qdev_prop_compress_method = {
     .name = "CompressMethod",
     .description = "multi-thread compression method, "
-                   "zlib",
+                   "zlib/zstd",
     .enum_table = &CompressMethod_lookup,
     .get = qdev_propinfo_get_enum,
     .set = qdev_propinfo_set_enum,
diff --git a/migration/ram.c b/migration/ram.c
index 069560e7f9..c3484ee1a9 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -62,6 +62,11 @@
 #include "qemu/userfaultfd.h"
 #endif /* defined(__linux__) */
 
+#ifdef CONFIG_ZSTD
+#include <zstd.h>
+#include <zstd_errors.h>
+#endif
+
 /***********************************************************/
 /* ram save/restore */
 
@@ -415,11 +420,16 @@ struct CompressParam {
     ram_addr_t offset;
 
     /* internally used fields */
-    z_stream stream;
     uint8_t *originbuf;
 
     /* for zlib compression */
     z_stream stream;
+
+#ifdef CONFIG_ZSTD
+    ZSTD_CStream *zstd_cs;
+    ZSTD_inBuffer in;
+    ZSTD_outBuffer out;
+#endif
 };
 typedef struct CompressParam CompressParam;
 
@@ -434,6 +444,11 @@ struct DecompressParam {
 
     /* for zlib compression */
     z_stream stream;
+#ifdef CONFIG_ZSTD
+    ZSTD_DStream *zstd_ds;
+    ZSTD_inBuffer in;
+    ZSTD_outBuffer out;
+#endif
 };
 typedef struct DecompressParam DecompressParam;
 
@@ -482,7 +497,7 @@ static int zlib_save_setup(CompressParam *param)
 }
 
 static ssize_t zlib_compress_data(CompressParam *param, size_t size)
-
+{
     int err;
     uint8_t *dest = NULL;
     z_stream *stream = &param->stream;
@@ -567,6 +582,103 @@ static int zlib_check_len(int len)
     return len < 0 || len > compressBound(TARGET_PAGE_SIZE);
 }
 
+#ifdef CONFIG_ZSTD
+static int zstd_save_setup(CompressParam *param)
+{
+    int res;
+    param->zstd_cs = ZSTD_createCStream();
+    if (!param->zstd_cs) {
+        return -1;
+    }
+    res = ZSTD_initCStream(param->zstd_cs, migrate_compress_level());
+    if (ZSTD_isError(res)) {
+        return -1;
+    }
+    return 0;
+}
+static void zstd_save_cleanup(CompressParam *param)
+{
+    ZSTD_freeCStream(param->zstd_cs);
+    param->zstd_cs = NULL;
+}
+static ssize_t zstd_compress_data(CompressParam *param, size_t size)
+{
+    int ret;
+    uint8_t *dest = NULL;
+    uint8_t *p = param->originbuf;
+    QEMUFile *f = f = param->file;
+    ssize_t blen = qemu_put_compress_start(f, &dest);
+    if (blen < ZSTD_compressBound(size)) {
+        return -1;
+    }
+    param->out.dst = dest;
+    param->out.size = blen;
+    param->out.pos = 0;
+    param->in.src = p;
+    param->in.size = size;
+    param->in.pos = 0;
+    do {
+        ret = ZSTD_compressStream2(param->zstd_cs, &param->out,
+                                   &param->in, ZSTD_e_end);
+    } while (ret > 0 && (param->in.size - param->in.pos > 0)
+            && (param->out.size - param->out.pos > 0));
+    if (ret > 0 && (param->in.size - param->in.pos > 0)) {
+        return -1;
+    }
+    if (ZSTD_isError(ret)) {
+        return -1;
+    }
+    blen = param->out.pos;
+    qemu_put_compress_end(f, blen);
+    return blen + sizeof(int32_t);
+}
+
+static int zstd_load_setup(DecompressParam *param)
+{
+    int ret;
+    param->zstd_ds = ZSTD_createDStream();
+    if (!param->zstd_ds) {
+        return -1;
+    }
+    ret = ZSTD_initDStream(param->zstd_ds);
+    if (ZSTD_isError(ret)) {
+        return -1;
+    }
+    return 0;
+}
+static void zstd_load_cleanup(DecompressParam *param)
+{
+    ZSTD_freeDStream(param->zstd_ds);
+    param->zstd_ds = NULL;
+}
+static int
+zstd_decompress_data(DecompressParam *param, uint8_t *dest, size_t size)
+{
+    int ret;
+    param->out.dst = dest;
+    param->out.size = size;
+    param->out.pos = 0;
+    param->in.src = param->compbuf;
+    param->in.size = param->len;
+    param->in.pos = 0;
+    do {
+        ret = ZSTD_decompressStream(param->zstd_ds, &param->out, &param->in);
+    } while (ret > 0 && (param->in.size - param->in.pos > 0)
+                    && (param->out.size - param->out.pos > 0));
+    if (ret > 0 && (param->in.size - param->in.pos > 0)) {
+        return -1;
+    }
+    if (ZSTD_isError(ret)) {
+        return -1;
+    }
+    return ret;
+}
+static int zstd_check_len(int len)
+{
+    return len < 0 || len > ZSTD_compressBound(TARGET_PAGE_SIZE);
+}
+#endif
+
 static int set_compress_ops(void)
 {
    compress_ops = g_new0(MigrationCompressOps, 1);
@@ -577,6 +689,13 @@ static int set_compress_ops(void)
         compress_ops->save_cleanup = zlib_save_cleanup;
         compress_ops->compress_data = zlib_compress_data;
         break;
+#ifdef CONFIG_ZSTD
+    case COMPRESS_METHOD_ZSTD:
+        compress_ops->save_setup = zstd_save_setup;
+        compress_ops->save_cleanup = zstd_save_cleanup;
+        compress_ops->compress_data = zstd_compress_data;
+        break;
+#endif
     default:
         return -1;
     }
@@ -595,6 +714,14 @@ static int set_decompress_ops(void)
         decompress_ops->decompress_data = zlib_decompress_data;
         decompress_ops->check_len = zlib_check_len;
         break;
+#ifdef CONFIG_ZSTD
+    case COMPRESS_METHOD_ZSTD:
+        decompress_ops->load_setup = zstd_load_setup;
+        decompress_ops->load_cleanup = zstd_load_cleanup;
+        decompress_ops->decompress_data = zstd_decompress_data;
+        decompress_ops->check_len = zstd_check_len;
+        break;
+#endif
     default:
         return -1;
    }
diff --git a/qapi/migration.json b/qapi/migration.json
index 3a76907ea9..d4ebc5f028 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -602,12 +602,13 @@
 # An enumeration of multi-thread compression methods.
 #
 # @zlib: use zlib compression method.
+# @zstd: use zstd compression method.
 #
 # Since: 5.0
 #
 ##
 { 'enum': 'CompressMethod',
-  'data': [ 'zlib' ] }
+  'data': [ 'zlib', { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] }
 
 ##
 # @MigrationParameter:
-- 
Gitee


From 84780210ac31e430d59b0c6d3d9f522c626b6380 Mon Sep 17 00:00:00 2001
From: Chuan Zheng <zhengchuan@huawei.com>
Date: Sat, 30 Jan 2021 16:23:15 +0800
Subject: [PATCH 14/55] migration: Add compress_level sanity check

Zlib compression has levels from 1 to 9 (plus 0, which means no compression),
whereas Zstd compression has levels from 1 to 22 (levels >= 20 are not
recommended). Add a sanity check to make sure that the compress_level given
by the user is valid for the selected compression method.

Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
Signed-off-by: Zeyu Jin <jinzeyu@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
---
 migration/migration.c | 39 +++++++++++++++++++++++++++++++++++----
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 07dc059251..f86dd8cccd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1320,14 +1320,45 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
     }
 }
 
+/*
+ * Check that compress_level is within the range accepted by the selected
+ * compress_method. On failure an error is stored in errp and false is
+ * returned; otherwise true is returned and errp is left untouched.
+ */
+static bool compress_level_check(MigrationParameters *params, Error **errp)
+{
+    switch (params->compress_method) {
+    case COMPRESS_METHOD_ZLIB:
+        /* Level 0 (no compression) is a valid zlib level. */
+        if (params->compress_level > 9) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
+                           "a value in the range of 0 to 9 for Zlib method");
+            return false;
+        }
+        break;
+#ifdef CONFIG_ZSTD
+    case COMPRESS_METHOD_ZSTD:
+        if (params->compress_level > 19 || params->compress_level < 1) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
+                        "a value in the range of 1 to 19 for Zstd method");
+            return false;
+        }
+        break;
+#endif
+    default:
+        error_setg(errp, "Checking compress_level failed for unknown reason");
+        return false;
+    }
+
+    return true;
+}
+
 /*
  * Check whether the parameters are valid. Error will be put into errp
  * (if provided). Return true if valid, otherwise false.
  */
 static bool migrate_params_check(MigrationParameters *params, Error **errp)
 {
-    if (params->has_compress_level &&
-        (params->compress_level > 9)) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
-                   "a value between 0 and 9");
+    if (params->has_compress_level && !compress_level_check(params, errp)) {
+        /* errp was already set by compress_level_check(); don't set it twice */
         return false;
-- 
Gitee


From 213bd45d2c5337f10216c69c13f0438dd40c58d8 Mon Sep 17 00:00:00 2001
From: Chuan Zheng <zhengchuan@huawei.com>
Date: Sat, 30 Jan 2021 16:36:47 +0800
Subject: [PATCH 15/55] doc: Update multi-thread compression doc

Modify the doc to fit the previous changes.

Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
Signed-off-by: Zeyu Jin <jinzeyu@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
---
 docs/multi-thread-compression.txt | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/docs/multi-thread-compression.txt b/docs/multi-thread-compression.txt
index bb88c6bdf1..d429963cb0 100644
--- a/docs/multi-thread-compression.txt
+++ b/docs/multi-thread-compression.txt
@@ -33,14 +33,15 @@ thread compression can be used to accelerate the compression process.
 
 The decompression speed of Zlib is at least 4 times as quick as
 compression, if the source and destination CPU have equal speed,
-keeping the compression thread count 4 times the decompression
-thread count can avoid resource waste.
+and you choose Zlib as compression method, keeping the compression
+thread count 4 times the decompression thread count can avoid resource waste.
 
 Compression level can be used to control the compression speed and the
-compression ratio. High compression ratio will take more time, level 0
-stands for no compression, level 1 stands for the best compression
-speed, and level 9 stands for the best compression ratio. Users can
-select a level number between 0 and 9.
+compression ratio. High compression ratio will take more time,
+level 1 stands for the best compression speed, and higher level means higher
+compression ratio. For Zlib, users can select a level number between 0 and 9,
+where level 0 stands for no compression. For Zstd, users can select a
+level number between 1 and 19.
 
 
 When to use the multiple thread compression in live migration
@@ -116,16 +117,19 @@ to support the multiple thread compression migration:
 2. Activate compression on the source:
     {qemu} migrate_set_capability compress on
 
-3. Set the compression thread count on source:
+3. Set the compression method:
+    {qemu} migrate_set_parameter compress_method zstd
+
+4. Set the compression thread count on source:
     {qemu} migrate_set_parameter compress_threads 12
 
-4. Set the compression level on the source:
+5. Set the compression level on the source:
     {qemu} migrate_set_parameter compress_level 1
 
-5. Set the decompression thread count on destination:
+6. Set the decompression thread count on destination:
     {qemu} migrate_set_parameter decompress_threads 3
 
-6. Start outgoing migration:
+7. Start outgoing migration:
     {qemu} migrate -d tcp:destination.host:4444
     {qemu} info migrate
     Capabilities: ... compress: on
@@ -136,6 +140,7 @@ The following are the default settings:
     compress_threads: 8
     decompress_threads: 2
     compress_level: 1 (which means best speed)
+    compress_method: zlib
 
 So, only the first two steps are required to use the multiple
 thread compression in migration. You can do more if the default
@@ -143,7 +148,7 @@ settings are not appropriate.
 
 TODO
 ====
-Some faster (de)compression method such as LZ4 and Quicklz can help
-to reduce the CPU consumption when doing (de)compression. If using
-these faster (de)compression method, less (de)compression threads
+Compared to Zlib, some faster (de)compression methods such as LZ4
+and Quicklz can help to reduce the CPU consumption when doing (de)compression.
+If using these faster (de)compression methods, fewer (de)compression threads
 are needed when doing the migration.
-- 
Gitee


From ef83cde8dd2c9b404527354489b14d2bd238733d Mon Sep 17 00:00:00 2001
From: Xu Yandong <xuyandong2@huawei.com>
Date: Tue, 8 Feb 2022 20:48:17 +0800
Subject: [PATCH 16/55] cpu: parse +/- feature to avoid failure

To avoid cpu feature parse failure, +/- feature is added.

Signed-off-by: Xu Yandong <xuyandong2@huawei.com>
Signed-off-by: Mingwang Li <limingwang@huawei.com>
---
 target/arm/cpu64.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 15245a60a8..019edc66c9 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -933,10 +933,47 @@ static gchar *aarch64_gdb_arch_name(CPUState *cs)
     return g_strdup("aarch64");
 }
 
+/* Parse "+feature,-feature,feature=foo" CPU feature string
+ */
+static void arm_cpu_parse_featurestr(const char *typename, char *features,
+                                     Error **errp )
+{
+    char *featurestr;
+    char *val;
+    static bool cpu_globals_initialized;
+
+    if (cpu_globals_initialized) {
+        return;
+    }
+    cpu_globals_initialized = true;
+
+    featurestr = features ? strtok(features, ",") : NULL;
+    while (featurestr) {
+        val = strchr(featurestr, '=');
+        if (val) {
+            GlobalProperty *prop = g_new0(typeof(*prop), 1);
+            *val = 0;
+            val++;
+            prop->driver = typename;
+            prop->property = g_strdup(featurestr);
+            prop->value = g_strdup(val);
+            qdev_prop_register_global(prop);
+        } else if (featurestr[0] == '+' || featurestr[0] == '-') {
+            warn_report("Ignore %s feature\n", featurestr);
+        } else {
+            error_setg(errp, "Expected key=value format, found %s.",
+                       featurestr);
+            return;
+        }
+        featurestr = strtok(NULL, ",");
+    }
+}
+
 static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
 {
     CPUClass *cc = CPU_CLASS(oc);
 
+    cc->parse_features = arm_cpu_parse_featurestr;
     cc->gdb_read_register = aarch64_cpu_gdb_read_register;
     cc->gdb_write_register = aarch64_cpu_gdb_write_register;
     cc->gdb_num_core_regs = 34;
-- 
Gitee


From 8ebab06c4824626ab4d7204133cd1e7b9c67f468 Mon Sep 17 00:00:00 2001
From: Xu Yandong <xuyandong2@huawei.com>
Date: Tue, 8 Feb 2022 21:36:22 +0800
Subject: [PATCH 17/55] cpu: add Kunpeng-920 cpu support

Add the Kunpeng-920 CPU model

Signed-off-by: Xu Yandong <xuyandong2@huawei.com>
Signed-off-by: Mingwang Li <limingwang@huawei.com>
---
 hw/arm/virt.c      |  1 +
 target/arm/cpu64.c | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 30da05dfe0..a4a35584e9 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -201,6 +201,7 @@ static const char *valid_cpus[] = {
     ARM_CPU_TYPE_NAME("cortex-a53"),
     ARM_CPU_TYPE_NAME("cortex-a57"),
     ARM_CPU_TYPE_NAME("cortex-a72"),
+    ARM_CPU_TYPE_NAME("Kunpeng-920"),
     ARM_CPU_TYPE_NAME("a64fx"),
     ARM_CPU_TYPE_NAME("host"),
     ARM_CPU_TYPE_NAME("max"),
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 019edc66c9..aaca79f7c3 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -248,6 +248,26 @@ static void aarch64_a72_initfn(Object *obj)
     define_arm_cp_regs(cpu, cortex_a72_a57_a53_cp_reginfo);
 }
 
+static void aarch64_kunpeng_920_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    /*
+     * Hisilicon Kunpeng-920 CPU is similar to cortex-a72,
+     * so first initialize cpu data as cortex-a72,
+     * and then update the special register.
+     */
+    aarch64_a72_initfn(obj);
+
+    cpu->midr = 0x480fd010;
+    cpu->ctr = 0x84448004;
+    cpu->isar.id_aa64pfr0 = 0x11001111;
+    cpu->isar.id_aa64dfr0 = 0x110305408;
+    cpu->isar.id_aa64isar0 = 0x10211120;
+    cpu->isar.id_aa64mmfr0 = 0x101125;
+    cpu->kvm_target = KVM_ARM_TARGET_GENERIC_V8;
+}
+
 void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
 {
     /*
@@ -892,6 +912,7 @@ static const ARMCPUInfo aarch64_cpus[] = {
     { .name = "cortex-a57",         .initfn = aarch64_a57_initfn },
     { .name = "cortex-a53",         .initfn = aarch64_a53_initfn },
     { .name = "cortex-a72",         .initfn = aarch64_a72_initfn },
+    { .name = "Kunpeng-920",        .initfn = aarch64_kunpeng_920_initfn},
     { .name = "a64fx",              .initfn = aarch64_a64fx_initfn },
     { .name = "max",                .initfn = aarch64_max_initfn },
 };
-- 
Gitee


From f0da7fa5230b5f771570b2c12288e4a56a20dd97 Mon Sep 17 00:00:00 2001
From: Xu Yandong <xuyandong2@huawei.com>
Date: Tue, 8 Feb 2022 22:18:55 +0800
Subject: [PATCH 18/55] cpu: add Cortex-A72 processor kvm target support

The ARM Cortex-A72 is ARMv8-A micro-architecture,
add kvm target to ARM Cortex-A72 processor definition.

Signed-off-by: Xu Yandong <xuyandong2@huawei.com>
Signed-off-by: Mingwang Li <limingwang@huawei.com>
---
 target/arm/cpu64.c      | 1 +
 target/arm/kvm-consts.h | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index aaca79f7c3..556b6f3691 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -202,6 +202,7 @@ static void aarch64_a72_initfn(Object *obj)
     ARMCPU *cpu = ARM_CPU(obj);
 
     cpu->dtb_compatible = "arm,cortex-a72";
+    cpu->kvm_target = QEMU_KVM_ARM_TARGET_GENERIC_V8;
     set_feature(&cpu->env, ARM_FEATURE_V8);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h
index 580f1c1fee..5f1311ade7 100644
--- a/target/arm/kvm-consts.h
+++ b/target/arm/kvm-consts.h
@@ -130,6 +130,8 @@ MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED);
 #define QEMU_KVM_ARM_TARGET_CORTEX_A57 2
 #define QEMU_KVM_ARM_TARGET_XGENE_POTENZA 3
 #define QEMU_KVM_ARM_TARGET_CORTEX_A53 4
+/* Generic ARM v8 target */
+#define QEMU_KVM_ARM_TARGET_GENERIC_V8 5
 
 /* There's no kernel define for this: sentinel value which
  * matches no KVM target value for either 64 or 32 bit
@@ -141,6 +143,7 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8);
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57);
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_XGENE_POTENZA, KVM_ARM_TARGET_XGENE_POTENZA);
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53);
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_GENERIC_V8, KVM_ARM_TARGET_GENERIC_V8);
 
 #define CP_REG_ARM64                   0x6000000000000000ULL
 #define CP_REG_ARM_COPROC_MASK         0x000000000FFF0000
-- 
Gitee


From ec35c96006851a956a7e401f29af0ffe137c4bb9 Mon Sep 17 00:00:00 2001
From: Jiadong Zeng <zengjiadong@phytium.com.cn>
Date: Tue, 8 Feb 2022 22:56:37 +0800
Subject: [PATCH 19/55] add Phytium's CPU models: FT-2000+ and Tengyun-S2500.

Signed-off-by: Jiadong Zeng <zengjiadong@phytium.com.cn>
Signed-off-by: Mingwang Li <limingwang@huawei.com>
---
 hw/arm/virt.c      |  2 ++
 target/arm/cpu64.c | 28 ++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a4a35584e9..3c972fdab0 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -202,6 +202,8 @@ static const char *valid_cpus[] = {
     ARM_CPU_TYPE_NAME("cortex-a57"),
     ARM_CPU_TYPE_NAME("cortex-a72"),
     ARM_CPU_TYPE_NAME("Kunpeng-920"),
+    ARM_CPU_TYPE_NAME("FT-2000+"),
+    ARM_CPU_TYPE_NAME("Tengyun-S2500"),
     ARM_CPU_TYPE_NAME("a64fx"),
     ARM_CPU_TYPE_NAME("host"),
     ARM_CPU_TYPE_NAME("max"),
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 556b6f3691..08d886de7b 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -676,6 +676,32 @@ static Property arm_cpu_pauth_property =
 static Property arm_cpu_pauth_impdef_property =
     DEFINE_PROP_BOOL("pauth-impdef", ARMCPU, prop_pauth_impdef, false);
 
+static void aarch64_max_ft2000plus_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    if (kvm_enabled()) {
+        kvm_arm_set_cpu_features_from_host(cpu);
+        kvm_arm_add_vcpu_properties(obj);
+    } else {
+        aarch64_a72_initfn(obj);
+        cpu->midr = 0x70186622;
+    }
+}
+
+static void aarch64_max_tengyun_s2500_initfn(Object *obj)
+{
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    if (kvm_enabled()) {
+        kvm_arm_set_cpu_features_from_host(cpu);
+        kvm_arm_add_vcpu_properties(obj);
+    } else {
+        aarch64_a72_initfn(obj);
+        cpu->midr = 0x70186632;
+    }
+}
+
 /* -cpu max: if KVM is enabled, like -cpu host (best possible with this host);
  * otherwise, a CPU with as many features enabled as our emulation supports.
  * The version of '-cpu max' for qemu-system-arm is defined in cpu.c;
@@ -914,6 +940,8 @@ static const ARMCPUInfo aarch64_cpus[] = {
     { .name = "cortex-a53",         .initfn = aarch64_a53_initfn },
     { .name = "cortex-a72",         .initfn = aarch64_a72_initfn },
     { .name = "Kunpeng-920",        .initfn = aarch64_kunpeng_920_initfn},
+    { .name = "FT-2000+",           .initfn = aarch64_max_ft2000plus_initfn },
+    { .name = "Tengyun-S2500",      .initfn = aarch64_max_tengyun_s2500_initfn },
     { .name = "a64fx",              .initfn = aarch64_a64fx_initfn },
     { .name = "max",                .initfn = aarch64_max_initfn },
 };
-- 
Gitee


From 3cb1b0ce091998532a30793e3272925da4e6f3aa Mon Sep 17 00:00:00 2001
From: Jiajie Li <lijiajie11@huawei.com>
Date: Mon, 7 Feb 2022 14:31:34 +0800
Subject: [PATCH 20/55] hugepages: hugepages files maybe leftover

Before qemu uses the hugepage memory directory /dev/hugepages/libvirt/qemu/xxx,
the directory may already have been deleted because the virtual machine was
destroyed. This causes qemu to create files directly under
/dev/hugepages/libvirt/qemu/. Such a file is never cleaned up by unlink, and
when the virtual machine is destroyed, libvirt only cleans up the
/dev/hugepages/libvirt/qemu/xxx directory. Unlink the hugepage file right
after creating it to fix the problem.

Signed-off-by: Jinhua Cao <caojinhua1@huawei.com>
Signed-off-by: Jiajie Li <lijiajie11@huawei.com>
---
 include/qemu/mmap-alloc.h |  3 +++
 softmmu/physmem.c         | 10 +++++++++-
 util/mmap-alloc.c         | 22 ++++++++++++++++++++++
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
index 90d0eee705..707202e5be 100644
--- a/include/qemu/mmap-alloc.h
+++ b/include/qemu/mmap-alloc.h
@@ -1,6 +1,9 @@
 #ifndef QEMU_MMAP_ALLOC_H
 #define QEMU_MMAP_ALLOC_H
 
+#define HUGETLBFS_MAGIC       0x958458f6
+
+size_t qemu_fd_getfiletype(int fd);
 
 size_t qemu_fd_getpagesize(int fd);
 
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 3524c04c2a..3b9a61448c 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1496,7 +1496,14 @@ static int file_ram_open(const char *path,
             /* @path names a file that doesn't exist, create it */
             fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
             if (fd >= 0) {
-                *created = true;
+                info_report("open %s success", path);
+                /* Unlink hugetlbfs-backed files right away so that */
+                /* nothing is left behind if QEMU gets killed later. */
+                if (qemu_fd_getfiletype(fd) == HUGETLBFS_MAGIC) {
+                    unlink(path);
+                } else {
+                    *created = true;
+                }
                 break;
             }
         } else if (errno == EISDIR) {
@@ -1515,6 +1522,7 @@ static int file_ram_open(const char *path,
 
             fd = mkstemp(filename);
             if (fd >= 0) {
+                info_report("mkstemp %s success", filename);
                 unlink(filename);
                 g_free(filename);
                 break;
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
index 893d864354..4993dd5bfa 100644
--- a/util/mmap-alloc.c
+++ b/util/mmap-alloc.c
@@ -29,6 +29,28 @@
 #include <sys/vfs.h>
 #endif
 
+/* Return the fstatfs() f_type of @fd, or (size_t)-1 if it can't be read. */
+size_t qemu_fd_getfiletype(int fd)
+{
+#ifdef CONFIG_LINUX
+    struct statfs fs;
+    int ret;
+
+    if (fd == -1) {
+        fprintf(stderr, "fd is invalid \n");
+        return (size_t)-1;
+    }
+    do {
+        ret = fstatfs(fd, &fs);
+    } while (ret != 0 && errno == EINTR);
+    if (ret == 0) {
+        return fs.f_type;
+    }
+    fprintf(stderr, "Couldn't fstatfs() fd: %s\n", strerror(errno));
+#endif
+    return (size_t)-1; /* also the answer where <sys/vfs.h> isn't included */
+}
+
 size_t qemu_fd_getpagesize(int fd)
 {
 #ifdef CONFIG_LINUX
-- 
Gitee


From a09c3928b33b0c53831bd9eeb56f8171c26057bc Mon Sep 17 00:00:00 2001
From: Jiajie Li <lijiajie11@huawei.com>
Date: Tue, 8 Feb 2022 09:46:53 +0800
Subject: [PATCH 21/55] target-i386: Modify the VM's physical bits value set
 policy.

To resolve the problem that a VM with large memory capacity fails
to be live migrated, determine whether the VM is a large memory
capacity based on the memory size (4 TB). If yes, set the bus width
of the VM address to 46 bits. If no, set the bus width to 42 bits.

Signed-off-by: Jinhua Cao <caojinhua1@huawei.com>
Signed-off-by: Jiajie Li <lijiajie11@huawei.com>
---
 target/i386/cpu.c      | 19 ++++++++++++++++++-
 target/i386/cpu.h      |  6 ++++++
 target/i386/host-cpu.c | 13 +++++++------
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index aa9e636800..868cf3e7e8 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6678,6 +6678,23 @@ static void x86_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.eip = value;
 }
 
+/* At present, we check the vm is *LARGE* or not, i.e. whether
+ * the memory size is more than 4T or not.
+ */
+const uint64_t large_vm_mem_size = 0x40000000000UL;
+void x86_cpu_adjuest_by_ram_size(ram_addr_t ram_size, X86CPU *cpu)
+{
+    /* If there is not a large vm, we set the phys_bits to 42 bits,
+     * otherwise, we increase the phys_bits to 46 bits.
+     */
+    if (ram_size < large_vm_mem_size) {
+        cpu->phys_bits = DEFAULT_VM_CPU_PHYS_BITS;
+    } else {
+        cpu->phys_bits = LARGE_VM_CPU_PHYS_BITS;
+        cpu->fill_mtrr_mask = true;
+    }
+}
+
 int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request)
 {
     X86CPU *cpu = X86_CPU(cs);
@@ -6862,7 +6879,7 @@ static Property x86_cpu_properties[] = {
     DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0),
     DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false),
     DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0),
-    DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true),
+    DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, false),
     DEFINE_PROP_UINT32("level-func7", X86CPU, env.cpuid_level_func7,
                        UINT32_MAX),
     DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX),
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 04f2b790c9..6f777fd6ca 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -24,6 +24,7 @@
 #include "cpu-qom.h"
 #include "kvm/hyperv-proto.h"
 #include "exec/cpu-defs.h"
+#include "exec/cpu-common.h"
 #include "qapi/qapi-types-common.h"
 
 /* The x86 has a strong memory model with some store-after-load re-ordering */
@@ -1841,6 +1842,11 @@ struct X86CPU {
 extern const VMStateDescription vmstate_x86_cpu;
 #endif
 
+#define DEFAULT_VM_CPU_PHYS_BITS 42
+#define LARGE_VM_CPU_PHYS_BITS 46
+
+void x86_cpu_adjuest_by_ram_size(ram_addr_t ram_size, X86CPU *cpu);
+
 int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request);
 
 int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu,
diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c
index 10f8aba86e..5a1bbefa36 100644
--- a/target/i386/host-cpu.c
+++ b/target/i386/host-cpu.c
@@ -12,6 +12,7 @@
 #include "host-cpu.h"
 #include "qapi/error.h"
 #include "sysemu/sysemu.h"
+#include "hw/boards.h"
 
 /* Note: Only safe for use on x86(-64) hosts */
 static uint32_t host_cpu_phys_bits(void)
@@ -56,14 +57,14 @@ static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu)
     uint32_t phys_bits = cpu->phys_bits;
     static bool warned;
 
-    /*
-     * Print a warning if the user set it to a value that's not the
-     * host value.
-     */
-    if (phys_bits != host_phys_bits && phys_bits != 0 &&
+    /* adjust x86 cpu phys_bits according to ram_size. */
+    x86_cpu_adjuest_by_ram_size(current_machine->ram_size, cpu);
+
+    /* Print a warning if the host value less than the user set. */
+    if (phys_bits > host_phys_bits && phys_bits != 0 &&
         !warned) {
         warn_report("Host physical bits (%u)"
-                    " does not match phys-bits property (%u)",
+                    " less than phys-bits property (%u)",
                     host_phys_bits, phys_bits);
         warned = true;
     }
-- 
Gitee


From cae52ca5b1dd4a295eaabc9649481f3d6a684f06 Mon Sep 17 00:00:00 2001
From: Mingwang Li <limingwang@huawei.com>
Date: Wed, 9 Feb 2022 17:33:26 +0800
Subject: [PATCH 22/55] Revert "cpu: parse +/- feature to avoid failure"

This reverts commit ef83cde8dd2c9b404527354489b14d2bd238733d.

Signed-off-by: Mingwang Li <limingwang@huawei.com>
---
 target/arm/cpu64.c | 37 -------------------------------------
 1 file changed, 37 deletions(-)

diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 08d886de7b..bd8e5b5676 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -983,47 +983,10 @@ static gchar *aarch64_gdb_arch_name(CPUState *cs)
     return g_strdup("aarch64");
 }
 
-/* Parse "+feature,-feature,feature=foo" CPU feature string
- */
-static void arm_cpu_parse_featurestr(const char *typename, char *features,
-                                     Error **errp )
-{
-    char *featurestr;
-    char *val;
-    static bool cpu_globals_initialized;
-
-    if (cpu_globals_initialized) {
-        return;
-    }
-    cpu_globals_initialized = true;
-
-    featurestr = features ? strtok(features, ",") : NULL;
-    while (featurestr) {
-        val = strchr(featurestr, '=');
-        if (val) {
-            GlobalProperty *prop = g_new0(typeof(*prop), 1);
-            *val = 0;
-            val++;
-            prop->driver = typename;
-            prop->property = g_strdup(featurestr);
-            prop->value = g_strdup(val);
-            qdev_prop_register_global(prop);
-        } else if (featurestr[0] == '+' || featurestr[0] == '-') {
-            warn_report("Ignore %s feature\n", featurestr);
-        } else {
-            error_setg(errp, "Expected key=value format, found %s.",
-                       featurestr);
-            return;
-        }
-        featurestr = strtok(NULL, ",");
-    }
-}
-
 static void aarch64_cpu_class_init(ObjectClass *oc, void *data)
 {
     CPUClass *cc = CPU_CLASS(oc);
 
-    cc->parse_features = arm_cpu_parse_featurestr;
     cc->gdb_read_register = aarch64_cpu_gdb_read_register;
     cc->gdb_write_register = aarch64_cpu_gdb_write_register;
     cc->gdb_num_core_regs = 34;
-- 
Gitee


From 7474971c6fd6c6f77e66ded125e5f2521c7e12a2 Mon Sep 17 00:00:00 2001
From: Mingwang Li <limingwang@huawei.com>
Date: Wed, 9 Feb 2022 17:35:52 +0800
Subject: [PATCH 23/55] Revert "cpu: add Cortex-A72 processor kvm target
 support"

This reverts commit f0da7fa5230b5f771570b2c12288e4a56a20dd97.

Signed-off-by: Mingwang Li <limingwang@huawei.com>
---
 target/arm/cpu64.c      | 1 -
 target/arm/kvm-consts.h | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index bd8e5b5676..26419fe994 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -202,7 +202,6 @@ static void aarch64_a72_initfn(Object *obj)
     ARMCPU *cpu = ARM_CPU(obj);
 
     cpu->dtb_compatible = "arm,cortex-a72";
-    cpu->kvm_target = QEMU_KVM_ARM_TARGET_GENERIC_V8;
     set_feature(&cpu->env, ARM_FEATURE_V8);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h
index 5f1311ade7..580f1c1fee 100644
--- a/target/arm/kvm-consts.h
+++ b/target/arm/kvm-consts.h
@@ -130,8 +130,6 @@ MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED);
 #define QEMU_KVM_ARM_TARGET_CORTEX_A57 2
 #define QEMU_KVM_ARM_TARGET_XGENE_POTENZA 3
 #define QEMU_KVM_ARM_TARGET_CORTEX_A53 4
-/* Generic ARM v8 target */
-#define QEMU_KVM_ARM_TARGET_GENERIC_V8 5
 
 /* There's no kernel define for this: sentinel value which
  * matches no KVM target value for either 64 or 32 bit
@@ -143,7 +141,6 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8);
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57);
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_XGENE_POTENZA, KVM_ARM_TARGET_XGENE_POTENZA);
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53);
-MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_GENERIC_V8, KVM_ARM_TARGET_GENERIC_V8);
 
 #define CP_REG_ARM64                   0x6000000000000000ULL
 #define CP_REG_ARM_COPROC_MASK         0x000000000FFF0000
-- 
Gitee


From 8ddc2bcb196a34cc641d50b057550e4b11dc8700 Mon Sep 17 00:00:00 2001
From: Xu Yandong <xuyandong2@huawei.com>
Date: Wed, 9 Feb 2022 17:39:34 +0800
Subject: [PATCH 24/55] cpu: add Cortex-A72 processor kvm target support

The ARM Cortex-A72 is ARMv8-A micro-architecture,
add kvm target to ARM Cortex-A72 processor definition.

Signed-off-by: Xu Yandong <xuyandong2@huawei.com>
Signed-off-by: Mingwang Li <limingwang@huawei.com>
---
 target/arm/cpu64.c      | 2 +-
 target/arm/kvm-consts.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 26419fe994..1b56261964 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -202,6 +202,7 @@ static void aarch64_a72_initfn(Object *obj)
     ARMCPU *cpu = ARM_CPU(obj);
 
     cpu->dtb_compatible = "arm,cortex-a72";
+    cpu->kvm_target = QEMU_KVM_ARM_TARGET_GENERIC_V8;
     set_feature(&cpu->env, ARM_FEATURE_V8);
     set_feature(&cpu->env, ARM_FEATURE_NEON);
     set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
@@ -265,7 +266,6 @@ static void aarch64_kunpeng_920_initfn(Object *obj)
     cpu->isar.id_aa64dfr0 = 0x110305408;
     cpu->isar.id_aa64isar0 = 0x10211120;
     cpu->isar.id_aa64mmfr0 = 0x101125;
-    cpu->kvm_target = KVM_ARM_TARGET_GENERIC_V8;
 }
 
 void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h
index 580f1c1fee..5f1311ade7 100644
--- a/target/arm/kvm-consts.h
+++ b/target/arm/kvm-consts.h
@@ -130,6 +130,8 @@ MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED);
 #define QEMU_KVM_ARM_TARGET_CORTEX_A57 2
 #define QEMU_KVM_ARM_TARGET_XGENE_POTENZA 3
 #define QEMU_KVM_ARM_TARGET_CORTEX_A53 4
+/* Generic ARM v8 target */
+#define QEMU_KVM_ARM_TARGET_GENERIC_V8 5
 
 /* There's no kernel define for this: sentinel value which
  * matches no KVM target value for either 64 or 32 bit
@@ -141,6 +143,7 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8);
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57);
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_XGENE_POTENZA, KVM_ARM_TARGET_XGENE_POTENZA);
 MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53);
+MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_GENERIC_V8, KVM_ARM_TARGET_GENERIC_V8);
 
 #define CP_REG_ARM64                   0x6000000000000000ULL
 #define CP_REG_ARM_COPROC_MASK         0x000000000FFF0000
-- 
Gitee


From eee7ff398496524881225d503309a9853972c5df Mon Sep 17 00:00:00 2001
From: Binfeng Wu <wubinfeng@huawei.com>
Date: Tue, 8 Feb 2022 17:00:39 +0800
Subject: [PATCH 25/55] vfio/pci: Ascend310 need 4Bytes quirk in bar4

---
 hw/vfio/pci-quirks.c | 75 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 0cf69a8c6d..d86bcaf309 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1209,6 +1209,80 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
     return 0;
 }
 
+#define PCI_VENDOR_ID_HUAWEI      0x19e5
+#define PCI_DEVICE_ID_ASCEND310   0xd100
+#define ASCEND310_XLOADER_SIZE    4
+#define ASCEND310_XLOADER_OFFSET  0x400
+
+typedef struct VFIOAscendBarQuirk {
+    struct VFIOPCIDevice *vdev; /* device whose BAR is intercepted */
+    pcibus_t offset;            /* window start within the BAR */
+    uint8_t bar;                /* index into vdev->bars[] */
+    MemoryRegion *mem;          /* NOTE(review): not set by the probe code */
+} VFIOAscendBarQuirk;
+
+static uint64_t vfio_ascend_quirk_read(void *opaque,
+                                       hwaddr addr, unsigned size)
+{
+    VFIOAscendBarQuirk *quirk = opaque; /* state registered by the probe */
+    VFIOPCIDevice *vdev = quirk->vdev;
+
+    qemu_log("read RO region! addr=0x%" HWADDR_PRIx ", size=%u\n",
+            addr + quirk->offset, size);
+    /* Reads are only logged; the value still comes from the real BAR. */
+    return vfio_region_read(&vdev->bars[quirk->bar].region,
+                            addr + quirk->offset, size);
+}
+
+static void vfio_ascend_quirk_write(void *opaque, hwaddr addr,
+                                    uint64_t data, unsigned size)
+{
+    VFIOAscendBarQuirk *quirk = opaque; /* write is dropped, only logged */
+
+    qemu_log("modifying RO region is not allowed! addr=0x%"
+            HWADDR_PRIx ", data=0x%" PRIx64 ", size=%u\n",
+            addr + quirk->offset, data, size);
+}
+
+static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = {
+    .read = vfio_ascend_quirk_read,   /* logs, then reads the real BAR */
+    .write = vfio_ascend_quirk_write, /* logs and discards the write */
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_probe_ascend310_bar4_quirk(VFIOPCIDevice *vdev, int nr)
+{
+    VFIOQuirk *quirk;
+    VFIOAscendBarQuirk *bar4_quirk;
+
+    if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 4 ||
+        vdev->device_id != PCI_DEVICE_ID_ASCEND310) {
+        return; /* only BAR4 of a Huawei Ascend310 gets this quirk */
+    }
+
+    quirk = g_malloc0(sizeof(*quirk));
+    quirk->nr_mem = 1;
+    quirk->mem = g_new0(MemoryRegion, quirk->nr_mem);
+    bar4_quirk = quirk->data = g_new0(typeof(*bar4_quirk), quirk->nr_mem);
+    bar4_quirk[0].vdev = vdev;
+    bar4_quirk[0].offset = ASCEND310_XLOADER_OFFSET;
+    bar4_quirk[0].bar = nr;
+
+    /*
+     * Intercept reads and writes to the xloader-updating register,
+     * so the VM can't enable xloader-updating.
+     */
+    memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
+                          &vfio_ascend_intercept_regs_quirk,
+                          &bar4_quirk[0],
+                          "vfio-ascend310-bar4-intercept-regs-quirk",
+                          ASCEND310_XLOADER_SIZE);
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
+                                        bar4_quirk[0].offset,
+                                        &quirk->mem[0], 1);
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+}
+
 /*
  * Common quirk probe entry points.
  */
@@ -1261,6 +1335,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
 #ifdef CONFIG_VFIO_IGD
     vfio_probe_igd_bar4_quirk(vdev, nr);
 #endif
+    vfio_probe_ascend310_bar4_quirk(vdev, nr);
 }
 
 void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
-- 
Gitee


From bcc63ff3975cca783308fac7517a7911c29bd7c1 Mon Sep 17 00:00:00 2001
From: Binfeng Wu <wubinfeng@huawei.com>
Date: Tue, 8 Feb 2022 17:16:04 +0800
Subject: [PATCH 26/55] vfio/pci: Ascend710 need 4Bytes quirk in bar0

---
 hw/vfio/pci-quirks.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index d86bcaf309..6a9fc0afc5 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1210,7 +1210,10 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
 }
 
 #define PCI_VENDOR_ID_HUAWEI      0x19e5
+#define PCI_DEVICE_ID_ASCEND710   0xd500
 #define PCI_DEVICE_ID_ASCEND310   0xd100
+#define ASCEND710_XLOADER_SIZE    4
+#define ASCEND710_XLOADER_OFFSET  0x20430
 #define ASCEND310_XLOADER_SIZE    4
 #define ASCEND310_XLOADER_OFFSET  0x400
 
@@ -1250,6 +1253,39 @@ static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+{
+    VFIOQuirk *quirk;
+    VFIOAscendBarQuirk *bar0_quirk;
+
+    if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 0 ||
+        vdev->device_id != PCI_DEVICE_ID_ASCEND710) {
+        return; /* only BAR0 of a Huawei Ascend710 gets this quirk */
+    }
+
+    quirk = g_malloc0(sizeof(*quirk));
+    quirk->nr_mem = 1;
+    quirk->mem = g_new0(MemoryRegion, quirk->nr_mem);
+    bar0_quirk = quirk->data = g_new0(typeof(*bar0_quirk), quirk->nr_mem);
+    bar0_quirk[0].vdev = vdev;
+    bar0_quirk[0].offset = ASCEND710_XLOADER_OFFSET;
+    bar0_quirk[0].bar = nr;
+
+    /*
+     * Intercept reads and writes to the xloader-updating register,
+     * so the VM can't enable xloader-updating.
+     */
+    memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
+                          &vfio_ascend_intercept_regs_quirk,
+                          &bar0_quirk[0],
+                          "vfio-ascend710-bar0-intercept-regs-quirk",
+                          ASCEND710_XLOADER_SIZE);
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
+                                        bar0_quirk[0].offset,
+                                        &quirk->mem[0], 1);
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+}
+
 static void vfio_probe_ascend310_bar4_quirk(VFIOPCIDevice *vdev, int nr)
 {
     VFIOQuirk *quirk;
@@ -1335,6 +1371,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
 #ifdef CONFIG_VFIO_IGD
     vfio_probe_igd_bar4_quirk(vdev, nr);
 #endif
+    vfio_probe_ascend710_bar0_quirk(vdev, nr);
     vfio_probe_ascend310_bar4_quirk(vdev, nr);
 }
 
-- 
Gitee


From 4cf7d00e43c4e90327e13afa3cbc9c7ca3657c9f Mon Sep 17 00:00:00 2001
From: Binfeng Wu <wubinfeng@huawei.com>
Date: Tue, 8 Feb 2022 19:20:36 +0800
Subject: [PATCH 27/55] vfio/pci: Ascend910 need 4Bytes quirk in bar0

---
 hw/vfio/pci-quirks.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 6a9fc0afc5..2457a61196 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1210,8 +1210,11 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
 }
 
 #define PCI_VENDOR_ID_HUAWEI      0x19e5
+#define PCI_DEVICE_ID_ASCEND910   0xd801
 #define PCI_DEVICE_ID_ASCEND710   0xd500
 #define PCI_DEVICE_ID_ASCEND310   0xd100
+#define ASCEND910_XLOADER_SIZE    4
+#define ASCEND910_XLOADER_OFFSET  0x80400
 #define ASCEND710_XLOADER_SIZE    4
 #define ASCEND710_XLOADER_OFFSET  0x20430
 #define ASCEND310_XLOADER_SIZE    4
@@ -1253,6 +1256,39 @@ static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static void vfio_probe_ascend910_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+{
+    VFIOQuirk *quirk;
+    VFIOAscendBarQuirk *bar0_quirk;
+
+    if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 0 ||
+        vdev->device_id != PCI_DEVICE_ID_ASCEND910) {
+        return; /* only BAR0 of a Huawei Ascend910 gets this quirk */
+    }
+
+    quirk = g_malloc0(sizeof(*quirk));
+    quirk->nr_mem = 1;
+    quirk->mem = g_new0(MemoryRegion, quirk->nr_mem);
+    bar0_quirk = quirk->data = g_new0(typeof(*bar0_quirk), quirk->nr_mem);
+    bar0_quirk[0].vdev = vdev;
+    bar0_quirk[0].offset = ASCEND910_XLOADER_OFFSET;
+    bar0_quirk[0].bar = nr;
+
+    /*
+     * Intercept reads and writes to the xloader-updating register,
+     * so the VM can't enable xloader-updating.
+     */
+    memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
+                          &vfio_ascend_intercept_regs_quirk,
+                          &bar0_quirk[0],
+                          "vfio-ascend910-bar0-intercept-regs-quirk",
+                          ASCEND910_XLOADER_SIZE);
+    memory_region_add_subregion_overlap(vdev->bars[nr].region.mem,
+                                        bar0_quirk[0].offset,
+                                        &quirk->mem[0], 1);
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+}
+
 static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr)
 {
     VFIOQuirk *quirk;
@@ -1371,6 +1407,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
 #ifdef CONFIG_VFIO_IGD
     vfio_probe_igd_bar4_quirk(vdev, nr);
 #endif
+    vfio_probe_ascend910_bar0_quirk(vdev, nr);
     vfio_probe_ascend710_bar0_quirk(vdev, nr);
     vfio_probe_ascend310_bar4_quirk(vdev, nr);
 }
-- 
Gitee


From b04e92ed13e49f666f62c8f3daa5746109caf17b Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Mon, 22 Nov 2021 11:26:51 +0800
Subject: [PATCH 28/55] qapi/machine.json: Fix incorrect description for die-id

In terms of scope, die-id should mean "the die number within
socket the CPU belongs to" instead of "the die number within
node/board the CPU belongs to". Fix it to avoid confusing
the Doc reader.

Fixes: 176d2cda0d ("i386/cpu: Consolidate die-id validity in smp context")
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20211122032651.16064-1-wangyanan55@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qapi/machine.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qapi/machine.json b/qapi/machine.json
index 067e3f5378..f1839acf20 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -867,7 +867,7 @@
 #
 # @node-id: NUMA node ID the CPU belongs to
 # @socket-id: socket number within node/board the CPU belongs to
-# @die-id: die number within node/board the CPU belongs to (Since 4.1)
+# @die-id: die number within socket the CPU belongs to (since 4.1)
 # @core-id: core number within die the CPU belongs to
 # @thread-id: thread number within core the CPU belongs to
 #
-- 
Gitee


From d8b2aee4fd6ccd8eb621522b647c392c1dd7955c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Mon, 15 Nov 2021 12:32:09 +0100
Subject: [PATCH 29/55] tests/unit/test-smp-parse: Pass machine type as
 argument to tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use g_test_add_data_func() instead of g_test_add_func() so we can
pass the machine type to the tests (we will soon have different
machine types).

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211216132015.815493-2-philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index b02450e25a..37c6b4981d 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -487,9 +487,10 @@ static void machine_base_class_init(ObjectClass *oc, void *data)
     mc->name = g_strdup(SMP_MACHINE_NAME);
 }
 
-static void test_generic(void)
+static void test_generic(const void *opaque)
 {
-    Object *obj = object_new(TYPE_MACHINE);
+    const char *machine_type = opaque;
+    Object *obj = object_new(machine_type);
     MachineState *ms = MACHINE(obj);
     MachineClass *mc = MACHINE_GET_CLASS(obj);
     SMPTestData *data = &(SMPTestData){{ }};
@@ -525,9 +526,10 @@ static void test_generic(void)
     object_unref(obj);
 }
 
-static void test_with_dies(void)
+static void test_with_dies(const void *opaque)
 {
-    Object *obj = object_new(TYPE_MACHINE);
+    const char *machine_type = opaque;
+    Object *obj = object_new(machine_type);
     MachineState *ms = MACHINE(obj);
     MachineClass *mc = MACHINE_GET_CLASS(obj);
     SMPTestData *data = &(SMPTestData){{ }};
@@ -599,8 +601,12 @@ int main(int argc, char *argv[])
 
     g_test_init(&argc, &argv, NULL);
 
-    g_test_add_func("/test-smp-parse/generic", test_generic);
-    g_test_add_func("/test-smp-parse/with_dies", test_with_dies);
+    g_test_add_data_func("/test-smp-parse/generic",
+                         TYPE_MACHINE,
+                         test_generic);
+    g_test_add_data_func("/test-smp-parse/with_dies",
+                         TYPE_MACHINE,
+                         test_with_dies);
 
     g_test_run();
 
-- 
Gitee


From fad259cf9996dbc4001cb94ec3c846d649401027 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Mon, 15 Nov 2021 12:35:43 +0100
Subject: [PATCH 30/55] tests/unit/test-smp-parse: Split the 'generic' test in
 valid / invalid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split the 'generic' test into two tests: 'valid' and 'invalid'.
This will allow us to remove the hack which modifies the
MachineClass internal state.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211216132015.815493-3-philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 37c6b4981d..425ed6b6b9 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -487,7 +487,7 @@ static void machine_base_class_init(ObjectClass *oc, void *data)
     mc->name = g_strdup(SMP_MACHINE_NAME);
 }
 
-static void test_generic(const void *opaque)
+static void test_generic_valid(const void *opaque)
 {
     const char *machine_type = opaque;
     Object *obj = object_new(machine_type);
@@ -508,6 +508,18 @@ static void test_generic(const void *opaque)
         smp_parse_test(ms, data, true);
     }
 
+    object_unref(obj);
+}
+
+static void test_generic_invalid(const void *opaque)
+{
+    const char *machine_type = opaque;
+    Object *obj = object_new(machine_type);
+    MachineState *ms = MACHINE(obj);
+    MachineClass *mc = MACHINE_GET_CLASS(obj);
+    SMPTestData *data = &(SMPTestData){};
+    int i;
+
     /* Force invalid min CPUs and max CPUs */
     mc->min_cpus = 2;
     mc->max_cpus = 511;
@@ -601,9 +613,12 @@ int main(int argc, char *argv[])
 
     g_test_init(&argc, &argv, NULL);
 
-    g_test_add_data_func("/test-smp-parse/generic",
+    g_test_add_data_func("/test-smp-parse/generic/valid",
+                         TYPE_MACHINE,
+                         test_generic_valid);
+    g_test_add_data_func("/test-smp-parse/generic/invalid",
                          TYPE_MACHINE,
-                         test_generic);
+                         test_generic_invalid);
     g_test_add_data_func("/test-smp-parse/with_dies",
                          TYPE_MACHINE,
                          test_with_dies);
-- 
Gitee


From 4981e75623db6ca681d13719ffcf61b0cfac3edc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Mon, 15 Nov 2021 12:39:12 +0100
Subject: [PATCH 31/55] tests/unit/test-smp-parse: Add 'smp-with-dies' machine
 type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid modifying the MachineClass internals by adding the
'smp-with-dies' machine, which inherits from TYPE_MACHINE.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Tested-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211216132015.815493-4-philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 425ed6b6b9..f66cf7bb59 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -487,6 +487,16 @@ static void machine_base_class_init(ObjectClass *oc, void *data)
     mc->name = g_strdup(SMP_MACHINE_NAME);
 }
 
+static void machine_with_dies_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->min_cpus = MIN_CPUS;
+    mc->max_cpus = MAX_CPUS;
+
+    mc->smp_props.dies_supported = true;
+}
+
 static void test_generic_valid(const void *opaque)
 {
     const char *machine_type = opaque;
@@ -548,9 +558,6 @@ static void test_with_dies(const void *opaque)
     unsigned int num_dies = 2;
     int i;
 
-    /* Force the SMP compat properties */
-    mc->smp_props.dies_supported = true;
-
     for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) {
         *data = data_generic_valid[i];
         unsupported_params_init(mc, data);
@@ -588,9 +595,6 @@ static void test_with_dies(const void *opaque)
         smp_parse_test(ms, data, false);
     }
 
-    /* Restore the SMP compat properties */
-    mc->smp_props.dies_supported = false;
-
     object_unref(obj);
 }
 
@@ -602,6 +606,10 @@ static const TypeInfo smp_machine_types[] = {
         .class_init     = machine_base_class_init,
         .class_size     = sizeof(MachineClass),
         .instance_size  = sizeof(MachineState),
+    }, {
+        .name           = MACHINE_TYPE_NAME("smp-with-dies"),
+        .parent         = TYPE_MACHINE,
+        .class_init     = machine_with_dies_class_init,
     }
 };
 
@@ -620,7 +628,7 @@ int main(int argc, char *argv[])
                          TYPE_MACHINE,
                          test_generic_invalid);
     g_test_add_data_func("/test-smp-parse/with_dies",
-                         TYPE_MACHINE,
+                         MACHINE_TYPE_NAME("smp-with-dies"),
                          test_with_dies);
 
     g_test_run();
-- 
Gitee


From 9a98659dcb37c81e69f54d8f6cbe5116ceba5a36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Mon, 15 Nov 2021 15:44:07 +0100
Subject: [PATCH 32/55] tests/unit/test-smp-parse: Add 'smp-generic-invalid'
 machine type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid modifying the MachineClass internals by adding the
'smp-generic-invalid' machine, which inherits from TYPE_MACHINE.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Message-Id: <20211216132015.815493-5-philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index f66cf7bb59..47e11089e2 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -487,6 +487,17 @@ static void machine_base_class_init(ObjectClass *oc, void *data)
     mc->name = g_strdup(SMP_MACHINE_NAME);
 }
 
+static void machine_generic_invalid_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    /* Force invalid min CPUs and max CPUs */
+    mc->min_cpus = 2;
+    mc->max_cpus = 511;
+
+    mc->smp_props.dies_supported = false;
+}
+
 static void machine_with_dies_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -530,10 +541,6 @@ static void test_generic_invalid(const void *opaque)
     SMPTestData *data = &(SMPTestData){};
     int i;
 
-    /* Force invalid min CPUs and max CPUs */
-    mc->min_cpus = 2;
-    mc->max_cpus = 511;
-
     for (i = 0; i < ARRAY_SIZE(data_generic_invalid); i++) {
         *data = data_generic_invalid[i];
         unsupported_params_init(mc, data);
@@ -541,10 +548,6 @@ static void test_generic_invalid(const void *opaque)
         smp_parse_test(ms, data, false);
     }
 
-    /* Reset the supported min CPUs and max CPUs */
-    mc->min_cpus = MIN_CPUS;
-    mc->max_cpus = MAX_CPUS;
-
     object_unref(obj);
 }
 
@@ -606,6 +609,10 @@ static const TypeInfo smp_machine_types[] = {
         .class_init     = machine_base_class_init,
         .class_size     = sizeof(MachineClass),
         .instance_size  = sizeof(MachineState),
+    }, {
+        .name           = MACHINE_TYPE_NAME("smp-generic-invalid"),
+        .parent         = TYPE_MACHINE,
+        .class_init     = machine_generic_invalid_class_init,
     }, {
         .name           = MACHINE_TYPE_NAME("smp-with-dies"),
         .parent         = TYPE_MACHINE,
@@ -625,7 +632,7 @@ int main(int argc, char *argv[])
                          TYPE_MACHINE,
                          test_generic_valid);
     g_test_add_data_func("/test-smp-parse/generic/invalid",
-                         TYPE_MACHINE,
+                         MACHINE_TYPE_NAME("smp-generic-invalid"),
                          test_generic_invalid);
     g_test_add_data_func("/test-smp-parse/with_dies",
                          MACHINE_TYPE_NAME("smp-with-dies"),
-- 
Gitee


From c33c7dd51eebf5ae7b7ece1e829b0a5ffdcebfe1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Mon, 15 Nov 2021 15:49:59 +0100
Subject: [PATCH 33/55] tests/unit/test-smp-parse: Add 'smp-generic-valid'
 machine type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Keep the common TYPE_MACHINE class initialization in
machine_base_class_init(), make it abstract, and move
the non-common code to a new class: "smp-generic-valid".

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Message-Id: <20211216132015.815493-6-philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 47e11089e2..b20bf2c235 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -478,13 +478,19 @@ static void machine_base_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
 
+    mc->smp_props.prefer_sockets = true;
+
+    mc->name = g_strdup(SMP_MACHINE_NAME);
+}
+
+static void machine_generic_valid_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
     mc->min_cpus = MIN_CPUS;
     mc->max_cpus = MAX_CPUS;
 
-    mc->smp_props.prefer_sockets = true;
     mc->smp_props.dies_supported = false;
-
-    mc->name = g_strdup(SMP_MACHINE_NAME);
 }
 
 static void machine_generic_invalid_class_init(ObjectClass *oc, void *data)
@@ -606,9 +612,14 @@ static const TypeInfo smp_machine_types[] = {
     {
         .name           = TYPE_MACHINE,
         .parent         = TYPE_OBJECT,
+        .abstract       = true,
         .class_init     = machine_base_class_init,
         .class_size     = sizeof(MachineClass),
         .instance_size  = sizeof(MachineState),
+    }, {
+        .name           = MACHINE_TYPE_NAME("smp-generic-valid"),
+        .parent         = TYPE_MACHINE,
+        .class_init     = machine_generic_valid_class_init,
     }, {
         .name           = MACHINE_TYPE_NAME("smp-generic-invalid"),
         .parent         = TYPE_MACHINE,
@@ -629,7 +640,7 @@ int main(int argc, char *argv[])
     g_test_init(&argc, &argv, NULL);
 
     g_test_add_data_func("/test-smp-parse/generic/valid",
-                         TYPE_MACHINE,
+                         MACHINE_TYPE_NAME("smp-generic-valid"),
                          test_generic_valid);
     g_test_add_data_func("/test-smp-parse/generic/invalid",
                          MACHINE_TYPE_NAME("smp-generic-invalid"),
-- 
Gitee


From 964965721bbed1941bf77e5a748efc1274b7c289 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Thu, 11 Nov 2021 08:58:40 +0100
Subject: [PATCH 34/55] tests/unit/test-smp-parse: Simplify pointer to compound
 literal use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We can simply use a local variable (and pass its pointer) instead
of a pointer to a compound literal.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Tested-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211216132015.815493-7-philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 66 ++++++++++++++++++-------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index b20bf2c235..395929b66c 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -520,19 +520,19 @@ static void test_generic_valid(const void *opaque)
     Object *obj = object_new(machine_type);
     MachineState *ms = MACHINE(obj);
     MachineClass *mc = MACHINE_GET_CLASS(obj);
-    SMPTestData *data = &(SMPTestData){{ }};
+    SMPTestData data = {};
     int i;
 
     for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) {
-        *data = data_generic_valid[i];
-        unsupported_params_init(mc, data);
+        data = data_generic_valid[i];
+        unsupported_params_init(mc, &data);
 
-        smp_parse_test(ms, data, true);
+        smp_parse_test(ms, &data, true);
 
         /* Unsupported parameters can be provided with their values as 1 */
-        data->config.has_dies = true;
-        data->config.dies = 1;
-        smp_parse_test(ms, data, true);
+        data.config.has_dies = true;
+        data.config.dies = 1;
+        smp_parse_test(ms, &data, true);
     }
 
     object_unref(obj);
@@ -544,14 +544,14 @@ static void test_generic_invalid(const void *opaque)
     Object *obj = object_new(machine_type);
     MachineState *ms = MACHINE(obj);
     MachineClass *mc = MACHINE_GET_CLASS(obj);
-    SMPTestData *data = &(SMPTestData){};
+    SMPTestData data = {};
     int i;
 
     for (i = 0; i < ARRAY_SIZE(data_generic_invalid); i++) {
-        *data = data_generic_invalid[i];
-        unsupported_params_init(mc, data);
+        data = data_generic_invalid[i];
+        unsupported_params_init(mc, &data);
 
-        smp_parse_test(ms, data, false);
+        smp_parse_test(ms, &data, false);
     }
 
     object_unref(obj);
@@ -563,45 +563,45 @@ static void test_with_dies(const void *opaque)
     Object *obj = object_new(machine_type);
     MachineState *ms = MACHINE(obj);
     MachineClass *mc = MACHINE_GET_CLASS(obj);
-    SMPTestData *data = &(SMPTestData){{ }};
+    SMPTestData data = {};
     unsigned int num_dies = 2;
     int i;
 
     for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) {
-        *data = data_generic_valid[i];
-        unsupported_params_init(mc, data);
+        data = data_generic_valid[i];
+        unsupported_params_init(mc, &data);
 
         /* when dies parameter is omitted, it will be set as 1 */
-        data->expect_prefer_sockets.dies = 1;
-        data->expect_prefer_cores.dies = 1;
+        data.expect_prefer_sockets.dies = 1;
+        data.expect_prefer_cores.dies = 1;
 
-        smp_parse_test(ms, data, true);
+        smp_parse_test(ms, &data, true);
 
         /* when dies parameter is specified */
-        data->config.has_dies = true;
-        data->config.dies = num_dies;
-        if (data->config.has_cpus) {
-            data->config.cpus *= num_dies;
+        data.config.has_dies = true;
+        data.config.dies = num_dies;
+        if (data.config.has_cpus) {
+            data.config.cpus *= num_dies;
         }
-        if (data->config.has_maxcpus) {
-            data->config.maxcpus *= num_dies;
+        if (data.config.has_maxcpus) {
+            data.config.maxcpus *= num_dies;
         }
 
-        data->expect_prefer_sockets.dies = num_dies;
-        data->expect_prefer_sockets.cpus *= num_dies;
-        data->expect_prefer_sockets.max_cpus *= num_dies;
-        data->expect_prefer_cores.dies = num_dies;
-        data->expect_prefer_cores.cpus *= num_dies;
-        data->expect_prefer_cores.max_cpus *= num_dies;
+        data.expect_prefer_sockets.dies = num_dies;
+        data.expect_prefer_sockets.cpus *= num_dies;
+        data.expect_prefer_sockets.max_cpus *= num_dies;
+        data.expect_prefer_cores.dies = num_dies;
+        data.expect_prefer_cores.cpus *= num_dies;
+        data.expect_prefer_cores.max_cpus *= num_dies;
 
-        smp_parse_test(ms, data, true);
+        smp_parse_test(ms, &data, true);
     }
 
     for (i = 0; i < ARRAY_SIZE(data_with_dies_invalid); i++) {
-        *data = data_with_dies_invalid[i];
-        unsupported_params_init(mc, data);
+        data = data_with_dies_invalid[i];
+        unsupported_params_init(mc, &data);
 
-        smp_parse_test(ms, data, false);
+        smp_parse_test(ms, &data, false);
     }
 
     object_unref(obj);
-- 
Gitee


From bcf4b802bd8971c0c5a255e606b15900cd47c6b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Thu, 11 Nov 2021 10:23:06 +0100
Subject: [PATCH 35/55] tests/unit/test-smp-parse: Constify some pointer/struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Declare structures const when we don't need to modify
them at runtime.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Tested-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211216132015.815493-8-philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 395929b66c..0f98c9509e 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -83,7 +83,7 @@ typedef struct SMPTestData {
  * then test the automatic calculation algorithm of the missing
  * values in the parser.
  */
-static struct SMPTestData data_generic_valid[] = {
+static const struct SMPTestData data_generic_valid[] = {
     {
         /* config: no configuration provided
          * expect: cpus=1,sockets=1,cores=1,threads=1,maxcpus=1 */
@@ -285,7 +285,7 @@ static struct SMPTestData data_generic_valid[] = {
     },
 };
 
-static struct SMPTestData data_generic_invalid[] = {
+static const struct SMPTestData data_generic_invalid[] = {
     {
         /* config: -smp 2,dies=2 */
         .config = SMP_CONFIG_WITH_DIES(T, 2, F, 0, T, 2, F, 0, F, 0, F, 0),
@@ -319,7 +319,7 @@ static struct SMPTestData data_generic_invalid[] = {
     },
 };
 
-static struct SMPTestData data_with_dies_invalid[] = {
+static const struct SMPTestData data_with_dies_invalid[] = {
     {
         /* config: -smp 16,sockets=2,dies=2,cores=4,threads=2,maxcpus=16 */
         .config = SMP_CONFIG_WITH_DIES(T, 16, T, 2, T, 2, T, 4, T, 2, T, 16),
@@ -356,7 +356,7 @@ static char *smp_config_to_string(SMPConfiguration *config)
         config->has_maxcpus ? "true" : "false", config->maxcpus);
 }
 
-static char *cpu_topology_to_string(CpuTopology *topo)
+static char *cpu_topology_to_string(const CpuTopology *topo)
 {
     return g_strdup_printf(
         "(CpuTopology) {\n"
@@ -372,7 +372,7 @@ static char *cpu_topology_to_string(CpuTopology *topo)
 }
 
 static void check_parse(MachineState *ms, SMPConfiguration *config,
-                        CpuTopology *expect_topo, const char *expect_err,
+                        const CpuTopology *expect_topo, const char *expect_err,
                         bool is_valid)
 {
     g_autofree char *config_str = smp_config_to_string(config);
@@ -466,7 +466,7 @@ static void smp_parse_test(MachineState *ms, SMPTestData *data, bool is_valid)
 }
 
 /* The parsed results of the unsupported parameters should be 1 */
-static void unsupported_params_init(MachineClass *mc, SMPTestData *data)
+static void unsupported_params_init(const MachineClass *mc, SMPTestData *data)
 {
     if (!mc->smp_props.dies_supported) {
         data->expect_prefer_sockets.dies = 1;
-- 
Gitee


From 2ce1daae407033e689a559b7346523b18651ee0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
Date: Thu, 11 Nov 2021 10:21:23 +0100
Subject: [PATCH 36/55] hw/core: Rename smp_parse() ->
 machine_parse_smp_config()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All methods related to MachineState are prefixed with "machine_".
smp_parse() does not need to be an exception. Rename it and
const'ify the SMPConfiguration argument, since it doesn't need
to be modified.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Tested-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211216132015.815493-9-philmd@redhat.com>
---
 hw/core/machine-smp.c       | 6 ++++--
 hw/core/machine.c           | 2 +-
 include/hw/boards.h         | 3 ++-
 tests/unit/test-smp-parse.c | 8 ++++----
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c
index 116a0cbbfa..2cbfd57429 100644
--- a/hw/core/machine-smp.c
+++ b/hw/core/machine-smp.c
@@ -44,7 +44,8 @@ static char *cpu_hierarchy_to_string(MachineState *ms)
 }
 
 /*
- * smp_parse - Generic function used to parse the given SMP configuration
+ * machine_parse_smp_config: Generic function used to parse the given
+ *                           SMP configuration
  *
  * Any missing parameter in "cpus/maxcpus/sockets/cores/threads" will be
  * automatically computed based on the provided ones.
@@ -63,7 +64,8 @@ static char *cpu_hierarchy_to_string(MachineState *ms)
  * introduced topology members which are likely to be target specific should
  * be directly set as 1 if they are omitted (e.g. dies for PC since 4.1).
  */
-void smp_parse(MachineState *ms, SMPConfiguration *config, Error **errp)
+void machine_parse_smp_config(MachineState *ms,
+                              const SMPConfiguration *config, Error **errp)
 {
     MachineClass *mc = MACHINE_GET_CLASS(ms);
     unsigned cpus    = config->has_cpus ? config->cpus : 0;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 53a99abc56..3993c534b9 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -761,7 +761,7 @@ static void machine_set_smp(Object *obj, Visitor *v, const char *name,
         return;
     }
 
-    smp_parse(ms, config, errp);
+    machine_parse_smp_config(ms, config, errp);
 }
 
 static void machine_class_init(ObjectClass *oc, void *data)
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 9c1c190104..7597cec440 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -34,7 +34,8 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
 void machine_set_cpu_numa_node(MachineState *machine,
                                const CpuInstanceProperties *props,
                                Error **errp);
-void smp_parse(MachineState *ms, SMPConfiguration *config, Error **errp);
+void machine_parse_smp_config(MachineState *ms,
+                              const SMPConfiguration *config, Error **errp);
 
 /**
  * machine_class_allow_dynamic_sysbus_dev: Add type to list of valid devices
diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 0f98c9509e..b6df8137fc 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -337,7 +337,7 @@ static const struct SMPTestData data_with_dies_invalid[] = {
     },
 };
 
-static char *smp_config_to_string(SMPConfiguration *config)
+static char *smp_config_to_string(const SMPConfiguration *config)
 {
     return g_strdup_printf(
         "(SMPConfiguration) {\n"
@@ -371,7 +371,7 @@ static char *cpu_topology_to_string(const CpuTopology *topo)
         topo->cores, topo->threads, topo->max_cpus);
 }
 
-static void check_parse(MachineState *ms, SMPConfiguration *config,
+static void check_parse(MachineState *ms, const SMPConfiguration *config,
                         const CpuTopology *expect_topo, const char *expect_err,
                         bool is_valid)
 {
@@ -380,8 +380,8 @@ static void check_parse(MachineState *ms, SMPConfiguration *config,
     g_autofree char *output_topo_str = NULL;
     Error *err = NULL;
 
-    /* call the generic parser smp_parse() */
-    smp_parse(ms, config, &err);
+    /* call the generic parser */
+    machine_parse_smp_config(ms, config, &err);
 
     output_topo_str = cpu_topology_to_string(&ms->smp);
 
-- 
Gitee


From 07991b049fc9ebdb62c311eda1535ad4831625e5 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Tue, 28 Dec 2021 17:22:08 +0800
Subject: [PATCH 37/55] qemu-options: Improve readability of SMP related Docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have a description in qemu-options.hx for each CPU topology
parameter to explain what it exactly means, and also an extra
declaration for the target-specific one, e.g. "for PC only"
when describing "dies", and "for PC, it's on one die" when
describing "cores".

Now we are going to introduce one more non-generic parameter
"clusters", which will make the Doc less readable and less scalable
if we still continue to use the legacy way to describe it.

So let's at first make two tweaks of the Docs to improve the
readability and also scalability:
1) In the -help text: Delete the extra specific declaration and
   describe each topology parameter level by level. Then add a
   note to declare that different machines may support different
   subsets and the actual meaning of the supported parameters
   will vary accordingly.
2) In the rST text: List all the sub-hierarchies currently
   supported in QEMU, and correspondingly give an example of
   -smp configuration for each of them.

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211228092221.21068-2-wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
---
 qemu-options.hx | 76 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 59 insertions(+), 17 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index ae2c6dbbfc..7a59db7764 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -207,14 +207,26 @@ ERST
 
 DEF("smp", HAS_ARG, QEMU_OPTION_smp,
     "-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,cores=cores][,threads=threads]\n"
-    "                set the number of CPUs to 'n' [default=1]\n"
+    "                set the number of initial CPUs to 'n' [default=1]\n"
     "                maxcpus= maximum number of total CPUs, including\n"
     "                offline CPUs for hotplug, etc\n"
-    "                sockets= number of discrete sockets in the system\n"
-    "                dies= number of CPU dies on one socket (for PC only)\n"
-    "                cores= number of CPU cores on one socket (for PC, it's on one die)\n"
-    "                threads= number of threads on one CPU core\n",
-        QEMU_ARCH_ALL)
+    "                sockets= number of sockets on the machine board\n"
+    "                dies= number of dies in one socket\n"
+    "                cores= number of cores in one die\n"
+    "                threads= number of threads in one core\n"
+    "Note: Different machines may have different subsets of the CPU topology\n"
+    "      parameters supported, so the actual meaning of the supported parameters\n"
+    "      will vary accordingly. For example, for a machine type that supports a\n"
+    "      three-level CPU hierarchy of sockets/cores/threads, the parameters will\n"
+    "      sequentially mean as below:\n"
+    "                sockets means the number of sockets on the machine board\n"
+    "                cores means the number of cores in one socket\n"
+    "                threads means the number of threads in one core\n"
+    "      For a particular machine type board, an expected CPU topology hierarchy\n"
+    "      can be defined through the supported sub-option. Unsupported parameters\n"
+    "      can also be provided in addition to the sub-option, but their values\n"
+    "      must be set as 1 in the purpose of correct parsing.\n",
+    QEMU_ARCH_ALL)
 SRST
 ``-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,cores=cores][,threads=threads]``
     Simulate a SMP system with '\ ``n``\ ' CPUs initially present on
@@ -225,27 +237,57 @@ SRST
     initial CPU count will match the maximum number. When only one of them
     is given then the omitted one will be set to its counterpart's value.
     Both parameters may be specified, but the maximum number of CPUs must
-    be equal to or greater than the initial CPU count. Both parameters are
-    subject to an upper limit that is determined by the specific machine
-    type chosen.
-
-    To control reporting of CPU topology information, the number of sockets,
-    dies per socket, cores per die, and threads per core can be specified.
-    The sum `` sockets * cores * dies * threads `` must be equal to the
-    maximum CPU count. CPU targets may only support a subset of the topology
-    parameters. Where a CPU target does not support use of a particular
-    topology parameter, its value should be assumed to be 1 for the purpose
-    of computing the CPU maximum count.
+    be equal to or greater than the initial CPU count. Product of the
+    CPU topology hierarchy must be equal to the maximum number of CPUs.
+    Both parameters are subject to an upper limit that is determined by
+    the specific machine type chosen.
+
+    To control reporting of CPU topology information, values of the topology
+    parameters can be specified. Machines may only support a subset of the
+    parameters and different machines may have different subsets supported
+    which vary depending on capacity of the corresponding CPU targets. So
+    for a particular machine type board, an expected topology hierarchy can
+    be defined through the supported sub-option. Unsupported parameters can
+    also be provided in addition to the sub-option, but their values must be
+    set as 1 in the purpose of correct parsing.
 
     Either the initial CPU count, or at least one of the topology parameters
     must be specified. The specified parameters must be greater than zero,
     explicit configuration like "cpus=0" is not allowed. Values for any
     omitted parameters will be computed from those which are given.
+
+    For example, the following sub-option defines a CPU topology hierarchy
+    (2 sockets totally on the machine, 2 cores per socket, 2 threads per
+    core) for a machine that only supports sockets/cores/threads.
+    Some members of the option can be omitted but their values will be
+    automatically computed:
+
+    ::
+
+        -smp 8,sockets=2,cores=2,threads=2,maxcpus=8
+
+    The following sub-option defines a CPU topology hierarchy (2 sockets
+    totally on the machine, 2 dies per socket, 2 cores per die, 2 threads
+    per core) for PC machines which support sockets/dies/cores/threads.
+    Some members of the option can be omitted but their values will be
+    automatically computed:
+
+    ::
+
+        -smp 16,sockets=2,dies=2,cores=2,threads=2,maxcpus=16
+
     Historically preference was given to the coarsest topology parameters
     when computing missing values (ie sockets preferred over cores, which
     were preferred over threads), however, this behaviour is considered
     liable to change. Prior to 6.2 the preference was sockets over cores
     over threads. Since 6.2 the preference is cores over sockets over threads.
+
+    For example, the following option defines a machine board with 2 sockets
+    of 1 core before 6.2 and 1 socket of 2 cores after 6.2:
+
+    ::
+
+        -smp 2
 ERST
 
 DEF("numa", HAS_ARG, QEMU_OPTION_numa,
-- 
Gitee


From bf4a20a82bd4804842dd2960db30e0be7ecb2d32 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Tue, 28 Dec 2021 17:22:09 +0800
Subject: [PATCH 38/55] hw/core/machine: Introduce CPU cluster topology support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The new Cluster-Aware Scheduling support has landed in Linux 5.16,
which has been proved to benefit the scheduling performance (e.g.
load balance and wake_affine strategy) on both x86_64 and AArch64.

So now in Linux 5.16 we have four-level arch-neutral CPU topology
definition like below and a new scheduler level for clusters.
struct cpu_topology {
    int thread_id;
    int core_id;
    int cluster_id;
    int package_id;
    int llc_id;
    cpumask_t thread_sibling;
    cpumask_t core_sibling;
    cpumask_t cluster_sibling;
    cpumask_t llc_sibling;
}

A cluster generally means a group of CPU cores which share L2 cache
or other mid-level resources, and it is these shared resources that
are used to improve the scheduler's behavior. In terms of size, a
cluster sits between a CPU die and a CPU core. For example, on
some ARM64 Kunpeng servers, we have 6 clusters in each NUMA node,
and 4 CPU cores in each cluster. The 4 CPU cores share a separate
L2 cache and a L3 cache tag, which brings cache affinity advantage.

In virtualization, on Hosts which have pClusters (physical
clusters), if we can design a vCPU topology with a cluster level for
the guest kernel and have dedicated vCPU pinning, a Cluster-Aware
Guest kernel can also make use of the cache affinity of CPU clusters
to gain similar scheduling performance.

This patch adds infrastructure for CPU cluster level topology
configuration and parsing, so that the user can specify cluster
parameter if their machines support it.

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Message-Id: <20211228092221.21068-3-wangyanan55@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
[PMD: Added '(since 7.0)' to @clusters in qapi/machine.json]
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
---
 hw/core/machine-smp.c | 26 +++++++++++++++++++-------
 hw/core/machine.c     |  3 +++
 include/hw/boards.h   |  6 +++++-
 qapi/machine.json     |  5 ++++-
 qemu-options.hx       |  7 ++++---
 softmmu/vl.c          |  3 +++
 6 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c
index 2cbfd57429..b39ed21e65 100644
--- a/hw/core/machine-smp.c
+++ b/hw/core/machine-smp.c
@@ -37,6 +37,10 @@ static char *cpu_hierarchy_to_string(MachineState *ms)
         g_string_append_printf(s, " * dies (%u)", ms->smp.dies);
     }
 
+    if (mc->smp_props.clusters_supported) {
+        g_string_append_printf(s, " * clusters (%u)", ms->smp.clusters);
+    }
+
     g_string_append_printf(s, " * cores (%u)", ms->smp.cores);
     g_string_append_printf(s, " * threads (%u)", ms->smp.threads);
 
@@ -71,6 +75,7 @@ void machine_parse_smp_config(MachineState *ms,
     unsigned cpus    = config->has_cpus ? config->cpus : 0;
     unsigned sockets = config->has_sockets ? config->sockets : 0;
     unsigned dies    = config->has_dies ? config->dies : 0;
+    unsigned clusters = config->has_clusters ? config->clusters : 0;
     unsigned cores   = config->has_cores ? config->cores : 0;
     unsigned threads = config->has_threads ? config->threads : 0;
     unsigned maxcpus = config->has_maxcpus ? config->maxcpus : 0;
@@ -82,6 +87,7 @@ void machine_parse_smp_config(MachineState *ms,
     if ((config->has_cpus && config->cpus == 0) ||
         (config->has_sockets && config->sockets == 0) ||
         (config->has_dies && config->dies == 0) ||
+        (config->has_clusters && config->clusters == 0) ||
         (config->has_cores && config->cores == 0) ||
         (config->has_threads && config->threads == 0) ||
         (config->has_maxcpus && config->maxcpus == 0)) {
@@ -97,8 +103,13 @@ void machine_parse_smp_config(MachineState *ms,
         error_setg(errp, "dies not supported by this machine's CPU topology");
         return;
     }
+    if (!mc->smp_props.clusters_supported && clusters > 1) {
+        error_setg(errp, "clusters not supported by this machine's CPU topology");
+        return;
+    }
 
     dies = dies > 0 ? dies : 1;
+    clusters = clusters > 0 ? clusters : 1;
 
     /* compute missing values based on the provided ones */
     if (cpus == 0 && maxcpus == 0) {
@@ -113,41 +124,42 @@ void machine_parse_smp_config(MachineState *ms,
             if (sockets == 0) {
                 cores = cores > 0 ? cores : 1;
                 threads = threads > 0 ? threads : 1;
-                sockets = maxcpus / (dies * cores * threads);
+                sockets = maxcpus / (dies * clusters * cores * threads);
             } else if (cores == 0) {
                 threads = threads > 0 ? threads : 1;
-                cores = maxcpus / (sockets * dies * threads);
+                cores = maxcpus / (sockets * dies * clusters * threads);
             }
         } else {
             /* prefer cores over sockets since 6.2 */
             if (cores == 0) {
                 sockets = sockets > 0 ? sockets : 1;
                 threads = threads > 0 ? threads : 1;
-                cores = maxcpus / (sockets * dies * threads);
+                cores = maxcpus / (sockets * dies * clusters * threads);
             } else if (sockets == 0) {
                 threads = threads > 0 ? threads : 1;
-                sockets = maxcpus / (dies * cores * threads);
+                sockets = maxcpus / (dies * clusters * cores * threads);
             }
         }
 
         /* try to calculate omitted threads at last */
         if (threads == 0) {
-            threads = maxcpus / (sockets * dies * cores);
+            threads = maxcpus / (sockets * dies * clusters * cores);
         }
     }
 
-    maxcpus = maxcpus > 0 ? maxcpus : sockets * dies * cores * threads;
+    maxcpus = maxcpus > 0 ? maxcpus : sockets * dies * clusters * cores * threads;
     cpus = cpus > 0 ? cpus : maxcpus;
 
     ms->smp.cpus = cpus;
     ms->smp.sockets = sockets;
     ms->smp.dies = dies;
+    ms->smp.clusters = clusters;
     ms->smp.cores = cores;
     ms->smp.threads = threads;
     ms->smp.max_cpus = maxcpus;
 
     /* sanity-check of the computed topology */
-    if (sockets * dies * cores * threads != maxcpus) {
+    if (sockets * dies * clusters * cores * threads != maxcpus) {
         g_autofree char *topo_msg = cpu_hierarchy_to_string(ms);
         error_setg(errp, "Invalid CPU topology: "
                    "product of the hierarchy must match maxcpus: "
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 3993c534b9..a4a2df405f 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -742,10 +742,12 @@ static void machine_get_smp(Object *obj, Visitor *v, const char *name,
         .has_cpus = true, .cpus = ms->smp.cpus,
         .has_sockets = true, .sockets = ms->smp.sockets,
         .has_dies = true, .dies = ms->smp.dies,
+        .has_clusters = true, .clusters = ms->smp.clusters,
         .has_cores = true, .cores = ms->smp.cores,
         .has_threads = true, .threads = ms->smp.threads,
         .has_maxcpus = true, .maxcpus = ms->smp.max_cpus,
     };
+
     if (!visit_type_SMPConfiguration(v, name, &config, &error_abort)) {
         return;
     }
@@ -932,6 +934,7 @@ static void machine_initfn(Object *obj)
     ms->smp.max_cpus = mc->default_cpus;
     ms->smp.sockets = 1;
     ms->smp.dies = 1;
+    ms->smp.clusters = 1;
     ms->smp.cores = 1;
     ms->smp.threads = 1;
 }
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 7597cec440..f49a2578ea 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -129,10 +129,12 @@ typedef struct {
  * SMPCompatProps:
  * @prefer_sockets - whether sockets are preferred over cores in smp parsing
  * @dies_supported - whether dies are supported by the machine
+ * @clusters_supported - whether clusters are supported by the machine
  */
 typedef struct {
     bool prefer_sockets;
     bool dies_supported;
+    bool clusters_supported;
 } SMPCompatProps;
 
 /**
@@ -299,7 +301,8 @@ typedef struct DeviceMemoryState {
  * @cpus: the number of present logical processors on the machine
  * @sockets: the number of sockets on the machine
  * @dies: the number of dies in one socket
- * @cores: the number of cores in one die
+ * @clusters: the number of clusters in one die
+ * @cores: the number of cores in one cluster
  * @threads: the number of threads in one core
  * @max_cpus: the maximum number of logical processors on the machine
  */
@@ -307,6 +310,7 @@ typedef struct CpuTopology {
     unsigned int cpus;
     unsigned int sockets;
     unsigned int dies;
+    unsigned int clusters;
     unsigned int cores;
     unsigned int threads;
     unsigned int max_cpus;
diff --git a/qapi/machine.json b/qapi/machine.json
index f1839acf20..8faa51074e 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1396,7 +1396,9 @@
 #
 # @dies: number of dies per socket in the CPU topology
 #
-# @cores: number of cores per die in the CPU topology
+# @clusters: number of clusters per die in the CPU topology (since 7.0)
+#
+# @cores: number of cores per cluster in the CPU topology
 #
 # @threads: number of threads per core in the CPU topology
 #
@@ -1408,6 +1410,7 @@
      '*cpus': 'int',
      '*sockets': 'int',
      '*dies': 'int',
+     '*clusters': 'int',
      '*cores': 'int',
      '*threads': 'int',
      '*maxcpus': 'int' } }
diff --git a/qemu-options.hx b/qemu-options.hx
index 7a59db7764..0f26f7dad7 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -206,13 +206,14 @@ SRST
 ERST
 
 DEF("smp", HAS_ARG, QEMU_OPTION_smp,
-    "-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,cores=cores][,threads=threads]\n"
+    "-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]\n"
     "                set the number of initial CPUs to 'n' [default=1]\n"
     "                maxcpus= maximum number of total CPUs, including\n"
     "                offline CPUs for hotplug, etc\n"
     "                sockets= number of sockets on the machine board\n"
     "                dies= number of dies in one socket\n"
-    "                cores= number of cores in one die\n"
+    "                clusters= number of clusters in one die\n"
+    "                cores= number of cores in one cluster\n"
     "                threads= number of threads in one core\n"
     "Note: Different machines may have different subsets of the CPU topology\n"
     "      parameters supported, so the actual meaning of the supported parameters\n"
@@ -228,7 +229,7 @@ DEF("smp", HAS_ARG, QEMU_OPTION_smp,
     "      must be set as 1 in the purpose of correct parsing.\n",
     QEMU_ARCH_ALL)
 SRST
-``-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,cores=cores][,threads=threads]``
+``-smp [[cpus=]n][,maxcpus=maxcpus][,sockets=sockets][,dies=dies][,clusters=clusters][,cores=cores][,threads=threads]``
     Simulate a SMP system with '\ ``n``\ ' CPUs initially present on
     the machine type board. On boards supporting CPU hotplug, the optional
     '\ ``maxcpus``\ ' parameter can be set to enable further CPUs to be
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 620a1f1367..d9e4c619d3 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -726,6 +726,9 @@ static QemuOptsList qemu_smp_opts = {
         }, {
             .name = "dies",
             .type = QEMU_OPT_NUMBER,
+        }, {
+            .name = "clusters",
+            .type = QEMU_OPT_NUMBER,
         }, {
             .name = "cores",
             .type = QEMU_OPT_NUMBER,
-- 
Gitee


From 5e8a39a560ea58308f66d47639c0d5d2e704997f Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Tue, 28 Dec 2021 17:22:11 +0800
Subject: [PATCH 39/55] tests/unit/test-smp-parse: Add testcases for CPU
 clusters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add testcases for parsing of the four-level CPU topology hierarchy,
ie sockets/clusters/cores/threads, which will be supported on ARM
virt machines.

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211228092221.21068-5-wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 130 ++++++++++++++++++++++++++++++++++--
 1 file changed, 123 insertions(+), 7 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index b6df8137fc..331719bbc4 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -61,6 +61,20 @@
             .has_maxcpus = hf, .maxcpus = f,                  \
         }
 
+/*
+ * Currently a 4-level topology hierarchy is supported on ARM virt machines
+ *  -sockets/clusters/cores/threads
+ */
+#define SMP_CONFIG_WITH_CLUSTERS(ha, a, hb, b, hc, c, hd, d, he, e, hf, f) \
+        {                                                     \
+            .has_cpus     = ha, .cpus     = a,                \
+            .has_sockets  = hb, .sockets  = b,                \
+            .has_clusters = hc, .clusters = c,                \
+            .has_cores    = hd, .cores    = d,                \
+            .has_threads  = he, .threads  = e,                \
+            .has_maxcpus  = hf, .maxcpus  = f,                \
+        }
+
 /**
  * @config - the given SMP configuration
  * @expect_prefer_sockets - the expected parsing result for the
@@ -290,6 +304,10 @@ static const struct SMPTestData data_generic_invalid[] = {
         /* config: -smp 2,dies=2 */
         .config = SMP_CONFIG_WITH_DIES(T, 2, F, 0, T, 2, F, 0, F, 0, F, 0),
         .expect_error = "dies not supported by this machine's CPU topology",
+    }, {
+        /* config: -smp 2,clusters=2 */
+        .config = SMP_CONFIG_WITH_CLUSTERS(T, 2, F, 0, T, 2, F, 0, F, 0, F, 0),
+        .expect_error = "clusters not supported by this machine's CPU topology",
     }, {
         /* config: -smp 8,sockets=2,cores=4,threads=2,maxcpus=8 */
         .config = SMP_CONFIG_GENERIC(T, 8, T, 2, T, 4, T, 2, T, 8),
@@ -337,20 +355,40 @@ static const struct SMPTestData data_with_dies_invalid[] = {
     },
 };
 
+static const struct SMPTestData data_with_clusters_invalid[] = {
+    {
+        /* config: -smp 16,sockets=2,clusters=2,cores=4,threads=2,maxcpus=16 */
+        .config = SMP_CONFIG_WITH_CLUSTERS(T, 16, T, 2, T, 2, T, 4, T, 2, T, 16),
+        .expect_error = "Invalid CPU topology: "
+                        "product of the hierarchy must match maxcpus: "
+                        "sockets (2) * clusters (2) * cores (4) * threads (2) "
+                        "!= maxcpus (16)",
+    }, {
+        /* config: -smp 34,sockets=2,clusters=2,cores=4,threads=2,maxcpus=32 */
+        .config = SMP_CONFIG_WITH_CLUSTERS(T, 34, T, 2, T, 2, T, 4, T, 2, T, 32),
+        .expect_error = "Invalid CPU topology: "
+                        "maxcpus must be equal to or greater than smp: "
+                        "sockets (2) * clusters (2) * cores (4) * threads (2) "
+                        "== maxcpus (32) < smp_cpus (34)",
+    },
+};
+
 static char *smp_config_to_string(const SMPConfiguration *config)
 {
     return g_strdup_printf(
         "(SMPConfiguration) {\n"
-        "    .has_cpus    = %5s, cpus    = %" PRId64 ",\n"
-        "    .has_sockets = %5s, sockets = %" PRId64 ",\n"
-        "    .has_dies    = %5s, dies    = %" PRId64 ",\n"
-        "    .has_cores   = %5s, cores   = %" PRId64 ",\n"
-        "    .has_threads = %5s, threads = %" PRId64 ",\n"
-        "    .has_maxcpus = %5s, maxcpus = %" PRId64 ",\n"
+        "    .has_cpus     = %5s, cpus     = %" PRId64 ",\n"
+        "    .has_sockets  = %5s, sockets  = %" PRId64 ",\n"
+        "    .has_dies     = %5s, dies     = %" PRId64 ",\n"
+        "    .has_clusters = %5s, clusters = %" PRId64 ",\n"
+        "    .has_cores    = %5s, cores    = %" PRId64 ",\n"
+        "    .has_threads  = %5s, threads  = %" PRId64 ",\n"
+        "    .has_maxcpus  = %5s, maxcpus  = %" PRId64 ",\n"
         "}",
         config->has_cpus ? "true" : "false", config->cpus,
         config->has_sockets ? "true" : "false", config->sockets,
         config->has_dies ? "true" : "false", config->dies,
+        config->has_clusters ? "true" : "false", config->clusters,
         config->has_cores ? "true" : "false", config->cores,
         config->has_threads ? "true" : "false", config->threads,
         config->has_maxcpus ? "true" : "false", config->maxcpus);
@@ -363,11 +401,12 @@ static char *cpu_topology_to_string(const CpuTopology *topo)
         "    .cpus     = %u,\n"
         "    .sockets  = %u,\n"
         "    .dies     = %u,\n"
+        "    .clusters = %u,\n"
         "    .cores    = %u,\n"
         "    .threads  = %u,\n"
         "    .max_cpus = %u,\n"
         "}",
-        topo->cpus, topo->sockets, topo->dies,
+        topo->cpus, topo->sockets, topo->dies, topo->clusters,
         topo->cores, topo->threads, topo->max_cpus);
 }
 
@@ -391,6 +430,7 @@ static void check_parse(MachineState *ms, const SMPConfiguration *config,
             (ms->smp.cpus == expect_topo->cpus) &&
             (ms->smp.sockets == expect_topo->sockets) &&
             (ms->smp.dies == expect_topo->dies) &&
+            (ms->smp.clusters == expect_topo->clusters) &&
             (ms->smp.cores == expect_topo->cores) &&
             (ms->smp.threads == expect_topo->threads) &&
             (ms->smp.max_cpus == expect_topo->max_cpus)) {
@@ -472,6 +512,11 @@ static void unsupported_params_init(const MachineClass *mc, SMPTestData *data)
         data->expect_prefer_sockets.dies = 1;
         data->expect_prefer_cores.dies = 1;
     }
+
+    if (!mc->smp_props.clusters_supported) {
+        data->expect_prefer_sockets.clusters = 1;
+        data->expect_prefer_cores.clusters = 1;
+    }
 }
 
 static void machine_base_class_init(ObjectClass *oc, void *data)
@@ -491,6 +536,7 @@ static void machine_generic_valid_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = MAX_CPUS;
 
     mc->smp_props.dies_supported = false;
+    mc->smp_props.clusters_supported = false;
 }
 
 static void machine_generic_invalid_class_init(ObjectClass *oc, void *data)
@@ -502,6 +548,7 @@ static void machine_generic_invalid_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = 511;
 
     mc->smp_props.dies_supported = false;
+    mc->smp_props.clusters_supported = false;
 }
 
 static void machine_with_dies_class_init(ObjectClass *oc, void *data)
@@ -512,6 +559,18 @@ static void machine_with_dies_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = MAX_CPUS;
 
     mc->smp_props.dies_supported = true;
+    mc->smp_props.clusters_supported = false;
+}
+
+static void machine_with_clusters_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->min_cpus = MIN_CPUS;
+    mc->max_cpus = MAX_CPUS;
+
+    mc->smp_props.clusters_supported = true;
+    mc->smp_props.dies_supported = false;
 }
 
 static void test_generic_valid(const void *opaque)
@@ -607,6 +666,56 @@ static void test_with_dies(const void *opaque)
     object_unref(obj);
 }
 
+static void test_with_clusters(const void *opaque)
+{
+    const char *machine_type = opaque;
+    Object *obj = object_new(machine_type);
+    MachineState *ms = MACHINE(obj);
+    MachineClass *mc = MACHINE_GET_CLASS(obj);
+    SMPTestData data = {};
+    unsigned int num_clusters = 2;
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(data_generic_valid); i++) {
+        data = data_generic_valid[i];
+        unsupported_params_init(mc, &data);
+
+        /* when clusters parameter is omitted, it will be set as 1 */
+        data.expect_prefer_sockets.clusters = 1;
+        data.expect_prefer_cores.clusters = 1;
+
+        smp_parse_test(ms, &data, true);
+
+        /* when clusters parameter is specified */
+        data.config.has_clusters = true;
+        data.config.clusters = num_clusters;
+        if (data.config.has_cpus) {
+            data.config.cpus *= num_clusters;
+        }
+        if (data.config.has_maxcpus) {
+            data.config.maxcpus *= num_clusters;
+        }
+
+        data.expect_prefer_sockets.clusters = num_clusters;
+        data.expect_prefer_sockets.cpus *= num_clusters;
+        data.expect_prefer_sockets.max_cpus *= num_clusters;
+        data.expect_prefer_cores.clusters = num_clusters;
+        data.expect_prefer_cores.cpus *= num_clusters;
+        data.expect_prefer_cores.max_cpus *= num_clusters;
+
+        smp_parse_test(ms, &data, true);
+    }
+
+    for (i = 0; i < ARRAY_SIZE(data_with_clusters_invalid); i++) {
+        data = data_with_clusters_invalid[i];
+        unsupported_params_init(mc, &data);
+
+        smp_parse_test(ms, &data, false);
+    }
+
+    object_unref(obj);
+}
+
 /* Type info of the tested machine */
 static const TypeInfo smp_machine_types[] = {
     {
@@ -628,6 +737,10 @@ static const TypeInfo smp_machine_types[] = {
         .name           = MACHINE_TYPE_NAME("smp-with-dies"),
         .parent         = TYPE_MACHINE,
         .class_init     = machine_with_dies_class_init,
+    }, {
+        .name           = MACHINE_TYPE_NAME("smp-with-clusters"),
+        .parent         = TYPE_MACHINE,
+        .class_init     = machine_with_clusters_class_init,
     }
 };
 
@@ -648,6 +761,9 @@ int main(int argc, char *argv[])
     g_test_add_data_func("/test-smp-parse/with_dies",
                          MACHINE_TYPE_NAME("smp-with-dies"),
                          test_with_dies);
+    g_test_add_data_func("/test-smp-parse/with_clusters",
+                         MACHINE_TYPE_NAME("smp-with-clusters"),
+                         test_with_clusters);
 
     g_test_run();
 
-- 
Gitee


From 77bca7d51e99f8ba4d11635ff9f51615739f4d55 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Tue, 28 Dec 2021 17:22:12 +0800
Subject: [PATCH 40/55] tests/unit/test-smp-parse: No need to explicitly zero
 MachineClass members
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The default value of the MachineClass members is 0, which
means we don't have to explicitly zero them. Also the value
of "mc->smp_props.prefer_sockets" will be taken care of by
smp_parse_test(), we don't necessarily need the statement
in machine_base_class_init() either.

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211228092221.21068-6-wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 331719bbc4..72d83d1bbc 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -523,8 +523,6 @@ static void machine_base_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
 
-    mc->smp_props.prefer_sockets = true;
-
     mc->name = g_strdup(SMP_MACHINE_NAME);
 }
 
@@ -534,9 +532,6 @@ static void machine_generic_valid_class_init(ObjectClass *oc, void *data)
 
     mc->min_cpus = MIN_CPUS;
     mc->max_cpus = MAX_CPUS;
-
-    mc->smp_props.dies_supported = false;
-    mc->smp_props.clusters_supported = false;
 }
 
 static void machine_generic_invalid_class_init(ObjectClass *oc, void *data)
@@ -546,9 +541,6 @@ static void machine_generic_invalid_class_init(ObjectClass *oc, void *data)
     /* Force invalid min CPUs and max CPUs */
     mc->min_cpus = 2;
     mc->max_cpus = 511;
-
-    mc->smp_props.dies_supported = false;
-    mc->smp_props.clusters_supported = false;
 }
 
 static void machine_with_dies_class_init(ObjectClass *oc, void *data)
@@ -559,7 +551,6 @@ static void machine_with_dies_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = MAX_CPUS;
 
     mc->smp_props.dies_supported = true;
-    mc->smp_props.clusters_supported = false;
 }
 
 static void machine_with_clusters_class_init(ObjectClass *oc, void *data)
@@ -570,7 +561,6 @@ static void machine_with_clusters_class_init(ObjectClass *oc, void *data)
     mc->max_cpus = MAX_CPUS;
 
     mc->smp_props.clusters_supported = true;
-    mc->smp_props.dies_supported = false;
 }
 
 static void test_generic_valid(const void *opaque)
-- 
Gitee


From 214511b1799b94cfd514a222d087bb888ed808ba Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Tue, 28 Dec 2021 17:22:13 +0800
Subject: [PATCH 41/55] tests/unit/test-smp-parse: Keep default MIN/MAX CPUs in
 machine_base_class_init
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most machine types in test-smp-parse will be OK to have the default
MIN/MAX CPUs except "smp-generic-invalid", let's keep the default
values in machine_base_class_init which will be inherited. And if
we want a different value for a specific machine, modify it in its
own initialization function.

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-Id: <20211228092221.21068-7-wangyanan55@huawei.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
---
 tests/unit/test-smp-parse.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/tests/unit/test-smp-parse.c b/tests/unit/test-smp-parse.c
index 72d83d1bbc..fdc39a846c 100644
--- a/tests/unit/test-smp-parse.c
+++ b/tests/unit/test-smp-parse.c
@@ -523,15 +523,10 @@ static void machine_base_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
 
-    mc->name = g_strdup(SMP_MACHINE_NAME);
-}
-
-static void machine_generic_valid_class_init(ObjectClass *oc, void *data)
-{
-    MachineClass *mc = MACHINE_CLASS(oc);
-
     mc->min_cpus = MIN_CPUS;
     mc->max_cpus = MAX_CPUS;
+
+    mc->name = g_strdup(SMP_MACHINE_NAME);
 }
 
 static void machine_generic_invalid_class_init(ObjectClass *oc, void *data)
@@ -547,9 +542,6 @@ static void machine_with_dies_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
 
-    mc->min_cpus = MIN_CPUS;
-    mc->max_cpus = MAX_CPUS;
-
     mc->smp_props.dies_supported = true;
 }
 
@@ -557,9 +549,6 @@ static void machine_with_clusters_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
 
-    mc->min_cpus = MIN_CPUS;
-    mc->max_cpus = MAX_CPUS;
-
     mc->smp_props.clusters_supported = true;
 }
 
@@ -718,7 +707,6 @@ static const TypeInfo smp_machine_types[] = {
     }, {
         .name           = MACHINE_TYPE_NAME("smp-generic-valid"),
         .parent         = TYPE_MACHINE,
-        .class_init     = machine_generic_valid_class_init,
     }, {
         .name           = MACHINE_TYPE_NAME("smp-generic-invalid"),
         .parent         = TYPE_MACHINE,
-- 
Gitee


From 1fab7ee365c8daccedd19d3a1be56babe36afcc6 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Fri, 7 Jan 2022 16:32:27 +0800
Subject: [PATCH 42/55] hw/arm/virt: Support CPU cluster on ARM virt machine

ARM64 machines like Kunpeng Family Server Chips have a level
of hardware topology in which a group of CPU cores share L3
cache tag or L2 cache. For example, Kunpeng 920 typically
has 6 or 8 clusters in each NUMA node (which also represents the
range of a CPU die), and each cluster has 4 CPU cores. All clusters
share L3 cache data, but CPU cores in each cluster share a
local L3 tag.

Running a guest kernel with Cluster-Aware Scheduling on the
Hosts which have physical clusters, if we can design a vCPU
topology with cluster level for guest kernel and then have
a dedicated vCPU pinning, the guest will gain scheduling
performance improvement from cache affinity of CPU cluster.

So let's enable the support for this new parameter on ARM
virt machines. After this patch, we can define a 4-level
CPU hierarchy like: cpus=*,maxcpus=*,sockets=*,clusters=*,
cores=*,threads=*.

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Message-id: 20220107083232.16256-2-wangyanan55@huawei.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/virt.c   |  1 +
 qemu-options.hx | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 3c972fdab0..6ca9cbe2cf 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2704,6 +2704,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
     hc->unplug_request = virt_machine_device_unplug_request_cb;
     hc->unplug = virt_machine_device_unplug_cb;
     mc->nvdimm_supported = true;
+    mc->smp_props.clusters_supported = true;
     mc->auto_enable_numa_with_memhp = true;
     mc->auto_enable_numa_with_memdev = true;
     mc->default_ram_id = "mach-virt.ram";
diff --git a/qemu-options.hx b/qemu-options.hx
index 0f26f7dad7..74d335e4c3 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -277,6 +277,16 @@ SRST
 
         -smp 16,sockets=2,dies=2,cores=2,threads=2,maxcpus=16
 
+    The following sub-option defines a CPU topology hierarchy (2 sockets
+    totally on the machine, 2 clusters per socket, 2 cores per cluster,
+    2 threads per core) for ARM virt machines which support sockets/clusters
+    /cores/threads. Some members of the option can be omitted but their values
+    will be automatically computed:
+
+    ::
+
+        -smp 16,sockets=2,clusters=2,cores=2,threads=2,maxcpus=16
+
     Historically preference was given to the coarsest topology parameters
     when computing missing values (ie sockets preferred over cores, which
     were preferred over threads), however, this behaviour is considered
-- 
Gitee


From 38d9ae59b9344f13198e6b4de03b04787bd6b89d Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Fri, 7 Jan 2022 16:32:28 +0800
Subject: [PATCH 43/55] hw/arm/virt: Support cluster level in DT cpu-map

Support one cluster level between core and physical package in the
cpu-map of Arm/virt devicetree. This is also consistent with Linux
Doc "Documentation/devicetree/bindings/cpu/cpu-topology.txt".

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Message-id: 20220107083232.16256-3-wangyanan55@huawei.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/arm/virt.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6ca9cbe2cf..ddcb73f714 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -434,9 +434,8 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
          * can contain several layers of clustering within a single physical
          * package and cluster nodes can be contained in parent cluster nodes.
          *
-         * Given that cluster is not yet supported in the vCPU topology,
-         * we currently generate one cluster node within each socket node
-         * by default.
+         * Note: currently we only support one layer of clustering within
+         * each physical package.
          */
         qemu_fdt_add_subnode(ms->fdt, "/cpus/cpu-map");
 
@@ -446,14 +445,16 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
 
             if (ms->smp.threads > 1) {
                 map_path = g_strdup_printf(
-                    "/cpus/cpu-map/socket%d/cluster0/core%d/thread%d",
-                    cpu / (ms->smp.cores * ms->smp.threads),
+                    "/cpus/cpu-map/socket%d/cluster%d/core%d/thread%d",
+                    cpu / (ms->smp.clusters * ms->smp.cores * ms->smp.threads),
+                    (cpu / (ms->smp.cores * ms->smp.threads)) % ms->smp.clusters,
                     (cpu / ms->smp.threads) % ms->smp.cores,
                     cpu % ms->smp.threads);
             } else {
                 map_path = g_strdup_printf(
-                    "/cpus/cpu-map/socket%d/cluster0/core%d",
-                    cpu / ms->smp.cores,
+                    "/cpus/cpu-map/socket%d/cluster%d/core%d",
+                    cpu / (ms->smp.clusters * ms->smp.cores),
+                    (cpu / ms->smp.cores) % ms->smp.clusters,
                     cpu % ms->smp.cores);
             }
             qemu_fdt_add_path(ms->fdt, map_path);
-- 
Gitee


From 66c935b435d90ef9c1ae4446c5edc07cbd8ba0ed Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Fri, 7 Jan 2022 16:32:29 +0800
Subject: [PATCH 44/55] hw/acpi/aml-build: Improve scalability of PPTT
 generation

Use g_queue APIs to reduce the nested loops and code indentation
as the number of processor hierarchy levels increases. Consequently,
it's more scalable to add a new topology level to build_pptt.

No functional change intended.

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Message-id: 20220107083232.16256-4-wangyanan55@huawei.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/acpi/aml-build.c | 50 +++++++++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index b3b3310df3..6aaedca2e5 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2001,7 +2001,10 @@ static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags,
 void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
                 const char *oem_id, const char *oem_table_id)
 {
-    int pptt_start = table_data->len;
+    GQueue *list = g_queue_new();
+    guint pptt_start = table_data->len;
+    guint parent_offset;
+    guint length, i;
     int uid = 0;
     int socket;
     AcpiTable table = { .sig = "PPTT", .rev = 2,
@@ -2010,9 +2013,8 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
     acpi_table_begin(&table, table_data);
 
     for (socket = 0; socket < ms->smp.sockets; socket++) {
-        uint32_t socket_offset = table_data->len - pptt_start;
-        int core;
-
+        g_queue_push_tail(list,
+            GUINT_TO_POINTER(table_data->len - pptt_start));
         build_processor_hierarchy_node(
             table_data,
             /*
@@ -2021,35 +2023,47 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
              */
             (1 << 0),
             0, socket, NULL, 0);
+    }
 
-        for (core = 0; core < ms->smp.cores; core++) {
-            uint32_t core_offset = table_data->len - pptt_start;
-            int thread;
+    length = g_queue_get_length(list);
+    for (i = 0; i < length; i++) {
+        int core;
 
+        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
+        for (core = 0; core < ms->smp.cores; core++) {
             if (ms->smp.threads > 1) {
+                g_queue_push_tail(list,
+                    GUINT_TO_POINTER(table_data->len - pptt_start));
                 build_processor_hierarchy_node(
                     table_data,
                     (0 << 0), /* not a physical package */
-                    socket_offset, core, NULL, 0);
-
-                for (thread = 0; thread < ms->smp.threads; thread++) {
-                    build_processor_hierarchy_node(
-                        table_data,
-                        (1 << 1) | /* ACPI Processor ID valid */
-                        (1 << 2) | /* Processor is a Thread */
-                        (1 << 3),  /* Node is a Leaf */
-                        core_offset, uid++, NULL, 0);
-                }
+                    parent_offset, core, NULL, 0);
             } else {
                 build_processor_hierarchy_node(
                     table_data,
                     (1 << 1) | /* ACPI Processor ID valid */
                     (1 << 3),  /* Node is a Leaf */
-                    socket_offset, uid++, NULL, 0);
+                    parent_offset, uid++, NULL, 0);
             }
         }
     }
 
+    length = g_queue_get_length(list);
+    for (i = 0; i < length; i++) {
+        int thread;
+
+        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
+        for (thread = 0; thread < ms->smp.threads; thread++) {
+            build_processor_hierarchy_node(
+                table_data,
+                (1 << 1) | /* ACPI Processor ID valid */
+                (1 << 2) | /* Processor is a Thread */
+                (1 << 3),  /* Node is a Leaf */
+                parent_offset, uid++, NULL, 0);
+        }
+    }
+
+    g_queue_free(list);
     acpi_table_end(linker, &table);
 }
 
-- 
Gitee


From 225034a72c803b8e3819cec22bc6fb8bfc9e7366 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Fri, 7 Jan 2022 16:32:30 +0800
Subject: [PATCH 45/55] tests/acpi/bios-tables-test: Allow changes to virt/PPTT
 file

List tests/data/acpi/virt/PPTT as an expected file allowed to
be changed in tests/qtest/bios-tables-test-allowed-diff.h

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Acked-by: Ani Sinha <ani@anisinha.ca>
Message-id: 20220107083232.16256-5-wangyanan55@huawei.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 tests/qtest/bios-tables-test-allowed-diff.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index dfb8523c8b..cb143a55a6 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1 +1,2 @@
 /* List of comma-separated changed AML files to ignore */
+"tests/data/acpi/virt/PPTT",
-- 
Gitee


From 9c16924ba0a77c34246b69e8b1faee219f266445 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Fri, 7 Jan 2022 16:32:31 +0800
Subject: [PATCH 46/55] hw/acpi/aml-build: Support cluster level in PPTT
 generation

Support CPU cluster topology level in generation of ACPI
Processor Properties Topology Table (PPTT).

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Message-id: 20220107083232.16256-6-wangyanan55@huawei.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 hw/acpi/aml-build.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 6aaedca2e5..bb2cad63b5 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -2001,6 +2001,7 @@ static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags,
 void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
                 const char *oem_id, const char *oem_table_id)
 {
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
     GQueue *list = g_queue_new();
     guint pptt_start = table_data->len;
     guint parent_offset;
@@ -2025,6 +2026,23 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
             0, socket, NULL, 0);
     }
 
+    if (mc->smp_props.clusters_supported) {
+        length = g_queue_get_length(list);
+        for (i = 0; i < length; i++) {
+            int cluster;
+
+            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
+            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
+                g_queue_push_tail(list,
+                    GUINT_TO_POINTER(table_data->len - pptt_start));
+                build_processor_hierarchy_node(
+                    table_data,
+                    (0 << 0), /* not a physical package */
+                    parent_offset, cluster, NULL, 0);
+            }
+        }
+    }
+
     length = g_queue_get_length(list);
     for (i = 0; i < length; i++) {
         int core;
-- 
Gitee


From 6f89f06e686a61acf681038ac06732facc6e7b93 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Fri, 7 Jan 2022 16:32:32 +0800
Subject: [PATCH 47/55] tests/acpi/bios-table-test: Update expected virt/PPTT
 file

Run ./tests/data/acpi/rebuild-expected-aml.sh from build directory
to update PPTT binary. Also empty bios-tables-test-allowed-diff.h.

The disassembled differences between actual and expected PPTT:

 /*
  * Intel ACPI Component Architecture
  * AML/ASL+ Disassembler version 20200528 (64-bit version)
  * Copyright (c) 2000 - 2020 Intel Corporation
  *
- * Disassembly of tests/data/acpi/virt/PPTT, Tue Jan  4 12:51:11 2022
+ * Disassembly of /tmp/aml-2ZGOF1, Tue Jan  4 12:51:11 2022
  *
  * ACPI Data Table [PPTT]
  *
  * Format: [HexOffset DecimalOffset ByteLength]  FieldName : FieldValue
  */

 [000h 0000   4]                    Signature : "PPTT"    [Processor Properties Topology Table]
-[004h 0004   4]                 Table Length : 0000004C
+[004h 0004   4]                 Table Length : 00000060
 [008h 0008   1]                     Revision : 02
-[009h 0009   1]                     Checksum : A8
+[009h 0009   1]                     Checksum : 48
 [00Ah 0010   6]                       Oem ID : "BOCHS "
 [010h 0016   8]                 Oem Table ID : "BXPC    "
 [018h 0024   4]                 Oem Revision : 00000001
 [01Ch 0028   4]              Asl Compiler ID : "BXPC"
 [020h 0032   4]        Asl Compiler Revision : 00000001

 [024h 0036   1]                Subtable Type : 00 [Processor Hierarchy Node]
 [025h 0037   1]                       Length : 14
 [026h 0038   2]                     Reserved : 0000
 [028h 0040   4]        Flags (decoded below) : 00000001
                             Physical package : 1
                      ACPI Processor ID valid : 0
                        Processor is a thread : 0
                               Node is a leaf : 0
                     Identical Implementation : 0
 [02Ch 0044   4]                       Parent : 00000000
 [030h 0048   4]            ACPI Processor ID : 00000000
 [034h 0052   4]      Private Resource Number : 00000000

 [038h 0056   1]                Subtable Type : 00 [Processor Hierarchy Node]
 [039h 0057   1]                       Length : 14
 [03Ah 0058   2]                     Reserved : 0000
-[03Ch 0060   4]        Flags (decoded below) : 0000000A
+[03Ch 0060   4]        Flags (decoded below) : 00000000
                             Physical package : 0
-                     ACPI Processor ID valid : 1
+                     ACPI Processor ID valid : 0
                        Processor is a thread : 0
-                              Node is a leaf : 1
+                              Node is a leaf : 0
                     Identical Implementation : 0
 [040h 0064   4]                       Parent : 00000024
 [044h 0068   4]            ACPI Processor ID : 00000000
 [048h 0072   4]      Private Resource Number : 00000000

-Raw Table Data: Length 76 (0x4C)
+[04Ch 0076   1]                Subtable Type : 00 [Processor Hierarchy Node]
+[04Dh 0077   1]                       Length : 14
+[04Eh 0078   2]                     Reserved : 0000
+[050h 0080   4]        Flags (decoded below) : 0000000A
+                            Physical package : 0
+                     ACPI Processor ID valid : 1
+                       Processor is a thread : 0
+                              Node is a leaf : 1
+                    Identical Implementation : 0
+[054h 0084   4]                       Parent : 00000038
+[058h 0088   4]            ACPI Processor ID : 00000000
+[05Ch 0092   4]      Private Resource Number : 00000000
+
+Raw Table Data: Length 96 (0x60)

-    0000: 50 50 54 54 4C 00 00 00 02 A8 42 4F 43 48 53 20  // PPTTL.....BOCHS
+    0000: 50 50 54 54 60 00 00 00 02 48 42 4F 43 48 53 20  // PPTT`....HBOCHS
     0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43  // BXPC    ....BXPC
     0020: 01 00 00 00 00 14 00 00 01 00 00 00 00 00 00 00  // ................
-    0030: 00 00 00 00 00 00 00 00 00 14 00 00 0A 00 00 00  // ................
-    0040: 24 00 00 00 00 00 00 00 00 00 00 00              // $...........
+    0030: 00 00 00 00 00 00 00 00 00 14 00 00 00 00 00 00  // ................
+    0040: 24 00 00 00 00 00 00 00 00 00 00 00 00 14 00 00  // $...............
+    0050: 0A 00 00 00 38 00 00 00 00 00 00 00 00 00 00 00  // ....8...........

Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Ani Sinha <ani@anisinha.ca>
Message-id: 20220107083232.16256-7-wangyanan55@huawei.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 tests/data/acpi/virt/PPTT                   | Bin 76 -> 96 bytes
 tests/qtest/bios-tables-test-allowed-diff.h |   1 -
 2 files changed, 1 deletion(-)

diff --git a/tests/data/acpi/virt/PPTT b/tests/data/acpi/virt/PPTT
index 7a1258ecf123555b24462c98ccbb76b4ac1d0c2b..f56ea63b369a604877374ad696c396e796ab1c83 100644
GIT binary patch
delta 53
pcmeZC;0g!`2}xjJU|{l?$YrDgWH5jU5Ca567#O&Klm(arApowi1QY-O

delta 32
fcmYfB;R*-{3GrcIU|?D?k;`ae01J-_kOKn%ZFdCM

diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h
index cb143a55a6..dfb8523c8b 100644
--- a/tests/qtest/bios-tables-test-allowed-diff.h
+++ b/tests/qtest/bios-tables-test-allowed-diff.h
@@ -1,2 +1 @@
 /* List of comma-separated changed AML files to ignore */
-"tests/data/acpi/virt/PPTT",
-- 
Gitee


From ecc0eb93e8856321ad940a85970f0db14ab9f146 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Fri, 7 Jan 2022 14:38:44 +0100
Subject: [PATCH 48/55] softmmu/device_tree: Silence compiler warning with
 --enable-sanitizers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If I configure my build with --enable-sanitizers, my GCC (v8.5.0)
complains:

.../softmmu/device_tree.c: In function ‘qemu_fdt_add_path’:
.../softmmu/device_tree.c:560:18: error: ‘retval’ may be used uninitialized
 in this function [-Werror=maybe-uninitialized]
     int namelen, retval;
                  ^~~~~~

It's a false warning since the while loop is always executed at least
once (p has to be non-NULL, otherwise the dereference in the if-statement
earlier will crash). Thus let's switch to a do-while loop here instead
to make the compiler happy in all cases.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Message-id: 20220107133844.145039-1-thuth@redhat.com
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 softmmu/device_tree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/softmmu/device_tree.c b/softmmu/device_tree.c
index 3965c834ca..9e96f5ecd5 100644
--- a/softmmu/device_tree.c
+++ b/softmmu/device_tree.c
@@ -564,7 +564,7 @@ int qemu_fdt_add_path(void *fdt, const char *path)
         return -1;
     }
 
-    while (p) {
+    do {
         name = p + 1;
         p = strchr(name, '/');
         namelen = p != NULL ? p - name : strlen(name);
@@ -584,7 +584,7 @@ int qemu_fdt_add_path(void *fdt, const char *path)
         }
 
         parent = retval;
-    }
+    } while (p);
 
     return retval;
 }
-- 
Gitee


From ebf1ac6c0ead3d6fbc32466028c286588333c1ea Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Tue, 11 Jan 2022 11:27:58 +0800
Subject: [PATCH 49/55] softmmu/device_tree: Remove redundant pointer
 assignment

The pointer assignment "const char *p = path;" in function
qemu_fdt_add_path is unnecessary. Let's remove it and just
use the "path" passed in. No functional change.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: 20220111032758.27804-1-wangyanan55@huawei.com
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
---
 softmmu/device_tree.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/softmmu/device_tree.c b/softmmu/device_tree.c
index 9e96f5ecd5..8897c79ea4 100644
--- a/softmmu/device_tree.c
+++ b/softmmu/device_tree.c
@@ -556,7 +556,6 @@ int qemu_fdt_add_subnode(void *fdt, const char *name)
 int qemu_fdt_add_path(void *fdt, const char *path)
 {
     const char *name;
-    const char *p = path;
     int namelen, retval;
     int parent = 0;
 
@@ -565,9 +564,9 @@ int qemu_fdt_add_path(void *fdt, const char *path)
     }
 
     do {
-        name = p + 1;
-        p = strchr(name, '/');
-        namelen = p != NULL ? p - name : strlen(name);
+        name = path + 1;
+        path = strchr(name, '/');
+        namelen = path != NULL ? path - name : strlen(name);
 
         retval = fdt_subnode_offset_namelen(fdt, parent, name, namelen);
         if (retval < 0 && retval != -FDT_ERR_NOTFOUND) {
@@ -584,7 +583,7 @@ int qemu_fdt_add_path(void *fdt, const char *path)
         }
 
         parent = retval;
-    } while (p);
+    } while (path);
 
     return retval;
 }
-- 
Gitee


From c5cd762bb7513b6df07e26f4eb619dccbd1918b7 Mon Sep 17 00:00:00 2001
From: Ying Fang <fangying1@huawei.com>
Date: Tue, 8 Feb 2022 11:31:15 +0800
Subject: [PATCH 50/55] hw/arm64: add vcpu cache info support

Support VCPU Cache info by dtb and PPTT table, including L1, L2 and L3 Cache.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Honghao <honghao5@huawei.com>
Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
---
 hw/acpi/aml-build.c         | 158 ++++++++++++++++++++++++++++++++++++
 hw/arm/virt.c               |  72 ++++++++++++++++
 include/hw/acpi/aml-build.h |  47 +++++++++++
 tests/data/acpi/virt/PPTT   | Bin 96 -> 208 bytes
 4 files changed, 277 insertions(+)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index bb2cad63b5..bebf49622b 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1994,6 +1994,163 @@ static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags,
     }
 }
 
+#ifdef __aarch64__
+/*
+ * ACPI spec, Revision 6.3
+ * 5.2.29.2 Cache Type Structure (Type 1)
+ */
+static void build_cache_hierarchy_node(GArray *tbl, uint32_t next_level,
+                                       uint32_t cache_type)
+{
+    build_append_byte(tbl, 1);
+    build_append_byte(tbl, 24);
+    build_append_int_noprefix(tbl, 0, 2);
+    build_append_int_noprefix(tbl, 127, 4);
+    build_append_int_noprefix(tbl, next_level, 4);
+
+    switch (cache_type) {
+    case ARM_L1D_CACHE: /* L1 dcache info */
+        build_append_int_noprefix(tbl, ARM_L1DCACHE_SIZE, 4);
+        build_append_int_noprefix(tbl, ARM_L1DCACHE_SETS, 4);
+        build_append_byte(tbl, ARM_L1DCACHE_ASSOCIATIVITY);
+        build_append_byte(tbl, ARM_L1DCACHE_ATTRIBUTES);
+        build_append_int_noprefix(tbl, ARM_L1DCACHE_LINE_SIZE, 2);
+        break;
+    case ARM_L1I_CACHE: /* L1 icache info */
+        build_append_int_noprefix(tbl, ARM_L1ICACHE_SIZE, 4);
+        build_append_int_noprefix(tbl, ARM_L1ICACHE_SETS, 4);
+        build_append_byte(tbl, ARM_L1ICACHE_ASSOCIATIVITY);
+        build_append_byte(tbl, ARM_L1ICACHE_ATTRIBUTES);
+        build_append_int_noprefix(tbl, ARM_L1ICACHE_LINE_SIZE, 2);
+        break;
+    case ARM_L2_CACHE: /* L2 cache info */
+        build_append_int_noprefix(tbl, ARM_L2CACHE_SIZE, 4);
+        build_append_int_noprefix(tbl, ARM_L2CACHE_SETS, 4);
+        build_append_byte(tbl, ARM_L2CACHE_ASSOCIATIVITY);
+        build_append_byte(tbl, ARM_L2CACHE_ATTRIBUTES);
+        build_append_int_noprefix(tbl, ARM_L2CACHE_LINE_SIZE, 2);
+        break;
+    case ARM_L3_CACHE: /* L3 cache info */
+        build_append_int_noprefix(tbl, ARM_L3CACHE_SIZE, 4);
+        build_append_int_noprefix(tbl, ARM_L3CACHE_SETS, 4);
+        build_append_byte(tbl, ARM_L3CACHE_ASSOCIATIVITY);
+        build_append_byte(tbl, ARM_L3CACHE_ATTRIBUTES);
+        build_append_int_noprefix(tbl, ARM_L3CACHE_LINE_SIZE, 2);
+        break;
+    default:
+        build_append_int_noprefix(tbl, 0, 4);
+        build_append_int_noprefix(tbl, 0, 4);
+        build_append_byte(tbl, 0);
+        build_append_byte(tbl, 0);
+        build_append_int_noprefix(tbl, 0, 2);
+    }
+}
+
+/*
+ * ACPI spec, Revision 6.3
+ * 5.2.29 Processor Properties Topology Table (PPTT)
+ */
+void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
+                const char *oem_id, const char *oem_table_id)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
+    GQueue *list = g_queue_new();
+    guint pptt_start = table_data->len;
+    guint parent_offset;
+    guint length, i;
+    int uid = 0;
+    int socket;
+    AcpiTable table = { .sig = "PPTT", .rev = 2,
+                        .oem_id = oem_id, .oem_table_id = oem_table_id };
+
+    acpi_table_begin(&table, table_data);
+
+    for (socket = 0; socket < ms->smp.sockets; socket++) {
+        uint32_t l3_cache_offset = table_data->len - pptt_start;
+        build_cache_hierarchy_node(table_data, 0, ARM_L3_CACHE);
+
+        g_queue_push_tail(list,
+            GUINT_TO_POINTER(table_data->len - pptt_start));
+        build_processor_hierarchy_node(
+            table_data,
+            /*
+             * Physical package - represents the boundary
+             * of a physical package
+             */
+            (1 << 0),
+            0, socket, &l3_cache_offset, 1);
+    }
+
+    if (mc->smp_props.clusters_supported) {
+        length = g_queue_get_length(list);
+        for (i = 0; i < length; i++) {
+            int cluster;
+
+            parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
+            for (cluster = 0; cluster < ms->smp.clusters; cluster++) {
+                g_queue_push_tail(list,
+                    GUINT_TO_POINTER(table_data->len - pptt_start));
+                build_processor_hierarchy_node(
+                    table_data,
+                    (0 << 0), /* not a physical package */
+                    parent_offset, cluster, NULL, 0);
+            }
+        }
+    }
+
+    length = g_queue_get_length(list);
+    for (i = 0; i < length; i++) {
+        int core;
+
+        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
+        for (core = 0; core < ms->smp.cores; core++) {
+            uint32_t priv_rsrc[3] = {};
+            priv_rsrc[0] = table_data->len - pptt_start; /* L2 cache offset */
+            build_cache_hierarchy_node(table_data, 0, ARM_L2_CACHE);
+
+            priv_rsrc[1] = table_data->len - pptt_start; /* L1 dcache offset */
+            build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1D_CACHE);
+
+            priv_rsrc[2] = table_data->len - pptt_start; /* L1 icache offset */
+            build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1I_CACHE);
+
+            if (ms->smp.threads > 1) {
+                g_queue_push_tail(list,
+                    GUINT_TO_POINTER(table_data->len - pptt_start));
+                build_processor_hierarchy_node(
+                    table_data,
+                    (0 << 0), /* not a physical package */
+                    parent_offset, core, priv_rsrc, 3);
+            } else {
+                build_processor_hierarchy_node(
+                    table_data,
+                    (1 << 1) | /* ACPI Processor ID valid */
+                    (1 << 3),  /* Node is a Leaf */
+                    parent_offset, uid++, priv_rsrc, 3);
+            }
+        }
+    }
+
+    length = g_queue_get_length(list);
+    for (i = 0; i < length; i++) {
+        int thread;
+
+        parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list));
+        for (thread = 0; thread < ms->smp.threads; thread++) {
+            build_processor_hierarchy_node(
+                table_data,
+                (1 << 1) | /* ACPI Processor ID valid */
+                (1 << 2) | /* Processor is a Thread */
+                (1 << 3),  /* Node is a Leaf */
+                parent_offset, uid++, NULL, 0);
+        }
+    }
+
+    g_queue_free(list);
+    acpi_table_end(linker, &table);
+}
+
+#else
 /*
  * ACPI spec, Revision 6.3
  * 5.2.29 Processor Properties Topology Table (PPTT)
@@ -2084,6 +2241,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
     g_queue_free(list);
     acpi_table_end(linker, &table);
 }
+#endif
 
 /* build rev1/rev3/rev5.1 FADT */
 void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f,
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ddcb73f714..529c0d38b6 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -350,6 +350,72 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms)
                        GIC_FDT_IRQ_TYPE_PPI, ARCH_TIMER_NS_EL2_IRQ, irqflags);
 }
 
+static void fdt_add_l3cache_nodes(const VirtMachineState *vms)
+{
+    int i;
+    const MachineState *ms = MACHINE(vms);
+    int cpus_per_socket = ms->smp.clusters * ms->smp.cores * ms->smp.threads;
+    int sockets = (ms->smp.cpus + cpus_per_socket - 1) / cpus_per_socket;
+
+    for (i = 0; i < sockets; i++) {
+        char *nodename = g_strdup_printf("/cpus/l3-cache%d", i);
+
+        qemu_fdt_add_subnode(ms->fdt, nodename);
+        qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cache");
+        qemu_fdt_setprop_string(ms->fdt, nodename, "cache-unified", "true");
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-level", 3);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", 0x2000000);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", 128);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", 2048);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
+                              qemu_fdt_alloc_phandle(ms->fdt));
+        g_free(nodename);
+    }
+}
+
+static void fdt_add_l2cache_nodes(const VirtMachineState *vms)
+{
+    const MachineState *ms = MACHINE(vms);
+    int cpus_per_socket = ms->smp.clusters * ms->smp.cores * ms->smp.threads;
+    int cpu;
+
+    for (cpu = 0; cpu < ms->smp.cpus; cpu++) {
+        char *next_path = g_strdup_printf("/cpus/l3-cache%d",
+                                          cpu / cpus_per_socket);
+        char *nodename = g_strdup_printf("/cpus/l2-cache%d", cpu);
+
+        qemu_fdt_add_subnode(ms->fdt, nodename);
+        qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cache");
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", 0x80000);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", 64);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", 1024);
+        qemu_fdt_setprop_phandle(ms->fdt, nodename, "next-level-cache",
+                                 next_path);
+        qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
+                              qemu_fdt_alloc_phandle(ms->fdt));
+
+        g_free(next_path);
+        g_free(nodename);
+    }
+}
+
+/* Describe the L1 caches of CPU node @nodename and link it to its L2 node. */
+static void fdt_add_l1cache_prop(const VirtMachineState *vms,
+                                 char *nodename, int cpu)
+{
+    const MachineState *ms = MACHINE(vms);
+    char *cachename = g_strdup_printf("/cpus/l2-cache%d", cpu);
+
+    qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-size", 0x10000);
+    qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-line-size", 64);
+    qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-sets", 256);
+    qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-size", 0x10000);
+    qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-line-size", 64);
+    qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-sets", 256);
+    qemu_fdt_setprop_phandle(ms->fdt, nodename, "next-level-cache", cachename);
+    g_free(cachename);
+}
+
 static void fdt_add_cpu_nodes(const VirtMachineState *vms)
 {
     int cpu;
@@ -384,6 +450,11 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
     qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", addr_cells);
     qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0);
 
+    if (!vmc->no_cpu_topology) {
+        fdt_add_l3cache_nodes(vms);
+        fdt_add_l2cache_nodes(vms);
+    }
+
     for (cpu = smp_cpus - 1; cpu >= 0; cpu--) {
         char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu);
         ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu));
@@ -413,6 +484,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms)
         }
 
         if (!vmc->no_cpu_topology) {
+            fdt_add_l1cache_prop(vms, nodename, cpu);
             qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle",
                                   qemu_fdt_alloc_phandle(ms->fdt));
         }
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 8346003a22..8e8ad8029e 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -221,6 +221,53 @@ struct AcpiBuildTables {
     BIOSLinker *linker;
 } AcpiBuildTables;
 
+#ifdef __aarch64__
+/* Definitions of the hardcoded cache info */
+
+typedef enum {
+    ARM_L1D_CACHE,
+    ARM_L1I_CACHE,
+    ARM_L2_CACHE,
+    ARM_L3_CACHE
+} ArmCacheType;
+
+/* L1 data cache: */
+#define ARM_L1DCACHE_SIZE 65536
+#define ARM_L1DCACHE_SETS 256
+#define ARM_L1DCACHE_ASSOCIATIVITY 4
+#define ARM_L1DCACHE_ATTRIBUTES 2
+#define ARM_L1DCACHE_LINE_SIZE 64
+
+/* L1 instruction cache: */
+#define ARM_L1ICACHE_SIZE 65536
+#define ARM_L1ICACHE_SETS 256
+#define ARM_L1ICACHE_ASSOCIATIVITY 4
+#define ARM_L1ICACHE_ATTRIBUTES 4
+#define ARM_L1ICACHE_LINE_SIZE 64
+
+/* Level 2 unified cache: */
+#define ARM_L2CACHE_SIZE 524288
+#define ARM_L2CACHE_SETS 1024
+#define ARM_L2CACHE_ASSOCIATIVITY 8
+#define ARM_L2CACHE_ATTRIBUTES 10
+#define ARM_L2CACHE_LINE_SIZE 64
+
+/* Level 3 unified cache: */
+#define ARM_L3CACHE_SIZE 33554432
+#define ARM_L3CACHE_SETS 2048
+#define ARM_L3CACHE_ASSOCIATIVITY 15
+#define ARM_L3CACHE_ATTRIBUTES 10
+#define ARM_L3CACHE_LINE_SIZE 128
+
+struct offset_status {
+    uint32_t parent;
+    uint32_t l2_offset;
+    uint32_t l1d_offset;
+    uint32_t l1i_offset;
+};
+
+#endif
+
 typedef
 struct CrsRangeEntry {
     uint64_t base;
diff --git a/tests/data/acpi/virt/PPTT b/tests/data/acpi/virt/PPTT
index f56ea63b369a604877374ad696c396e796ab1c83..b89b2a9c71e0bc2713fc38f5de68fbc39b6302cb 100644
GIT binary patch
literal 208
zcmWFt2no5sz`($y>E!S15v<@85#X!<1dKp25F11@N-!|g18FE=V&Gt4;OA;!U;v7P
z<Y55BRsnHDAR;yp3QVG!0W^Sxfq{d|0d95%L>1UT7A73xEDj6|3JeTfK(!%23<AtR
M4AR>G#4~^x046;S0RR91

literal 96
zcmWFt2nk7GU|?YKaPoKd2v%^42yj*a0!E-1hz+6{L>L&rG>8oYKrs+dflv?<D8>b3
ISs;l402v7f0RR91

-- 
Gitee


From e7e28e79988eb671051d0d2af0eb010314c83d41 Mon Sep 17 00:00:00 2001
From: Ying Fang <fangying1@huawei.com>
Date: Tue, 8 Feb 2022 21:01:09 +0800
Subject: [PATCH 51/55] arm64: Add the cpufreq device to show cpufreq info to
 guest

On ARM64 platform, cpu frequency is retrieved via ACPI CPPC.
A virtual cpufreq device based on ACPI CPPC is created to
present cpu frequency info to the guest.

The default frequency is set to host cpu nominal frequency,
which is obtained from the host CPPC sysfs. Other performance
data are set to the same value, since we don't support guest
performance scaling here.

Performance counters are not emulated either; they simply
return 1 when read, so the guest should fall back to using the
desired performance value as the current performance.

Guest kernel version above 4.18 is required to make it work.

This series is backported from:
https://patchwork.kernel.org/cover/11379943/

Signed-off-by: Ying Fang <fangying1@huawei.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
---
 configs/devices/aarch64-softmmu/default.mak |   1 +
 hw/acpi/aml-build.c                         |  22 ++
 hw/acpi/cpufreq.c                           | 283 ++++++++++++++++++++
 hw/acpi/meson.build                         |   1 +
 hw/arm/virt-acpi-build.c                    |  77 +++++-
 hw/arm/virt.c                               |  13 +
 hw/char/Kconfig                             |   4 +
 include/hw/acpi/acpi-defs.h                 |  38 +++
 include/hw/acpi/aml-build.h                 |   3 +
 include/hw/arm/virt.h                       |   1 +
 tests/data/acpi/virt/DSDT                   | Bin 5196 -> 5669 bytes
 tests/data/acpi/virt/DSDT.memhp             | Bin 6557 -> 7030 bytes
 tests/data/acpi/virt/DSDT.numamem           | Bin 5196 -> 5669 bytes
 tests/data/acpi/virt/DSDT.pxb               | Bin 7679 -> 8152 bytes
 14 files changed, 441 insertions(+), 2 deletions(-)
 create mode 100644 hw/acpi/cpufreq.c

diff --git a/configs/devices/aarch64-softmmu/default.mak b/configs/devices/aarch64-softmmu/default.mak
index cf43ac8da1..c7a710a0f1 100644
--- a/configs/devices/aarch64-softmmu/default.mak
+++ b/configs/devices/aarch64-softmmu/default.mak
@@ -6,3 +6,4 @@ include ../arm-softmmu/default.mak
 CONFIG_XLNX_ZYNQMP_ARM=y
 CONFIG_XLNX_VERSAL=y
 CONFIG_SBSA_REF=y
+CONFIG_CPUFREQ=y
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index bebf49622b..c4edaafa4a 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1554,6 +1554,28 @@ Aml *aml_sleep(uint64_t msec)
     return var;
 }
 
+/* ACPI 5.0b: 6.4.3.7 Generic Register Descriptor */
+Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width,
+                          uint8_t reg_offset, AmlAccessType type, uint64_t addr)
+{
+    int i;
+    Aml *var = aml_alloc();
+    build_append_byte(var->buf, 0x82); /* Generic Register Descriptor */
+    build_append_byte(var->buf, 0x0C); /* Length, bits[7:0] value = 0x0C */
+    build_append_byte(var->buf, 0);    /* Length, bits[15:8] value = 0 */
+    build_append_byte(var->buf, rs);   /* Address Space ID */
+    build_append_byte(var->buf, reg_width);   /* Register Bit Width */
+    build_append_byte(var->buf, reg_offset);  /* Register Bit Offset */
+    build_append_byte(var->buf, type);        /* Access Size */
+
+    /* Register address */
+    for (i = 0; i < 8; i++) {
+        build_append_byte(var->buf, extract64(addr, i * 8, 8));
+    }
+
+    return var;
+}
+
 static uint8_t Hex2Byte(const char *src)
 {
     int hi, lo;
diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c
new file mode 100644
index 0000000000..a84db490b3
--- /dev/null
+++ b/hw/acpi/cpufreq.c
@@ -0,0 +1,283 @@
+/*
+ * ACPI CPPC register device
+ *
+ * Support for showing CPU frequency in guest OS.
+ *
+ * Copyright (c) 2019 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "chardev/char.h"
+#include "qemu/log.h"
+#include "trace.h"
+#include "qemu/option.h"
+#include "sysemu/sysemu.h"
+#include "hw/acpi/acpi-defs.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "hw/boards.h"
+
+#define TYPE_CPUFREQ "cpufreq"
+#define CPUFREQ(obj) OBJECT_CHECK(CpuhzState, (obj), TYPE_CPUFREQ)
+#define NOMINAL_FREQ_FILE "/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq"
+#define CPU_MAX_FREQ_FILE "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"
+#define HZ_MAX_LENGTH 1024
+#define MAX_SUPPORT_SPACE 0x10000
+
+/*
+ * Since Hi1616 will not support CPPC, we simply use its nominal frequency as
+ * the default.
+ */
+#define DEFAULT_HZ 2400
+
+int cppc_regs_offset[CPPC_REG_COUNT] = {
+    [HIGHEST_PERF] = 0,
+    [NOMINAL_PERF] = 4,
+    [LOW_NON_LINEAR_PERF] = 8,
+    [LOWEST_PERF] = 12,
+    [GUARANTEED_PERF] = 16,
+    [DESIRED_PERF] = 20,
+    [MIN_PERF] = -1,
+    [MAX_PERF] = -1,
+    [PERF_REDUC_TOLERANCE] = -1,
+    [TIME_WINDOW] = -1,
+    [CTR_WRAP_TIME] = -1,
+    [REFERENCE_CTR] = 24,
+    [DELIVERED_CTR] = 32,
+    [PERF_LIMITED] = 40,
+    [ENABLE] = -1,
+    [AUTO_SEL_ENABLE] = -1,
+    [AUTO_ACT_WINDOW] = -1,
+    [ENERGY_PERF] = -1,
+    [REFERENCE_PERF] = -1,
+    [LOWEST_FREQ] = 44,
+    [NOMINAL_FREQ] = 48,
+};
+
+typedef struct CpuhzState {
+    SysBusDevice parent_obj;
+
+    MemoryRegion iomem;
+    uint32_t HighestPerformance;
+    uint32_t NominalPerformance;
+    uint32_t LowestNonlinearPerformance;
+    uint32_t LowestPerformance;
+    uint32_t GuaranteedPerformance;
+    uint32_t DesiredPerformance;
+    uint64_t ReferencePerformanceCounter;
+    uint64_t DeliveredPerformanceCounter;
+    uint32_t PerformanceLimited;
+    uint32_t LowestFreq;
+    uint32_t NominalFreq;
+    uint32_t reg_size;
+} CpuhzState;
+
+
+/* MMIO read handler: return the CPPC register selected by @offset. */
+static uint64_t cpufreq_read(void *opaque, hwaddr offset, unsigned size)
+{
+    CpuhzState *s = (CpuhzState *)opaque;
+    uint64_t r;
+    MachineState *ms = MACHINE(qdev_get_machine());
+    unsigned int smp_cpus = ms->smp.cpus;
+
+    /* Every vCPU owns one bank of CPPC_REG_PER_CPU_STRIDE bytes. */
+    if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) {
+        warn_report("cpufreq_read: offset 0x%" HWADDR_PRIx " out of range",
+                    offset);
+        return 0;
+    }
+
+    switch (offset % CPPC_REG_PER_CPU_STRIDE) {
+    case 0:
+        r = s->HighestPerformance;
+        break;
+    case 4:
+        r = s->NominalPerformance;
+        break;
+    case 8:
+        r = s->LowestNonlinearPerformance;
+        break;
+    case 12:
+        r = s->LowestPerformance;
+        break;
+    case 16:
+        r = s->GuaranteedPerformance;
+        break;
+    case 20:
+        r = s->DesiredPerformance;
+        break;
+    /*
+     * We don't have real counters and it is hard to emulate, so always set the
+     * counter value to 1 to rely on Linux to use the DesiredPerformance value
+     * directly.
+     */
+    case 24:
+        r = s->ReferencePerformanceCounter;
+        break;
+    /*
+     * The guest may still access the 64-bit counters with 32-bit reads; serve
+     * the upper halves too so that those reads do not hit the default case.
+     */
+    case 28:
+        r = s->ReferencePerformanceCounter >> 32;
+        break;
+    case 32:
+        r = s->DeliveredPerformanceCounter;
+        break;
+    case 36:
+        r = s->DeliveredPerformanceCounter >> 32;
+        break;
+
+    case 40:
+        r = s->PerformanceLimited;
+        break;
+    case 44:
+        r = s->LowestFreq;
+        break;
+    case 48:
+        r = s->NominalFreq;
+        break;
+    default:
+        error_printf("cpufreq_read: Bad offset 0x%" HWADDR_PRIx "\n", offset);
+        r = 0;
+        break;
+    }
+    return r;
+}
+
+/* MMIO write handler: only DESIRED_PERF (offset 20) accepts writes. */
+static void cpufreq_write(void *opaque, hwaddr offset,
+                           uint64_t value, unsigned size)
+{
+    MachineState *ms = MACHINE(qdev_get_machine());
+    unsigned int smp_cpus = ms->smp.cpus;
+
+    /* Every vCPU owns one bank of CPPC_REG_PER_CPU_STRIDE bytes. */
+    if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) {
+        error_printf("cpufreq_write: offset 0x%" HWADDR_PRIx
+                     " out of range\n", offset);
+        return;
+    }
+
+    switch (offset % CPPC_REG_PER_CPU_STRIDE) {
+    case 20: /* value is discarded: no guest performance scaling */
+        break;
+    default:
+        error_printf("cpufreq_write: Bad offset 0x%" HWADDR_PRIx "\n", offset);
+    }
+}
+
+/* Parse an unsigned integer from host file @hostpath; return 0 on failure. */
+static uint32_t CPPC_Read(const char *hostpath)
+{
+    int fd;
+    char buffer[HZ_MAX_LENGTH] = { 0 };
+    unsigned long hz;
+    ssize_t len;
+    const char *endptr = NULL;
+
+    fd = qemu_open_old(hostpath, O_RDONLY);
+    if (fd < 0) {
+        return 0;
+    }
+
+    /* Leave the last byte untouched so that buffer stays NUL-terminated. */
+    len = read(fd, buffer, HZ_MAX_LENGTH - 1);
+    qemu_close(fd);
+    if (len <= 0) {
+        return 0;
+    }
+    if (qemu_strtoul(buffer, &endptr, 0, &hz) < 0) {
+        return 0;
+    }
+    return (uint32_t)hz;
+}
+
+static const MemoryRegionOps cpufreq_ops = {
+    .read = cpufreq_read,
+    .write = cpufreq_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static void hz_init(CpuhzState *s)
+{
+    uint32_t hz;
+
+    hz = CPPC_Read(NOMINAL_FREQ_FILE);
+    if (hz == 0) {
+        hz = CPPC_Read(CPU_MAX_FREQ_FILE);
+        if (hz == 0) {
+            hz = DEFAULT_HZ;
+        } else {
+            /* Value in CpuMaxFrequency is in KHz unit; convert to MHz */
+            hz = hz / 1000;
+        }
+    }
+
+    s->HighestPerformance = hz;
+    s->NominalPerformance = hz;
+    s->LowestNonlinearPerformance = hz;
+    s->LowestPerformance = hz;
+    s->GuaranteedPerformance = hz;
+    s->DesiredPerformance = hz;
+    s->ReferencePerformanceCounter = 1;
+    s->DeliveredPerformanceCounter = 1;
+    s->PerformanceLimited = 0;
+    s->LowestFreq = hz;
+    s->NominalFreq = hz;
+}
+
+static void cpufreq_init(Object *obj)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+    CpuhzState *s = CPUFREQ(obj);
+
+    MachineState *ms = MACHINE(qdev_get_machine());
+    unsigned int smp_cpus = ms->smp.cpus;
+
+    s->reg_size = smp_cpus * CPPC_REG_PER_CPU_STRIDE;
+    if (s->reg_size > MAX_SUPPORT_SPACE) {
+        error_report("Required space 0x%x excesses the max support 0x%x",
+                 s->reg_size, MAX_SUPPORT_SPACE);
+        goto err_end;
+    }
+
+    memory_region_init_io(&s->iomem, OBJECT(s), &cpufreq_ops, s, "cpufreq",
+                          s->reg_size);
+    sysbus_init_mmio(sbd, &s->iomem);
+    hz_init(s);
+    return;
+
+err_end:
+    /* Set desired perf register offset to -1 to indicate no support for CPPC */
+    cppc_regs_offset[DESIRED_PERF] = -1;
+}
+
+static const TypeInfo cpufreq_arm_info = {
+    .name          = TYPE_CPUFREQ,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(CpuhzState),
+    .instance_init = cpufreq_init,
+};
+
+static void cpufreq_register_types(void)
+{
+    type_register_static(&cpufreq_arm_info);
+}
+
+type_init(cpufreq_register_types)
diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
index adf6347bc4..448ea6afb4 100644
--- a/hw/acpi/meson.build
+++ b/hw/acpi/meson.build
@@ -25,6 +25,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_X86_ICH', if_true: files('ich9.c', 'tco.c'))
 acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c'))
 acpi_ss.add(when: 'CONFIG_PC', if_false: files('acpi-x86-stub.c'))
 acpi_ss.add(when: 'CONFIG_TPM', if_true: files('tpm.c'))
+acpi_ss.add(when: 'CONFIG_CPUFREQ', if_true: files('cpufreq.c'))
 softmmu_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c', 'ghes-stub.c'))
 softmmu_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss)
 softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-stub.c', 'aml-build-stub.c',
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 674f902652..1ca705654b 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -60,7 +60,68 @@
 
 #define ACPI_BUILD_TABLE_SIZE             0x20000
 
-static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms)
+static void acpi_dsdt_add_psd(Aml *dev, int cpus)
+{
+    Aml *pkg;
+    Aml *sub;
+
+    sub = aml_package(5);
+    aml_append(sub, aml_int(5));
+    aml_append(sub, aml_int(0));
+    /* Assume all vCPUs belong to the same domain */
+    aml_append(sub, aml_int(0));
+    /* SW_ANY: OSPM coordinate, initiate on any processor */
+    aml_append(sub, aml_int(0xFD));
+    aml_append(sub, aml_int(cpus));
+
+    pkg = aml_package(1);
+    aml_append(pkg, sub);
+
+    aml_append(dev, aml_name_decl("_PSD", pkg));
+}
+
+static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base, int *regs_offset)
+{
+    Aml *cpc;
+    int i;
+
+    /* Use version 3 of CPPC table from ACPI 6.3 */
+    cpc = aml_package(23);
+    aml_append(cpc, aml_int(23));
+    aml_append(cpc, aml_int(3));
+
+    for (i = 0; i < CPPC_REG_COUNT; i++) {
+        Aml *res;
+        uint8_t reg_width;
+        uint8_t acc_type;
+        uint64_t addr;
+
+        if (regs_offset[i] == -1) {
+            reg_width = 0;
+            acc_type = AML_ANY_ACC;
+            addr = 0;
+        } else {
+            addr = cpu_base + regs_offset[i];
+            if (i == REFERENCE_CTR || i == DELIVERED_CTR) {
+                reg_width = 64;
+                acc_type = AML_QWORD_ACC;
+            } else {
+                reg_width = 32;
+                acc_type = AML_DWORD_ACC;
+            }
+        }
+
+        res = aml_resource_template();
+        aml_append(res, aml_generic_register(AML_SYSTEM_MEMORY, reg_width, 0,
+                                             acc_type, addr));
+        aml_append(cpc, res);
+    }
+
+    aml_append(dev, aml_name_decl("_CPC", cpc));
+}
+
+static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms,
+                               const MemMapEntry *cppc_memmap)
 {
     MachineState *ms = MACHINE(vms);
     uint16_t i;
@@ -69,6 +130,18 @@ static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms)
         Aml *dev = aml_device("C%.03X", i);
         aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007")));
         aml_append(dev, aml_name_decl("_UID", aml_int(i)));
+
+        /*
+         * Append _CPC and _PSD so that the guest can show the CPU frequency.
+         * CPPC availability is indicated by the DESIRED_PERF register offset.
+         */
+        if (cppc_regs_offset[DESIRED_PERF] != -1) {
+            acpi_dsdt_add_cppc(dev,
+                               cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE,
+                               cppc_regs_offset);
+            acpi_dsdt_add_psd(dev, ms->smp.cpus);
+        }
+
         aml_append(scope, dev);
     }
 }
@@ -858,7 +931,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
      * the RTC ACPI device at all when using UEFI.
      */
     scope = aml_scope("\\_SB");
-    acpi_dsdt_add_cpus(scope, vms);
+    acpi_dsdt_add_cpus(scope, vms, &memmap[VIRT_CPUFREQ]);
     acpi_dsdt_add_uart(scope, &memmap[VIRT_UART],
                        (irqmap[VIRT_UART] + ARM_SPI_BASE));
     if (vmc->acpi_expose_flash) {
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 529c0d38b6..0538d258fa 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -154,6 +154,7 @@ static const MemMapEntry base_memmap[] = {
     [VIRT_PVTIME] =             { 0x090a0000, 0x00010000 },
     [VIRT_SECURE_GPIO] =        { 0x090b0000, 0x00001000 },
     [VIRT_MMIO] =               { 0x0a000000, 0x00000200 },
+    [VIRT_CPUFREQ] =            { 0x0b000000, 0x00010000 },
     /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
     [VIRT_PLATFORM_BUS] =       { 0x0c000000, 0x02000000 },
     [VIRT_SECURE_MEM] =         { 0x0e000000, 0x01000000 },
@@ -931,6 +932,16 @@ static void create_uart(const VirtMachineState *vms, int uart,
     g_free(nodename);
 }
 
+static void create_cpufreq(const VirtMachineState *vms, MemoryRegion *mem)
+{
+    hwaddr base = vms->memmap[VIRT_CPUFREQ].base;
+    DeviceState *dev = qdev_new("cpufreq");
+    SysBusDevice *s = SYS_BUS_DEVICE(dev);
+
+    sysbus_realize_and_unref(s, &error_fatal);
+    memory_region_add_subregion(mem, base, sysbus_mmio_get_region(s, 0));
+}
+
 static void create_rtc(const VirtMachineState *vms)
 {
     char *nodename;
@@ -2190,6 +2201,8 @@ static void machvirt_init(MachineState *machine)
 
     create_uart(vms, VIRT_UART, sysmem, serial_hd(0));
 
+    create_cpufreq(vms, sysmem);
+
     if (vms->secure) {
         create_secure_ram(vms, secure_sysmem, secure_tag_sysmem);
         create_uart(vms, VIRT_SECURE_UART, secure_sysmem, serial_hd(1));
diff --git a/hw/char/Kconfig b/hw/char/Kconfig
index 6b6cf2fc1d..335a60c2c1 100644
--- a/hw/char/Kconfig
+++ b/hw/char/Kconfig
@@ -71,3 +71,7 @@ config GOLDFISH_TTY
 
 config SHAKTI_UART
     bool
+
+config CPUFREQ
+    bool
+    default y
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index c97e8633ad..ab86583228 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -92,4 +92,42 @@ typedef struct AcpiFadtData {
 #define ACPI_FADT_ARM_PSCI_COMPLIANT  (1 << 0)
 #define ACPI_FADT_ARM_PSCI_USE_HVC    (1 << 1)
 
+/*
+ * CPPC register definition from kernel header
+ * include/acpi/cppc_acpi.h
+ * The last element is newly added for easy use
+ */
+enum cppc_regs {
+    HIGHEST_PERF,
+    NOMINAL_PERF,
+    LOW_NON_LINEAR_PERF,
+    LOWEST_PERF,
+    GUARANTEED_PERF,
+    DESIRED_PERF,
+    MIN_PERF,
+    MAX_PERF,
+    PERF_REDUC_TOLERANCE,
+    TIME_WINDOW,
+    CTR_WRAP_TIME,
+    REFERENCE_CTR,
+    DELIVERED_CTR,
+    PERF_LIMITED,
+    ENABLE,
+    AUTO_SEL_ENABLE,
+    AUTO_ACT_WINDOW,
+    ENERGY_PERF,
+    REFERENCE_PERF,
+    LOWEST_FREQ,
+    NOMINAL_FREQ,
+    CPPC_REG_COUNT,
+};
+
+#define CPPC_REG_PER_CPU_STRIDE     0x40
+
+/*
+ * Offset for each CPPC register; -1 for unavailable
+ * The whole register space is unavailable if desired perf offset is -1.
+ */
+extern int cppc_regs_offset[CPPC_REG_COUNT];
+
 #endif
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 8e8ad8029e..2e00d2e208 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -429,6 +429,9 @@ Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, AmlTransferSize sz,
              uint8_t channel);
 Aml *aml_sleep(uint64_t msec);
 Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source);
+Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width,
+                          uint8_t reg_offset, AmlAccessType type,
+                          uint64_t addr);
 
 /* Block AML object primitives */
 Aml *aml_scope(const char *name_format, ...) GCC_FMT_ATTR(1, 2);
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index dc6b66ffc8..a4356cf736 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -70,6 +70,7 @@ enum {
     VIRT_GIC_REDIST,
     VIRT_SMMU,
     VIRT_UART,
+    VIRT_CPUFREQ,
     VIRT_MMIO,
     VIRT_RTC,
     VIRT_FW_CFG,
diff --git a/tests/data/acpi/virt/DSDT b/tests/data/acpi/virt/DSDT
index c47503990715d389914fdf9c8bccb510761741ac..dd8573f0312918ea1ba17496e9f3a275e98ab214 100644
GIT binary patch
delta 514
zcmX@3u~di4CD<iIRg8gw>C{9nX`Y4z#+dkEr}*e5XZeW+lJy+%&H>Is4l?3g;#|yv
zB3yz^JPZs949pA+4BSA>P|1KK$bwCf1Dhbw5KQd?n1b~T%pw?~AbX(zVLeO)NK;MF
zfuWv70>gR-1{MVjL12(*U<gv~rf8TE1pEZFUI)W^pr8R<kRv`I*hPqkQHYb3i<N<a
L>o4O*Pi7GSAkZ%L

delta 38
tcmZ3gb4G*9CD<jzM}&caY5qhmY3_C>#+dkEr}*e5sfmV?o0XWDMF7oq3Hks4

diff --git a/tests/data/acpi/virt/DSDT.memhp b/tests/data/acpi/virt/DSDT.memhp
index bae36cdd397473afe3923c52f030641a5ab19d5d..d764481440adea59d928a1a48afe0ba1ce135597 100644
GIT binary patch
delta 514
zcmbPh{LPHZCD<jTOqzj#>Geb|X`aTdj4|=SPVv!A&hirtB<nfiodcYO9Aw0~#JQLS
zMYsf;co-NI7?>Ft7`TC$p^^bfkOi9{2R1>VA(+|)Fa_%wm_;x|LH0rc!g`npkfxfT
z14BKF1cvnv3@i#5g1{irz!0R|P0=tT2>1zTy$*);KtThzAV+*au!|56qYx)67b^n;
L*I&kso*|L|A3`us

delta 38
ucmexnHrJTTCD<iot|S8k(}9Ux(%c=>8Drvuo#LaLq$V0lZdPInkpuwgSPN7D

diff --git a/tests/data/acpi/virt/DSDT.numamem b/tests/data/acpi/virt/DSDT.numamem
index c47503990715d389914fdf9c8bccb510761741ac..dd8573f0312918ea1ba17496e9f3a275e98ab214 100644
GIT binary patch
delta 514
zcmX@3u~di4CD<iIRg8gw>C{9nX`Y4z#+dkEr}*e5XZeW+lJy+%&H>Is4l?3g;#|yv
zB3yz^JPZs949pA+4BSA>P|1KK$bwCf1Dhbw5KQd?n1b~T%pw?~AbX(zVLeO)NK;MF
zfuWv70>gR-1{MVjL12(*U<gv~rf8TE1pEZFUI)W^pr8R<kRv`I*hPqkQHYb3i<N<a
L>o4O*Pi7GSAkZ%L

delta 38
tcmZ3gb4G*9CD<jzM}&caY5qhmY3_C>#+dkEr}*e5sfmV?o0XWDMF7oq3Hks4

diff --git a/tests/data/acpi/virt/DSDT.pxb b/tests/data/acpi/virt/DSDT.pxb
index fbd78f44c4785d19759daea909fe6d6f9a6e6b01..9ff22b5ea465d2f678beb2ce9d905861d69a5b87 100644
GIT binary patch
delta 514
zcmexweZ!v1CD<k8hCBlU)2oSG(mc(-8Drvuo#LaLoaHAPNY-=2I|n!mImn1}iE}Xv
zif{=w@h~tbFfcPPFmMAgLnQ-}APY7@4s3!zLol@qU<%eVFpFS_g6xF?g!M2HAWbzv
z2ZnkU2@LBU7+4fA1c5=Kfgwn_o1$Sx5bzVwdL0bwfr18bL5}!<U>6}CMj=jCE>;Ex
MuD^^MJ-5jM0391Kw*UYD

delta 38
ucmca%|KFO+CD<k8zbpd-)BcHE(%fBl8Drvuo#LaLq$V0lZdPL2CJO)tYz({r

-- 
Gitee


From 04367b942d923c29da86fb5142ddfb767394bb8a Mon Sep 17 00:00:00 2001
From: yang hang <yanghang44@huawei.com>
Date: Tue, 8 Feb 2022 11:56:08 +0800
Subject: [PATCH 52/55] timer: sync the host realtime to the RTC

Synchronize the host realtime to the RTC to eliminate the time offset.

Signed-off-by: yang hang <yanghang44@huawei.com>
Signed-off-by: Bo Wan <wanbo13@huawei.com>
---
 include/qemu-log.h            |  47 +++++++
 include/qemu/qemu-timer.inc.h |  31 +++++
 softmmu/cpus.c                |   4 +
 softmmu/vl.c                  |   3 +
 util/meson.build              |   1 +
 util/qemu-log.c               | 225 ++++++++++++++++++++++++++++++++++
 util/qemu-timer.c             |  26 ++++
 7 files changed, 337 insertions(+)
 create mode 100644 include/qemu-log.h
 create mode 100644 include/qemu/qemu-timer.inc.h
 create mode 100644 util/qemu-log.c

diff --git a/include/qemu-log.h b/include/qemu-log.h
new file mode 100644
index 0000000000..1de8c62c24
--- /dev/null
+++ b/include/qemu-log.h
@@ -0,0 +1,59 @@
+/*
+ * Introduce QEMU_LOG
+ *
+ * LOG: Introduce QEMU_LOG.
+ *
+ * Copyright (c) 2017-2020 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef UVP_QEMU_COMMON_H
+#define UVP_QEMU_COMMON_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/time.h>
+#include <syslog.h>
+#include "qemu/typedefs.h"
+#include "qemu/osdep.h"
+
+#define TIMESTAMP_MAX_LEN  33 /* RFC 3339 timestamp: 32 chars + NUL */
+
+/* Write the current local time into buf as an RFC 3339 timestamp. */
+void qemu_get_timestamp(char *buf, int buf_size);
+
+/*
+ * Format tp as an RFC 3339 timestamp in buf. buf_size must be at least
+ * TIMESTAMP_MAX_LEN; otherwise buf is set to the empty string.
+ */
+void qemu_convert_timestamp(struct timeval tp, char *buf, int buf_size);
+
+/*
+ * Print one log line to stderr. level is a syslog priority (LOG_ERR, ...);
+ * funcname may be NULL, in which case funcname and linenr are not printed.
+ */
+void qemu_log_print(int level, const char *funcname, int linenr,
+                    const char *fmt, ...) GCC_FMT_ATTR(4, 5);
+
+/* Log at the given level, tagged with the calling function and line. */
+#define QEMU_LOG(level, format, ...) \
+         qemu_log_print(level, __func__, __LINE__, format, ##__VA_ARGS__)
+
+#endif
diff --git a/include/qemu/qemu-timer.inc.h b/include/qemu/qemu-timer.inc.h
new file mode 100644
index 0000000000..a8e9da18c9
--- /dev/null
+++ b/include/qemu/qemu-timer.inc.h
@@ -0,0 +1,31 @@
+/*
+ * Introduce qemu-timer function
+ *
+ * We introduce the functions here to decouple for the upstream
+ * include/qemu/timer.h
+ *
+ * Copyright (c) 2017-2020 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HUAWEI_INCLUDE_QEMU_QEMU_TIMER_INC_H
+#define HUAWEI_INCLUDE_QEMU_QEMU_TIMER_INC_H
+
+#include "qemu/timer.h"
+
+void qemu_clock_trigger_reset(QEMUClockType type);
+void qemu_clock_disable_reset(void);
+
+#endif /* end of include guard: HUAWEI_INCLUDE_QEMU_QEMU_TIMER_INC_H */
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 071085f840..902a13eacb 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -45,6 +45,8 @@
 #include "hw/boards.h"
 #include "hw/hw.h"
 #include "trace.h"
+#include "qemu/timer.h"
+#include "qemu/qemu-timer.inc.h"
 
 #ifdef CONFIG_LINUX
 
@@ -65,6 +67,7 @@
 #endif /* CONFIG_LINUX */
 
 static QemuMutex qemu_global_mutex;
+extern QEMUClockType rtc_clock;
 
 bool cpu_is_stopped(CPUState *cpu)
 {
@@ -683,6 +686,7 @@ int vm_prepare_start(void)
         return -1;
     }
 
+    qemu_clock_trigger_reset(rtc_clock);
     /* We are sending this now, but the CPUs will be resumed shortly later */
     qapi_event_send_resume();
 
diff --git a/softmmu/vl.c b/softmmu/vl.c
index d9e4c619d3..bec3eedc90 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -126,6 +126,7 @@
 #include "qapi/qmp/qerror.h"
 #include "sysemu/iothread.h"
 #include "qemu/guest-random.h"
+#include "qemu/qemu-timer.inc.h"
 
 #include "config-host.h"
 
@@ -3504,6 +3505,8 @@ void qemu_init(int argc, char **argv, char **envp)
                     runstate_set(RUN_STATE_INMIGRATE);
                 }
                 incoming = optarg;
+
+                qemu_clock_disable_reset();
                 break;
             case QEMU_OPTION_only_migratable:
                 only_migratable = 1;
diff --git a/util/meson.build b/util/meson.build
index 05b593055a..2674383806 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -80,6 +80,7 @@ if have_block
   util_ss.add(files('qemu-coroutine-sleep.c'))
   util_ss.add(files('qemu-co-shared-resource.c'))
   util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
+  util_ss.add(files('qemu-log.c'))
   util_ss.add(files('readline.c'))
   util_ss.add(files('throttle.c'))
   util_ss.add(files('timed-average.c'))
diff --git a/util/qemu-log.c b/util/qemu-log.c
new file mode 100644
index 0000000000..31379bfa78
--- /dev/null
+++ b/util/qemu-log.c
@@ -0,0 +1,225 @@
+/*
+ * Introduce QEMU_LOG
+ *
+ * LOG: Introduce QEMU_LOG.
+ *
+ * Copyright (c) 2017-2020 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu-log.h"
+#include <unistd.h>
+#include "qemu/osdep.h"
+
+#define BEGIN_YEAR 1900
+#define DEFAULT_SECFRACPRECISION 6
+#define LOG_LEVEL_NAME_MAX_LEN 10
+
+static const int tenPowers[6] = { 1, 10, 100, 1000, 10000, 100000 };
+
+typedef char intTiny;
+
+struct syslogTime {
+        intTiny timeType;       /* 0 - uninitialized
+                                 * 1 - RFC 3164
+                                 * 2 - syslog-protocol
+                                 */
+        intTiny month;
+        intTiny day;
+        intTiny hour;           /* 24 hour clock */
+        intTiny minute;
+        intTiny second;
+        intTiny secfracPrecision;
+        intTiny OffsetMinute;   /* UTC offset in minutes */
+        intTiny OffsetHour;     /* UTC offset in hours
+                                 * full UTC offset minutes
+                                 *           = OffsetHours*60 + OffsetMinute.
+                                 * Then use OffsetMode to know the direction.
+                                 */
+        char OffsetMode;        /* UTC offset + or - */
+        short year;
+        int secfrac;    /* fractional seconds (must be 32 bit!) */
+        intTiny inUTC;  /* forced UTC? */
+};
+typedef struct syslogTime syslogTime_t;
+
+typedef struct syslogName_s {
+        const char *c_name;
+        int c_val;
+} syslogName_t;
+
+static syslogName_t syslogPriNames[] = {
+        {"emerg",   LOG_EMERG},
+        {"alert",   LOG_ALERT},
+        {"crit",    LOG_CRIT},
+        {"err",     LOG_ERR},
+        {"warning", LOG_WARNING},
+        {"notice",  LOG_NOTICE},
+        {"info",    LOG_INFO},
+        {"debug",   LOG_DEBUG},
+        {NULL,      -1}
+};
+
+/**
+ * Format a syslogTimestamp to a RFC3339 timestamp string (as
+ * specified in syslog-protocol).
+ *
+ * Notes: rfc_time_buf size >= TIMESTAMP_MAX_LEN
+ */
+static void formatTimestamp3339(struct syslogTime *ts, char *rfc_time_buf)
+{
+    int iBuf = 0;
+    int power = 0;
+    int secfrac = 0;
+    short digit = 0;
+    char *pBuf = rfc_time_buf;
+
+    pBuf[iBuf++] = (ts->year / 1000) % 10 + '0';
+    pBuf[iBuf++] = (ts->year / 100) % 10 + '0';
+    pBuf[iBuf++] = (ts->year / 10) % 10 + '0';
+    pBuf[iBuf++] = ts->year % 10 + '0';
+    pBuf[iBuf++] = '-';
+    /* month */
+    pBuf[iBuf++] = (ts->month / 10) % 10 + '0';
+    pBuf[iBuf++] = ts->month % 10 + '0';
+    pBuf[iBuf++] = '-';
+    /* day */
+    pBuf[iBuf++] = (ts->day / 10) % 10 + '0';
+    pBuf[iBuf++] = ts->day % 10 + '0';
+    pBuf[iBuf++] = 'T';
+    /* hour */
+    pBuf[iBuf++] = (ts->hour / 10) % 10 + '0';
+    pBuf[iBuf++] = ts->hour % 10 + '0';
+    pBuf[iBuf++] = ':';
+    /* minute */
+    pBuf[iBuf++] = (ts->minute / 10) % 10 + '0';
+    pBuf[iBuf++] = ts->minute % 10 + '0';
+    pBuf[iBuf++] = ':';
+    /* second */
+    pBuf[iBuf++] = (ts->second / 10) % 10 + '0';
+    pBuf[iBuf++] = ts->second % 10 + '0';
+
+    if (ts->secfracPrecision > 0) {
+        pBuf[iBuf++] = '.';
+        power = tenPowers[(ts->secfracPrecision - 1) % 6];
+        secfrac = ts->secfrac;
+        while (power > 0) {
+            digit = secfrac / power;
+            secfrac -= digit * power;
+            power /= 10;
+            pBuf[iBuf++] = digit + '0';
+        }
+    }
+
+    pBuf[iBuf++] = ts->OffsetMode;
+    pBuf[iBuf++] = (ts->OffsetHour / 10) % 10 + '0';
+    pBuf[iBuf++] = ts->OffsetHour % 10 + '0';
+    pBuf[iBuf++] = ':';
+    pBuf[iBuf++] = (ts->OffsetMinute / 10) % 10 + '0';
+    pBuf[iBuf++] = ts->OffsetMinute % 10 + '0';
+
+    pBuf[iBuf] = '\0';
+}
+
+void qemu_convert_timestamp(struct timeval tp, char *buf, int buf_size)
+{
+    struct tm *tm;
+    struct tm tmBuf;
+    long lBias = 0;
+    time_t secs;
+    syslogTime_t ts;
+    char rfc_time_buf[TIMESTAMP_MAX_LEN] = {0};
+
+    /* The caller's buffer must hold a full RFC 3339 timestamp (33 bytes) */
+    if (buf_size < TIMESTAMP_MAX_LEN) {
+        buf[0] = '\0';
+        (void)printf("RFC 3339 timestamp length must be greater than or equal 33\n");
+        return;
+    }
+
+    secs = tp.tv_sec;
+    tm = localtime_r(&secs, &tmBuf);
+
+    ts.year = tm->tm_year + BEGIN_YEAR;
+    ts.month = tm->tm_mon + 1;
+    ts.day = tm->tm_mday;
+    ts.hour = tm->tm_hour;
+    ts.minute = tm->tm_min;
+    ts.second = tm->tm_sec;
+    ts.secfrac = tp.tv_usec;
+    ts.secfracPrecision = DEFAULT_SECFRACPRECISION;
+
+    lBias = tm->tm_gmtoff;
+    if (lBias < 0) {
+        ts.OffsetMode = '-';
+        lBias *= -1;
+    } else {
+        ts.OffsetMode = '+';
+    }
+
+    ts.OffsetHour = lBias / 3600;
+    ts.OffsetMinute = (lBias % 3600) / 60;
+
+    formatTimestamp3339(&ts, rfc_time_buf);
+    (void)snprintf(buf, buf_size, "%s", rfc_time_buf);
+}
+
+void qemu_get_timestamp(char *buf, int buf_size)
+{
+    struct timeval tp;
+    (void)gettimeofday(&tp, NULL);
+
+    qemu_convert_timestamp(tp, buf, buf_size);
+}
+
+
+static void qemu_get_loglevelname(int level, char *log_level_name, int len)
+{
+    syslogName_t *c;
+
+    for (c = syslogPriNames; c->c_name; c++) {
+        if (level == c->c_val) {
+            (void)snprintf(log_level_name, len, "%s", c->c_name);
+            return;
+        }
+    }
+
+    (void)printf("The log level is wrong\n");
+}
+
+void qemu_log_print(int level, const char *funcname, int linenr,
+                    const char *fmt, ...)
+{
+    char stamp[TIMESTAMP_MAX_LEN] = {0};
+    char prio[LOG_LEVEL_NAME_MAX_LEN] = {0};
+    char msg[1024] = {0};
+    va_list args;
+
+    qemu_get_timestamp(stamp, sizeof(stamp));
+    qemu_get_loglevelname(level, prio, sizeof(prio));
+    /* Expand the caller's format into a bounded local buffer. */
+    va_start(args, fmt);
+    (void)vsnprintf(msg, sizeof(msg), fmt, args);
+    va_end(args);
+    /* The "function[line]" field is dropped when no function name is given. */
+    if (funcname == NULL) {
+        (void)fprintf(stderr, "%s|%s|qemu[%d]|[%d]|%s", stamp, prio,
+                      getpid(), qemu_get_thread_id(), msg);
+    } else {
+        (void)fprintf(stderr, "%s|%s|qemu[%d]|[%d]|%s[%d]|: %s", stamp,
+                      prio, getpid(), qemu_get_thread_id(),
+                      funcname, linenr, msg);
+    }
+}
diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index f36c75e594..ed888d91e0 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -29,6 +29,8 @@
 #include "sysemu/cpu-timers.h"
 #include "sysemu/replay.h"
 #include "sysemu/cpus.h"
+#include "qemu-log.h"
+#include "qemu/qemu-timer.inc.h"
 
 #ifdef CONFIG_POSIX
 #include <pthread.h>
@@ -44,11 +46,15 @@
 
 /***********************************************************/
 /* timers */
+bool g_vm_cold_start_flag = true;
 
 typedef struct QEMUClock {
     /* We rely on BQL to protect the timerlists */
     QLIST_HEAD(, QEMUTimerList) timerlists;
 
+    NotifierList reset_notifiers;
+    int64_t last;
+
     QEMUClockType type;
     bool enabled;
 } QEMUClock;
@@ -672,3 +678,23 @@ bool qemu_clock_run_all_timers(void)
 
     return progress;
 }
+
+void qemu_clock_disable_reset(void)
+{
+    g_vm_cold_start_flag = false;
+}
+
+/* One-shot hook: acts only while the cold-start flag is set, then clears
+ * it. Reset notifiers run only for QEMU_CLOCK_VIRTUAL. */
+void qemu_clock_trigger_reset(QEMUClockType type)
+{
+    if (g_vm_cold_start_flag) {
+        if (type == QEMU_CLOCK_VIRTUAL) {
+            QEMUClock *clk = qemu_clock_ptr(type);
+            int64_t now = REPLAY_CLOCK(REPLAY_CLOCK_HOST, get_clock_realtime());
+            notifier_list_notify(&clk->reset_notifiers, &now);
+            QEMU_LOG(LOG_INFO, "Guest synchronise time success.\n");
+        }
+        g_vm_cold_start_flag = false;
+    }
+}
-- 
Gitee


From a4d81c68e81c5b9be81b57efc5f09997b7f8df35 Mon Sep 17 00:00:00 2001
From: yang hang <yanghang44@huawei.com>
Date: Tue, 8 Feb 2022 14:35:20 +0800
Subject: [PATCH 53/55] timer: rtc irqs reinject in kvm

The rtc is emulated in qemu but coalesced irqs are reinjected by kvm

Signed-off-by: yang hang <yanghang44@huawei.com>
Signed-off-by: Bo Wan <wanbo13@huawei.com>
---
 accel/kvm/kvm-all.c           |  8 +++++
 hw/rtc/mc146818rtc.c          | 26 +++++++++++++-
 hw/timer/meson.build          |  1 +
 hw/timer/rtc.c                | 66 +++++++++++++++++++++++++++++++++++
 include/hw/timer/rtc.h        | 52 +++++++++++++++++++++++++++
 include/qemu/qemu-timer.inc.h |  6 ++--
 6 files changed, 155 insertions(+), 4 deletions(-)
 create mode 100644 hw/timer/rtc.c
 create mode 100644 include/hw/timer/rtc.h

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index eecd8031cf..52304ab9bb 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -53,6 +53,8 @@
 #include <sys/eventfd.h>
 #endif
 
+#include "hw/timer/rtc.h"
+
 /* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
  * need to use the real host PAGE_SIZE, as that's what KVM will use.
  */
@@ -170,6 +172,7 @@ bool kvm_ioeventfd_any_length_allowed;
 bool kvm_msi_use_devid;
 static bool kvm_immediate_exit;
 static hwaddr kvm_max_slot_size = ~0;
+bool kvm_rtc_reinject_enable;
 
 static const KVMCapabilityInfo kvm_required_capabilites[] = {
     KVM_CAP_INFO(USER_MEMORY),
@@ -2581,6 +2584,11 @@ static int kvm_init(MachineState *ms)
         kvm_irqchip_create(s);
     }
 
+    kvm_rtc_reinject_enable = (kvm_check_extension(kvm_state, KVM_CAP_RTC_IRQ_COALESCED) > 0);
+    if (!kvm_rtc_reinject_enable) {
+        QEMU_LOG(LOG_INFO, "kvm rtc irq reinjection not supported.\n");
+    }
+
     if (kvm_eventfds_allowed) {
         s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
         s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index 4fbafddb22..b355588d6c 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -42,7 +42,7 @@
 #include "qapi/error.h"
 #include "qapi/qapi-events-misc-target.h"
 #include "qapi/visitor.h"
-#include "hw/rtc/mc146818rtc_regs.h"
+#include "hw/timer/rtc.h"
 
 #ifdef TARGET_I386
 #include "qapi/qapi-commands-misc-target.h"
@@ -217,9 +217,17 @@ periodic_timer_update(RTCState *s, int64_t current_time, uint32_t old_period, bo
      */
     if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
         uint32_t old_irq_coalesced = s->irq_coalesced;
+        if (kvm_rtc_reinject_enable) {
+            old_irq_coalesced += rtc_get_coalesced_irq();
+        }
 
         lost_clock += old_irq_coalesced * old_period;
         s->irq_coalesced = lost_clock / s->period;
+        if (kvm_rtc_reinject_enable) {
+            rtc_set_coalesced_irq(s->irq_coalesced);
+            s->irq_coalesced = 0;
+            old_irq_coalesced = 0;
+        }
         lost_clock %= s->period;
         if (old_irq_coalesced != s->irq_coalesced ||
             old_period != s->period) {
@@ -784,6 +792,11 @@ static int rtc_pre_save(void *opaque)
 
     rtc_update_time(s);
 
+    if (kvm_rtc_reinject_enable &&
+        s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
+        s->irq_coalesced += rtc_get_coalesced_irq();
+    }
+
     return 0;
 }
 
@@ -815,6 +828,13 @@ static int rtc_post_load(void *opaque, int version_id)
             rtc_coalesced_timer_update(s);
         }
     }
+
+    if (kvm_rtc_reinject_enable && s->irq_coalesced != 0 &&
+        s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
+        rtc_set_coalesced_irq(s->irq_coalesced);
+        s->irq_coalesced = 0;
+    }
+
     return 0;
 }
 
@@ -949,6 +969,10 @@ static void rtc_realizefn(DeviceState *dev, Error **errp)
     object_property_add_tm(OBJECT(s), "date", rtc_get_date);
 
     qdev_init_gpio_out(dev, &s->irq, 1);
+    if (kvm_rtc_reinject_enable &&
+        s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
+        rtc_lost_tick_policy_slew();
+    }
     QLIST_INSERT_HEAD(&rtc_devices, s, link);
 }
 
diff --git a/hw/timer/meson.build b/hw/timer/meson.build
index 03092e2ceb..785fb6825b 100644
--- a/hw/timer/meson.build
+++ b/hw/timer/meson.build
@@ -36,5 +36,6 @@ softmmu_ss.add(when: 'CONFIG_STM32F2XX_TIMER', if_true: files('stm32f2xx_timer.c
 softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_timer.c'))
 specific_ss.add(when: 'CONFIG_IBEX', if_true: files('ibex_timer.c'))
 softmmu_ss.add(when: 'CONFIG_SIFIVE_PWM', if_true: files('sifive_pwm.c'))
+softmmu_ss.add(files('rtc.c'))
 
 specific_ss.add(when: 'CONFIG_AVR_TIMER16', if_true: files('avr_timer16.c'))
diff --git a/hw/timer/rtc.c b/hw/timer/rtc.c
new file mode 100644
index 0000000000..52ef44f1f8
--- /dev/null
+++ b/hw/timer/rtc.c
@@ -0,0 +1,66 @@
+ /*
+ * QEMU rtc
+ *
+ * We introduce the functions here to decouple for the upstream
+ * hw/timer/mc146818rtc.c
+ *
+ * Copyright (c) 2017-2020 HUAWEI TECHNOLOGIES CO.,LTD.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include "qemu/osdep.h"
+#include "hw/timer/rtc.h"
+#include "sysemu/kvm.h"
+
+extern int kvm_vm_ioctl(KVMState *s, int type, ...);
+
+/* Ask KVM for its coalesced RTC irq count; the error path only logs. */
+uint32_t rtc_get_coalesced_irq(void)
+{
+    struct kvm_rtc_reinject_control req = { .flag = KVM_GET_RTC_IRQ_COALESCED };
+    int rc = kvm_vm_ioctl(kvm_state, KVM_RTC_REINJECT_CONTROL, &req);
+
+    if (rc < 0) {
+        QEMU_LOG(LOG_ERR, "Failed to get coalesced irqs from kmod: %d\n", rc);
+    }
+    /* On failure this is whatever req holds (zero unless the ioctl wrote it). */
+    return req.rtc_irq_coalesced;
+}
+
+/* Hand nr_irqs coalesced RTC interrupts over to KVM; errors are only logged. */
+void rtc_set_coalesced_irq(uint32_t nr_irqs)
+{
+    struct kvm_rtc_reinject_control req = {
+        .rtc_irq_coalesced = nr_irqs,
+        .flag = KVM_SET_RTC_IRQ_COALESCED,
+    };
+    int rc = kvm_vm_ioctl(kvm_state, KVM_RTC_REINJECT_CONTROL, &req);
+    if (rc < 0) {
+        QEMU_LOG(LOG_ERR, "Failed to set coalesced irqs to kmod: %d, %u\n", rc, nr_irqs);
+    }
+}
+
+void rtc_lost_tick_policy_slew(void)
+{
+    struct kvm_rtc_reinject_control control = {};
+    int ret;
+
+    control.flag = KVM_RTC_LOST_TICK_POLICY_SLEW;
+    ret = kvm_vm_ioctl(kvm_state, KVM_RTC_REINJECT_CONTROL, &control);
+    if (ret < 0) {
+        QEMU_LOG(LOG_ERR, "Failed to notify kvm to use lost tick policy slew: %d\n", ret);
+    }
+}
diff --git a/include/hw/timer/rtc.h b/include/hw/timer/rtc.h
new file mode 100644
index 0000000000..04b942de86
--- /dev/null
+++ b/include/hw/timer/rtc.h
@@ -0,0 +1,52 @@
+/*
+* QEMU rtc
+*
+* We introduce the functions here to decouple for the upstream
+* include/hw/timer/mc146818rtc.h
+*
+* Copyright (c) 2017-2020 HUAWEI TECHNOLOGIES CO.,LTD.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License along
+* with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef INCLUDE_HW_TIMER_RTC_H
+#define INCLUDE_HW_TIMER_RTC_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <linux/kvm.h>
+#include "qemu-log.h"
+
+/* flags to control coalesced irq */
+#define KVM_GET_RTC_IRQ_COALESCED        (1 << 0)
+#define KVM_SET_RTC_IRQ_COALESCED        (1 << 1)
+#define KVM_RTC_LOST_TICK_POLICY_SLEW    (1 << 2)
+
+/* RTC is emulated in qemu, but the colasced irqs are reinjected in kvm */
+#define KVM_CAP_RTC_IRQ_COALESCED 163
+#define KVM_RTC_REINJECT_CONTROL _IOWR(KVMIO, 0x56, struct kvm_rtc_reinject_control)
+
+struct kvm_rtc_reinject_control {
+    __u32 rtc_irq_coalesced;
+    __u8 flag;
+    __u8 reserved[31];
+};
+
+extern bool kvm_rtc_reinject_enable;
+
+uint32_t rtc_get_coalesced_irq(void);
+void rtc_set_coalesced_irq(uint32_t nr_irqs);
+void rtc_lost_tick_policy_slew(void);
+
+#endif
diff --git a/include/qemu/qemu-timer.inc.h b/include/qemu/qemu-timer.inc.h
index a8e9da18c9..bbe7b67f7a 100644
--- a/include/qemu/qemu-timer.inc.h
+++ b/include/qemu/qemu-timer.inc.h
@@ -20,12 +20,12 @@
  * with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
-#ifndef HUAWEI_INCLUDE_QEMU_QEMU_TIMER_INC_H
-#define HUAWEI_INCLUDE_QEMU_QEMU_TIMER_INC_H
+#ifndef INCLUDE_QEMU_QEMU_TIMER_INC_H
+#define INCLUDE_QEMU_QEMU_TIMER_INC_H
 
 #include "qemu/timer.h"
 
 void qemu_clock_trigger_reset(QEMUClockType type);
 void qemu_clock_disable_reset(void);
 
-#endif /* end of include guard: HUAWEI_INCLUDE_QEMU_QEMU_TIMER_INC_H */
+#endif /* end of include guard: INCLUDE_QEMU_QEMU_TIMER_INC_H */
-- 
Gitee


From f66688a99bed38d9ab7c039d79b27d15c943811d Mon Sep 17 00:00:00 2001
From: yang hang <yanghang44@huawei.com>
Date: Wed, 9 Feb 2022 14:26:10 +0800
Subject: [PATCH 54/55] timer: set rtc catchup speed per vm

Support setting the RTC clock compensation rate in kernel mode at
virtual machine granularity, instead of using the same speed for
all virtual machines. A new configuration item, catchup speed, is
added to libvirt to set the compensation rate of a virtual machine.

Signed-off-by: yang hang <yanghang44@huawei.com>
Signed-off-by: Bo Wan <wanbo13@huawei.com>
---
 hw/rtc/mc146818rtc.c   | 15 +++++++++++++++
 hw/timer/rtc.c         | 29 +++++++++++++++++++++++++++++
 include/hw/timer/rtc.h |  6 +++++-
 qapi/misc-target.json  | 12 ++++++++++++
 qapi/pragma.json       |  3 ++-
 softmmu/vl.c           |  3 +++
 6 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index b355588d6c..f1ae63300d 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -110,6 +110,21 @@ static void rtc_coalesced_timer_update(RTCState *s)
 static QLIST_HEAD(, RTCState) rtc_devices =
     QLIST_HEAD_INITIALIZER(rtc_devices);
 
+void qmp_set_rtc_catchup_speed(const uint32_t speed, Error **errp)
+{
+    RTCState *s;
+
+    if (!kvm_rtc_reinject_enable) {
+        return;
+    }
+
+    QLIST_FOREACH(s, &rtc_devices, link) {
+        if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW) {
+            set_rtc_catchup_speed(speed);
+        }
+    }
+}
+
 #ifdef TARGET_I386
 void qmp_rtc_reset_reinjection(Error **errp)
 {
diff --git a/hw/timer/rtc.c b/hw/timer/rtc.c
index 52ef44f1f8..3f50e8b892 100644
--- a/hw/timer/rtc.c
+++ b/hw/timer/rtc.c
@@ -24,6 +24,8 @@
 #include "qemu/osdep.h"
 #include "hw/timer/rtc.h"
 #include "sysemu/kvm.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
 
 extern int kvm_vm_ioctl(KVMState *s, int type, ...);
 
@@ -64,3 +66,47 @@ void rtc_lost_tick_policy_slew(void)
         QEMU_LOG(LOG_ERR, "Failed to notify kvm to use lost tick policy slew: %d\n", ret);
     }
 }
+
+uint32_t rtc_catchup_speed(void)
+{
+    uint32_t speed;
+    QemuOpts *opts = qemu_find_opts_singleton("rtc");
+
+    speed = qemu_opt_get_number(opts, "speed", 0);
+    QEMU_LOG(LOG_INFO, "rtc catchup speed: %u\n", speed);
+
+    return speed;
+}
+
+/*
+ * Ask KVM to use @speed as the RTC catch-up speed.
+ *
+ * A speed of 0 is ignored and no ioctl is issued. The value travels in the
+ * 8-bit 'speed' field of struct kvm_rtc_reinject_control, so anything above
+ * UINT8_MAX is rejected instead of being silently truncated. Failures are
+ * only logged; nothing is reported back to the caller.
+ */
+void set_rtc_catchup_speed(const uint32_t speed)
+{
+    struct kvm_rtc_reinject_control control = {};
+    int ret;
+
+    if (speed == 0) {
+        return;
+    }
+    if (speed > UINT8_MAX) {
+        QEMU_LOG(LOG_ERR, "rtc_catchup_speed %u is out of range (max %u)\n",
+                 speed, (unsigned)UINT8_MAX);
+        return;
+    }
+
+    control.flag = KVM_SET_RTC_CATCHUP_SPEED;
+    control.speed = speed;
+    ret = kvm_vm_ioctl(kvm_state, KVM_RTC_REINJECT_CONTROL, &control);
+    if (ret < 0) {
+        QEMU_LOG(LOG_ERR, "Failed to set rtc_catchup_speed: %d\n", ret);
+        return;
+    }
+    /* Only claim success once the ioctl has actually succeeded. */
+    QEMU_LOG(LOG_INFO, "Success to set rtc_catchup_speed: %u\n", speed);
+}
diff --git a/include/hw/timer/rtc.h b/include/hw/timer/rtc.h
index 04b942de86..2d064d2994 100644
--- a/include/hw/timer/rtc.h
+++ b/include/hw/timer/rtc.h
@@ -32,6 +32,7 @@
 #define KVM_GET_RTC_IRQ_COALESCED        (1 << 0)
 #define KVM_SET_RTC_IRQ_COALESCED        (1 << 1)
 #define KVM_RTC_LOST_TICK_POLICY_SLEW    (1 << 2)
+#define KVM_SET_RTC_CATCHUP_SPEED        (1 << 3)
 
 /* RTC is emulated in qemu, but the colasced irqs are reinjected in kvm */
 #define KVM_CAP_RTC_IRQ_COALESCED 163
@@ -40,7 +41,8 @@
 struct kvm_rtc_reinject_control {
     __u32 rtc_irq_coalesced;
     __u8 flag;
-    __u8 reserved[31];
+    __u8 speed;
+    __u8 reserved[30];
 };
 
 extern bool kvm_rtc_reinject_enable;
@@ -48,5 +50,7 @@ extern bool kvm_rtc_reinject_enable;
 uint32_t rtc_get_coalesced_irq(void);
 void rtc_set_coalesced_irq(uint32_t nr_irqs);
 void rtc_lost_tick_policy_slew(void);
+uint32_t rtc_catchup_speed(void);
+void set_rtc_catchup_speed(const uint32_t speed);
 
 #endif
diff --git a/qapi/misc-target.json b/qapi/misc-target.json
index 5aa2b95b7d..3467a019bc 100644
--- a/qapi/misc-target.json
+++ b/qapi/misc-target.json
@@ -54,6 +54,18 @@
 { 'command': 'rtc-reset-reinjection',
   'if': 'TARGET_I386' }
 
+##
+# @set-rtc-catchup-speed:
+#
+# set rtc catchup speed
+#
+# @speed: rtc catchup speed
+#
+# Returns: Nothing on success
+#
+# Since: 2.8.1
+##
+{ 'command': 'set-rtc-catchup-speed', 'data': {'speed': 'uint32'}}
 
 ##
 # @SevState:
diff --git a/qapi/pragma.json b/qapi/pragma.json
index 3bc0335d1f..d546f1ab49 100644
--- a/qapi/pragma.json
+++ b/qapi/pragma.json
@@ -20,7 +20,8 @@
         'set_password',
         'system_powerdown',
         'system_reset',
-        'system_wakeup' ],
+        'system_wakeup',
+        'set-rtc-catchup-speed' ],
     'command-returns-exceptions': [
         'human-monitor-command',
         'qom-get',
diff --git a/softmmu/vl.c b/softmmu/vl.c
index bec3eedc90..381f9a61d8 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -228,6 +228,9 @@ static QemuOptsList qemu_rtc_opts = {
         },{
             .name = "driftfix",
             .type = QEMU_OPT_STRING,
+        },{
+            .name = "speed",
+            .type = QEMU_OPT_NUMBER,
         },
         { /* end of list */ }
     },
-- 
Gitee


From b24197cd51c94639ac82bb2934b7653cd6f9ec55 Mon Sep 17 00:00:00 2001
From: yang hang <yanghang44@huawei.com>
Date: Tue, 8 Feb 2022 19:39:57 +0800
Subject: [PATCH 55/55] timer: qmp_set_rtc_catchup_speed only used in x86

qmp_set_rtc_catchup_speed only used in x86

Signed-off-by: yang hang <yanghang44@huawei.com>
Signed-off-by: Bo Wan <wanbo13@huawei.com>
---
 hw/rtc/mc146818rtc.c  |  2 +-
 qapi/misc-target.json |  2 +-
 qemu.spec             | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 qemu.spec

diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c
index f1ae63300d..afe082f687 100644
--- a/hw/rtc/mc146818rtc.c
+++ b/hw/rtc/mc146818rtc.c
@@ -110,6 +110,7 @@ static void rtc_coalesced_timer_update(RTCState *s)
 static QLIST_HEAD(, RTCState) rtc_devices =
     QLIST_HEAD_INITIALIZER(rtc_devices);
 
+#ifdef TARGET_I386
 void qmp_set_rtc_catchup_speed(const uint32_t speed, Error **errp)
 {
     RTCState *s;
@@ -125,7 +126,6 @@ void qmp_set_rtc_catchup_speed(const uint32_t speed, Error **errp)
     }
 }
 
-#ifdef TARGET_I386
 void qmp_rtc_reset_reinjection(Error **errp)
 {
     RTCState *s;
diff --git a/qapi/misc-target.json b/qapi/misc-target.json
index 3467a019bc..ac7f7bf43a 100644
--- a/qapi/misc-target.json
+++ b/qapi/misc-target.json
@@ -65,7 +65,7 @@
 #
 # Since: 2.8.1
 ##
-{ 'command': 'set-rtc-catchup-speed', 'data': {'speed': 'uint32'}}
+{ 'command': 'set-rtc-catchup-speed', 'data': {'speed': 'uint32'}, 'if': 'TARGET_I386'}
 
 ##
 # @SevState:
diff --git a/qemu.spec b/qemu.spec
new file mode 100644
index 0000000000..ae9e0f3939
--- /dev/null
+++ b/qemu.spec
@@ -0,0 +1,35 @@
+Name:
+Version:
+Release:	1%{?dist}
+Summary:
+
+Group:
+License:
+URL:
+Source0:
+
+BuildRequires:
+Requires:
+
+%description
+
+
+%prep
+%setup -q
+
+
+%build
+%configure
+make %{?_smp_mflags}
+
+
+%install
+%make_install
+
+
+%files
+%doc
+
+
+
+%changelog
-- 
Gitee