From a7cca9b3931b22d9893ddf938b6ab4b74d4c7533 Mon Sep 17 00:00:00 2001
From: Chenyi Qiang <chenyi.qiang@intel.com>
Date: Mon, 7 Apr 2025 15:49:21 +0800
Subject: [PATCH 01/40] memory: Export a helper to get intersection of a
 MemoryRegionSection with a given range

Rename the helper to memory_region_section_intersect_range() to make it
more generic. Meanwhile, define the @end as Int128 and replace the
related operations with Int128_* format since the helper is exported as
a wider API.

Reference:https://gitlab.com/qemu-project/qemu/-/commit/f47a672a72acd6e2712031f0bc4d4f3ae4b6302c

Suggested-by: Alexey Kardashevskiy <aik@amd.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Reviewed-by: Alexey Kardashevskiy <aik@amd.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/virtio/virtio-mem.c | 32 +++++---------------------------
 include/exec/memory.h  | 27 +++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 75ee38aa46..90bfc5e596 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -235,28 +235,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg,
     return ret;
 }
 
-/*
- * Adjust the memory section to cover the intersection with the given range.
- *
- * Returns false if the intersection is empty, otherwise returns true.
- */
-static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
-                                                uint64_t offset, uint64_t size)
-{
-    uint64_t start = MAX(s->offset_within_region, offset);
-    uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
-                       offset + size);
-
-    if (end <= start) {
-        return false;
-    }
-
-    s->offset_within_address_space += start - s->offset_within_region;
-    s->offset_within_region = start;
-    s->size = int128_make64(end - start);
-    return true;
-}
-
 typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
 
 static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
@@ -278,7 +256,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
                                       first_bit + 1) - 1;
         size = (last_bit - first_bit + 1) * vmem->block_size;
 
-        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             break;
         }
         ret = cb(&tmp, arg);
@@ -310,7 +288,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
                                  first_bit + 1) - 1;
         size = (last_bit - first_bit + 1) * vmem->block_size;
 
-        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             break;
         }
         ret = cb(&tmp, arg);
@@ -346,7 +324,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
         MemoryRegionSection tmp = *rdl->section;
 
-        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             continue;
         }
         rdl->notify_discard(rdl, &tmp);
@@ -362,7 +340,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
         MemoryRegionSection tmp = *rdl->section;
 
-        if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             continue;
         }
         ret = rdl->notify_populate(rdl, &tmp);
@@ -379,7 +357,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
             if (rdl2 == rdl) {
                 break;
             }
-            if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+            if (!memory_region_section_intersect_range(&tmp, offset, size)) {
                 continue;
             }
             rdl2->notify_discard(rdl2, &tmp);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 0361ec2054..950362d53c 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1272,6 +1272,33 @@ MemoryRegionSection *memory_region_section_new_copy(MemoryRegionSection *s);
  */
 void memory_region_section_free_copy(MemoryRegionSection *s);
 
+/**
+ * memory_region_section_intersect_range: Shrink the memory section in place
+ * to its intersection with the range [@offset, @offset + @size).
+ *
+ * @s: the #MemoryRegionSection to be adjusted; left unmodified on failure
+ * @offset: the offset of the given range in the memory region
+ * @size: the size of the given range
+ *
+ * Returns false if the intersection is empty, otherwise returns true.
+ */
+static inline bool memory_region_section_intersect_range(MemoryRegionSection *s,
+                                                         uint64_t offset, uint64_t size)
+{
+    uint64_t start = MAX(s->offset_within_region, offset);
+    Int128 end = int128_min(int128_add(int128_make64(s->offset_within_region), s->size),
+                            int128_add(int128_make64(offset), int128_make64(size)));
+
+    if (int128_le(end, int128_make64(start))) {
+        return false;
+    }
+
+    s->offset_within_address_space += start - s->offset_within_region;
+    s->offset_within_region = start;
+    s->size = int128_sub(end, int128_make64(start));
+    return true;
+}
+
 /**
  * memory_region_init: Initialize a memory region
  *
-- 
Gitee


From 9d4e30a832e8de249869c6cbc29b102e4e9b3db9 Mon Sep 17 00:00:00 2001
From: Chenyi Qiang <chenyi.qiang@intel.com>
Date: Mon, 7 Apr 2025 15:49:22 +0800
Subject: [PATCH 02/40] memory: Change memory_region_set_ram_discard_manager()
 to return the result

Reference:https://gitlab.com/qemu-project/qemu/-/commit/ff1211154c45c9f7f82116ae9a8c72a848e4a8b5

Modify memory_region_set_ram_discard_manager() to return false if a
RamDiscardManager is already set in the MemoryRegion. The caller must
handle this failure, such as having virtio-mem undo its actions and fail
the realize() process. Opportunistically move the call earlier to avoid
complex error handling.

This change is beneficial when introducing a new RamDiscardManager
instance besides virtio-mem. After
ram_block_coordinated_discard_require(true) unlocks all
RamDiscardManager instances, only one instance is allowed to be set for
a MemoryRegion at present.

Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Conflicts:
      hw/virtio/virtio-mem.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/virtio/virtio-mem.c | 28 ++++++++++++++++------------
 include/exec/memory.h  |  6 +++---
 system/memory.c        | 10 +++++++---
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 90bfc5e596..6f3ecddfc7 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -1049,6 +1049,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    /*
+     * Set ourselves as RamDiscardManager before the plug handler maps the
+     * memory region and exposes it via an address space.
+     */
+    if (memory_region_set_ram_discard_manager(&vmem->memdev->mr,
+                                              RAM_DISCARD_MANAGER(vmem))) {
+        error_setg(errp, "Failed to set RamDiscardManager");
+        ram_block_coordinated_discard_require(false);
+        return;
+    }
+
     /*
      * We don't know at this point whether shared RAM is migrated using
      * QEMU or migrated using the file content. "x-ignore-shared" will be
@@ -1103,13 +1114,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
                              &vmstate_virtio_mem_device_early, vmem);
     }
     qemu_register_reset(virtio_mem_system_reset, vmem);
-
-    /*
-     * Set ourselves as RamDiscardManager before the plug handler maps the
-     * memory region and exposes it via an address space.
-     */
-    memory_region_set_ram_discard_manager(&vmem->memdev->mr,
-                                          RAM_DISCARD_MANAGER(vmem));
 }
 
 static void virtio_mem_device_unrealize(DeviceState *dev)
@@ -1117,11 +1121,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
     VirtIOMEM *vmem = VIRTIO_MEM(dev);
 
-    /*
-     * The unplug handler unmapped the memory region, it cannot be
-     * found via an address space anymore. Unset ourselves.
-     */
-    memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
     qemu_unregister_reset(virtio_mem_system_reset, vmem);
     if (vmem->early_migration) {
         vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early,
@@ -1132,6 +1131,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
     virtio_del_queue(vdev, 0);
     virtio_cleanup(vdev);
     g_free(vmem->bitmap);
+    /*
+     * The unplug handler unmapped the memory region, it cannot be
+     * found via an address space anymore. Unset ourselves.
+     */
+    memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
     ram_block_coordinated_discard_require(false);
 }
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 950362d53c..a4e9e084cd 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2554,13 +2554,13 @@ static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr)
  *
  * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion
  * that does not cover RAM, or a #MemoryRegion that already has a
- * #RamDiscardManager assigned.
+ * #RamDiscardManager assigned. Return 0 if the rdm is set successfully.
  *
  * @mr: the #MemoryRegion
  * @rdm: #RamDiscardManager to set
  */
-void memory_region_set_ram_discard_manager(MemoryRegion *mr,
-                                           RamDiscardManager *rdm);
+int memory_region_set_ram_discard_manager(MemoryRegion *mr,
+                                          RamDiscardManager *rdm);
 
 /**
  * memory_region_find: translate an address/size relative to a
diff --git a/system/memory.c b/system/memory.c
index 607ce9cf60..c3985e8eef 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2121,12 +2121,16 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
     return mr->rdm;
 }
 
-void memory_region_set_ram_discard_manager(MemoryRegion *mr,
-                                           RamDiscardManager *rdm)
+int memory_region_set_ram_discard_manager(MemoryRegion *mr,
+                                          RamDiscardManager *rdm)
 {
     g_assert(memory_region_is_ram(mr));
-    g_assert(!rdm || !mr->rdm);
+    if (mr->rdm && rdm) {
+        return -EBUSY;
+    }
+
     mr->rdm = rdm;
+    return 0;
 }
 
 uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
-- 
Gitee


From b18b91d25cd224fd4920b804a401c90a6f5ed2b8 Mon Sep 17 00:00:00 2001
From: Chenyi Qiang <chenyi.qiang@intel.com>
Date: Mon, 7 Apr 2025 15:49:23 +0800
Subject: [PATCH 03/40] memory: Unify the definition of ReplayRamPopulate() and
 ReplayRamDiscard()

Reference:https://gitlab.com/qemu-project/qemu/-/commit/2205b8466733f8c6e3306c964f31c5a7cac69dfa

Update ReplayRamDiscard() to return a result, and unify ReplayRamPopulate()
and ReplayRamDiscard() into a single ReplayStateChange() type since their
definitions are now identical. This unification simplifies related
structures, such as VirtIOMEMReplayData, making them cleaner and more
maintainable.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/virtio/virtio-mem.c | 20 ++++++++++----------
 include/exec/memory.h  | 31 ++++++++++++++++---------------
 migration/ram.c        |  5 +++--
 system/memory.c        | 12 ++++++------
 4 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index 6f3ecddfc7..f40a816b7f 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -1712,7 +1712,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
 }
 
 struct VirtIOMEMReplayData {
-    void *fn;
+    ReplayStateChange fn;
     void *opaque;
 };
 
@@ -1720,12 +1720,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
 {
     struct VirtIOMEMReplayData *data = arg;
 
-    return ((ReplayRamPopulate)data->fn)(s, data->opaque);
+    return data->fn(s, data->opaque);
 }
 
 static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
                                            MemoryRegionSection *s,
-                                           ReplayRamPopulate replay_fn,
+                                           ReplayStateChange replay_fn,
                                            void *opaque)
 {
     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
@@ -1744,14 +1744,14 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
 {
     struct VirtIOMEMReplayData *data = arg;
 
-    ((ReplayRamDiscard)data->fn)(s, data->opaque);
+    data->fn(s, data->opaque);
     return 0;
 }
 
-static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
-                                            MemoryRegionSection *s,
-                                            ReplayRamDiscard replay_fn,
-                                            void *opaque)
+static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
+                                           MemoryRegionSection *s,
+                                           ReplayStateChange replay_fn,
+                                           void *opaque)
 {
     const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
     struct VirtIOMEMReplayData data = {
@@ -1760,8 +1760,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
     };
 
     g_assert(s->mr == &vmem->memdev->mr);
-    virtio_mem_for_each_unplugged_section(vmem, s, &data,
-                                          virtio_mem_rdm_replay_discarded_cb);
+    return virtio_mem_for_each_unplugged_section(vmem, s, &data,
+                                                 virtio_mem_rdm_replay_discarded_cb);
 }
 
 static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
diff --git a/include/exec/memory.h b/include/exec/memory.h
index a4e9e084cd..a3243ee218 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -623,8 +623,7 @@ static inline void ram_discard_listener_init(RamDiscardListener *rdl,
     rdl->double_discard_supported = double_discard_supported;
 }
 
-typedef int (*ReplayRamPopulate)(MemoryRegionSection *section, void *opaque);
-typedef void (*ReplayRamDiscard)(MemoryRegionSection *section, void *opaque);
+typedef int (*ReplayStateChange)(MemoryRegionSection *section, void *opaque);
 
 /*
  * RamDiscardManagerClass:
@@ -698,36 +697,38 @@ struct RamDiscardManagerClass {
     /**
      * @replay_populated:
      *
-     * Call the #ReplayRamPopulate callback for all populated parts within the
+     * Call the #ReplayStateChange callback for all populated parts within the
      * #MemoryRegionSection via the #RamDiscardManager.
      *
      * In case any call fails, no further calls are made.
      *
      * @rdm: the #RamDiscardManager
      * @section: the #MemoryRegionSection
-     * @replay_fn: the #ReplayRamPopulate callback
+     * @replay_fn: the #ReplayStateChange callback
      * @opaque: pointer to forward to the callback
      *
      * Returns 0 on success, or a negative error if any notification failed.
      */
     int (*replay_populated)(const RamDiscardManager *rdm,
                             MemoryRegionSection *section,
-                            ReplayRamPopulate replay_fn, void *opaque);
+                            ReplayStateChange replay_fn, void *opaque);
 
     /**
      * @replay_discarded:
      *
-     * Call the #ReplayRamDiscard callback for all discarded parts within the
+     * Call the #ReplayStateChange callback for all discarded parts within the
      * #MemoryRegionSection via the #RamDiscardManager.
      *
      * @rdm: the #RamDiscardManager
      * @section: the #MemoryRegionSection
-     * @replay_fn: the #ReplayRamDiscard callback
+     * @replay_fn: the #ReplayStateChange callback
      * @opaque: pointer to forward to the callback
+     *
+     * Returns 0 on success, or a negative error if any notification failed.
      */
-    void (*replay_discarded)(const RamDiscardManager *rdm,
-                             MemoryRegionSection *section,
-                             ReplayRamDiscard replay_fn, void *opaque);
+    int (*replay_discarded)(const RamDiscardManager *rdm,
+                            MemoryRegionSection *section,
+                            ReplayStateChange replay_fn, void *opaque);
 
     /**
      * @register_listener:
@@ -770,13 +771,13 @@ bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
 
 int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
                                          MemoryRegionSection *section,
-                                         ReplayRamPopulate replay_fn,
+                                         ReplayStateChange replay_fn,
                                          void *opaque);
 
-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
-                                          MemoryRegionSection *section,
-                                          ReplayRamDiscard replay_fn,
-                                          void *opaque);
+int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+                                         MemoryRegionSection *section,
+                                         ReplayStateChange replay_fn,
+                                         void *opaque);
 
 void ram_discard_manager_register_listener(RamDiscardManager *rdm,
                                            RamDiscardListener *rdl,
diff --git a/migration/ram.c b/migration/ram.c
index 91bec89a6e..083a8a8073 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -841,8 +841,8 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
     return ret;
 }
 
-static void dirty_bitmap_clear_section(MemoryRegionSection *section,
-                                       void *opaque)
+static int dirty_bitmap_clear_section(MemoryRegionSection *section,
+                                      void *opaque)
 {
     const hwaddr offset = section->offset_within_region;
     const hwaddr size = int128_get64(section->size);
@@ -861,6 +861,7 @@ static void dirty_bitmap_clear_section(MemoryRegionSection *section,
     }
     *cleared_bits += bitmap_count_one_with_offset(rb->bmap, start, npages);
     bitmap_clear(rb->bmap, start, npages);
+    return 0;
 }
 
 /*
diff --git a/system/memory.c b/system/memory.c
index c3985e8eef..ace79b0f59 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2153,7 +2153,7 @@ bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
 
 int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
                                          MemoryRegionSection *section,
-                                         ReplayRamPopulate replay_fn,
+                                         ReplayStateChange replay_fn,
                                          void *opaque)
 {
     RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
@@ -2162,15 +2162,15 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
     return rdmc->replay_populated(rdm, section, replay_fn, opaque);
 }
 
-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
-                                          MemoryRegionSection *section,
-                                          ReplayRamDiscard replay_fn,
-                                          void *opaque)
+int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+                                         MemoryRegionSection *section,
+                                         ReplayStateChange replay_fn,
+                                         void *opaque)
 {
     RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
 
     g_assert(rdmc->replay_discarded);
-    rdmc->replay_discarded(rdm, section, replay_fn, opaque);
+    return rdmc->replay_discarded(rdm, section, replay_fn, opaque);
 }
 
 void ram_discard_manager_register_listener(RamDiscardManager *rdm,
-- 
Gitee


From c0f15fa6a2c663bba5cf56f98bdcfec20dc2e807 Mon Sep 17 00:00:00 2001
From: Chenyi Qiang <chenyi.qiang@intel.com>
Date: Mon, 7 Apr 2025 15:49:24 +0800
Subject: [PATCH 04/40] memory: Introduce generic state change parent class for
 RamDiscardManager

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/31df9c4804e4e422e27a18ca9a7e22b4123203d1

RamDiscardManager is an interface used by virtio-mem to adjust VFIO
mappings in relation to VM page assignment. It manages the populated and
discarded states of RAM. To accommodate future scenarios for
managing RAM states, such as private and shared states in confidential
VMs, the existing RamDiscardManager interface needs to be generalized.

Introduce a parent class, GenericStateManager, to manage a pair of
opposite states with RamDiscardManager as its child. The changes include:
- Define a new abstract class GenericStateManager.
- Extract six callbacks into GenericStateManagerClass and allow the child
  classes to inherit them.
- Modify RamDiscardManager-related helpers to use GenericStateManager
  ones.
- Define a generic StateChangeListener to extract fields from the
  RamDiscardManager listener, which allows future listeners to embed it
  and avoid duplication.
- Change the users of RamDiscardManager (virtio-mem, migration, etc.) to
  use the GenericStateManager helpers.

This provides a more flexible and reusable framework for RAM state
management, facilitating future enhancements and use cases.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Conflicts:
      hw/vfio/common.c
      include/exec/memory.h
      system/memory.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/vfio/common.c        |  30 ++--
 hw/virtio/virtio-mem.c  |  95 ++++++------
 include/exec/memory.h   | 313 ++++++++++++++++++++++------------------
 migration/ram.c         |  16 +-
 system/memory.c         | 106 ++++++++------
 system/memory_mapping.c |   6 +-
 6 files changed, 310 insertions(+), 256 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0be63c5fbc..ab7450f3bd 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -350,9 +350,10 @@ out:
     rcu_read_unlock();
 }
 
-static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
+static void vfio_ram_discard_notify_discard(StateChangeListener *scl,
                                             MemoryRegionSection *section)
 {
+    RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl);
     VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                 listener);
     VFIOContainerBase *bcontainer = vrdl->bcontainer;
@@ -368,9 +369,10 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
     }
 }
 
-static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
+static int vfio_ram_discard_notify_populate(StateChangeListener *scl,
                                             MemoryRegionSection *section)
 {
+    RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl);
     VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                 listener);
     VFIOContainerBase *bcontainer = vrdl->bcontainer;
@@ -396,7 +398,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
                                      vaddr, section->readonly);
         if (ret) {
             /* Rollback */
-            vfio_ram_discard_notify_discard(rdl, section);
+            vfio_ram_discard_notify_discard(scl, section);
             return ret;
         }
     }
@@ -406,8 +408,9 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
 static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
                                                MemoryRegionSection *section)
 {
-    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
     VFIORamDiscardListener *vrdl;
+    RamDiscardListener *rdl;
 
     /* Ignore some corner cases not relevant in practice. */
     g_assert(QEMU_IS_ALIGNED(section->offset_within_region, TARGET_PAGE_SIZE));
@@ -420,17 +423,18 @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
     vrdl->mr = section->mr;
     vrdl->offset_within_address_space = section->offset_within_address_space;
     vrdl->size = int128_get64(section->size);
-    vrdl->granularity = ram_discard_manager_get_min_granularity(rdm,
-                                                                section->mr);
+    vrdl->granularity = generic_state_manager_get_min_granularity(gsm,
+                                                                  section->mr);
 
     g_assert(vrdl->granularity && is_power_of_2(vrdl->granularity));
     g_assert(bcontainer->pgsizes &&
              vrdl->granularity >= 1ULL << ctz64(bcontainer->pgsizes));
 
-    ram_discard_listener_init(&vrdl->listener,
+    rdl = &vrdl->listener;
+    ram_discard_listener_init(rdl,
                               vfio_ram_discard_notify_populate,
                               vfio_ram_discard_notify_discard, true);
-    ram_discard_manager_register_listener(rdm, &vrdl->listener, section);
+    generic_state_manager_register_listener(gsm, &rdl->scl, section);
     QLIST_INSERT_HEAD(&bcontainer->vrdl_list, vrdl, next);
 
     /*
@@ -480,8 +484,9 @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
 static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer,
                                                  MemoryRegionSection *section)
 {
-    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
     VFIORamDiscardListener *vrdl = NULL;
+    RamDiscardListener *rdl;
 
     QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
         if (vrdl->mr == section->mr &&
@@ -495,7 +500,8 @@ static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer,
         hw_error("vfio: Trying to unregister missing RAM discard listener");
     }
 
-    ram_discard_manager_unregister_listener(rdm, &vrdl->listener);
+    rdl = &vrdl->listener;
+    generic_state_manager_unregister_listener(gsm, &rdl->scl);
     QLIST_REMOVE(vrdl, next);
     g_free(vrdl);
 }
@@ -1275,7 +1281,7 @@ static int
 vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
                                             MemoryRegionSection *section)
 {
-    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
     VFIORamDiscardListener *vrdl = NULL;
 
     QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
@@ -1294,7 +1300,7 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
      * We only want/can synchronize the bitmap for actually mapped parts -
      * which correspond to populated parts. Replay all populated parts.
      */
-    return ram_discard_manager_replay_populated(rdm, section,
+    return generic_state_manager_replay_on_state_set(gsm, section,
                                               vfio_ram_discard_get_dirty_bitmap,
                                                 &vrdl);
 }
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index f40a816b7f..d60bc994ad 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -303,16 +303,16 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
 
 static int virtio_mem_notify_populate_cb(MemoryRegionSection *s, void *arg)
 {
-    RamDiscardListener *rdl = arg;
+    StateChangeListener *scl = arg;
 
-    return rdl->notify_populate(rdl, s);
+    return scl->notify_to_state_set(scl, s);
 }
 
 static int virtio_mem_notify_discard_cb(MemoryRegionSection *s, void *arg)
 {
-    RamDiscardListener *rdl = arg;
+    StateChangeListener *scl = arg;
 
-    rdl->notify_discard(rdl, s);
+    scl->notify_to_state_clear(scl, s);
     return 0;
 }
 
@@ -322,12 +322,13 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
     RamDiscardListener *rdl;
 
     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
-        MemoryRegionSection tmp = *rdl->section;
+        StateChangeListener *scl = &rdl->scl;
+        MemoryRegionSection tmp = *scl->section;
 
         if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             continue;
         }
-        rdl->notify_discard(rdl, &tmp);
+        scl->notify_to_state_clear(scl, &tmp);
     }
 }
 
@@ -338,12 +339,13 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
     int ret = 0;
 
     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
-        MemoryRegionSection tmp = *rdl->section;
+        StateChangeListener *scl = &rdl->scl;
+        MemoryRegionSection tmp = *scl->section;
 
         if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             continue;
         }
-        ret = rdl->notify_populate(rdl, &tmp);
+        ret = scl->notify_to_state_set(scl, &tmp);
         if (ret) {
             break;
         }
@@ -352,7 +354,8 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
     if (ret) {
         /* Notify all already-notified listeners. */
         QLIST_FOREACH(rdl2, &vmem->rdl_list, next) {
-            MemoryRegionSection tmp = *rdl2->section;
+            StateChangeListener *scl2 = &rdl2->scl;
+            MemoryRegionSection tmp = *scl2->section;
 
             if (rdl2 == rdl) {
                 break;
@@ -360,7 +363,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
             if (!memory_region_section_intersect_range(&tmp, offset, size)) {
                 continue;
             }
-            rdl2->notify_discard(rdl2, &tmp);
+            scl2->notify_to_state_clear(scl2, &tmp);
         }
     }
     return ret;
@@ -375,10 +378,11 @@ static void virtio_mem_notify_unplug_all(VirtIOMEM *vmem)
     }
 
     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
+        StateChangeListener *scl = &rdl->scl;
         if (rdl->double_discard_supported) {
-            rdl->notify_discard(rdl, rdl->section);
+            scl->notify_to_state_clear(scl, scl->section);
         } else {
-            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+            virtio_mem_for_each_plugged_section(vmem, scl->section, scl,
                                                 virtio_mem_notify_discard_cb);
         }
     }
@@ -1053,8 +1057,8 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
      * Set ourselves as RamDiscardManager before the plug handler maps the
      * memory region and exposes it via an address space.
      */
-    if (memory_region_set_ram_discard_manager(&vmem->memdev->mr,
-                                              RAM_DISCARD_MANAGER(vmem))) {
+    if (memory_region_set_generic_state_manager(&vmem->memdev->mr,
+                                                GENERIC_STATE_MANAGER(vmem))) {
         error_setg(errp, "Failed to set RamDiscardManager");
         ram_block_coordinated_discard_require(false);
         return;
@@ -1135,7 +1139,7 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
      * The unplug handler unmapped the memory region, it cannot be
      * found via an address space anymore. Unset ourselves.
      */
-    memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
+    memory_region_set_generic_state_manager(&vmem->memdev->mr, NULL);
     ram_block_coordinated_discard_require(false);
 }
 
@@ -1184,7 +1188,8 @@ static int virtio_mem_post_load_bitmap(VirtIOMEM *vmem)
      * into an address space. Replay, now that we updated the bitmap.
      */
     QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
-        ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+        StateChangeListener *scl = &rdl->scl;
+        ret = virtio_mem_for_each_plugged_section(vmem, scl->section, scl,
                                                  virtio_mem_notify_populate_cb);
         if (ret) {
             return ret;
@@ -1683,19 +1688,19 @@ static Property virtio_mem_properties[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
-static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm,
+static uint64_t virtio_mem_rdm_get_min_granularity(const GenericStateManager *gsm,
                                                    const MemoryRegion *mr)
 {
-    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    const VirtIOMEM *vmem = VIRTIO_MEM(gsm);
 
     g_assert(mr == &vmem->memdev->mr);
     return vmem->block_size;
 }
 
-static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
+static bool virtio_mem_rdm_is_populated(const GenericStateManager *gsm,
                                         const MemoryRegionSection *s)
 {
-    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    const VirtIOMEM *vmem = VIRTIO_MEM(gsm);
     uint64_t start_gpa = vmem->addr + s->offset_within_region;
     uint64_t end_gpa = start_gpa + int128_get64(s->size);
 
@@ -1723,12 +1728,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
     return data->fn(s, data->opaque);
 }
 
-static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
+static int virtio_mem_rdm_replay_populated(const GenericStateManager *gsm,
                                            MemoryRegionSection *s,
                                            ReplayStateChange replay_fn,
                                            void *opaque)
 {
-    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    const VirtIOMEM *vmem = VIRTIO_MEM(gsm);
     struct VirtIOMEMReplayData data = {
         .fn = replay_fn,
         .opaque = opaque,
@@ -1748,12 +1753,12 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
     return 0;
 }
 
-static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
+static int virtio_mem_rdm_replay_discarded(const GenericStateManager *gsm,
                                            MemoryRegionSection *s,
                                            ReplayStateChange replay_fn,
                                            void *opaque)
 {
-    const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    const VirtIOMEM *vmem = VIRTIO_MEM(gsm);
     struct VirtIOMEMReplayData data = {
         .fn = replay_fn,
         .opaque = opaque,
@@ -1764,18 +1769,19 @@ static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
                                                  virtio_mem_rdm_replay_discarded_cb);
 }
 
-static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
-                                             RamDiscardListener *rdl,
+static void virtio_mem_rdm_register_listener(GenericStateManager *gsm,
+                                             StateChangeListener *scl,
                                              MemoryRegionSection *s)
 {
-    VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    VirtIOMEM *vmem = VIRTIO_MEM(gsm);
+    RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl);
     int ret;
 
     g_assert(s->mr == &vmem->memdev->mr);
-    rdl->section = memory_region_section_new_copy(s);
+    scl->section = memory_region_section_new_copy(s);
 
     QLIST_INSERT_HEAD(&vmem->rdl_list, rdl, next);
-    ret = virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+    ret = virtio_mem_for_each_plugged_section(vmem, scl->section, scl,
                                               virtio_mem_notify_populate_cb);
     if (ret) {
         error_report("%s: Replaying plugged ranges failed: %s", __func__,
@@ -1783,23 +1789,24 @@ static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
     }
 }
 
-static void virtio_mem_rdm_unregister_listener(RamDiscardManager *rdm,
-                                               RamDiscardListener *rdl)
+static void virtio_mem_rdm_unregister_listener(GenericStateManager *gsm,
+                                               StateChangeListener *scl)
 {
-    VirtIOMEM *vmem = VIRTIO_MEM(rdm);
+    VirtIOMEM *vmem = VIRTIO_MEM(gsm);
+    RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl);
 
-    g_assert(rdl->section->mr == &vmem->memdev->mr);
+    g_assert(scl->section->mr == &vmem->memdev->mr);
     if (vmem->size) {
         if (rdl->double_discard_supported) {
-            rdl->notify_discard(rdl, rdl->section);
+            scl->notify_to_state_clear(scl, scl->section);
         } else {
-            virtio_mem_for_each_plugged_section(vmem, rdl->section, rdl,
+            virtio_mem_for_each_plugged_section(vmem, scl->section, scl,
                                                 virtio_mem_notify_discard_cb);
         }
     }
 
-    memory_region_section_free_copy(rdl->section);
-    rdl->section = NULL;
+    memory_region_section_free_copy(scl->section);
+    scl->section = NULL;
     QLIST_REMOVE(rdl, next);
 }
 
@@ -1832,7 +1839,7 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
     VirtIOMEMClass *vmc = VIRTIO_MEM_CLASS(klass);
-    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
+    GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_CLASS(klass);
 
     device_class_set_props(dc, virtio_mem_properties);
     dc->vmsd = &vmstate_virtio_mem;
@@ -1853,12 +1860,12 @@ static void virtio_mem_class_init(ObjectClass *klass, void *data)
     vmc->remove_size_change_notifier = virtio_mem_remove_size_change_notifier;
     vmc->unplug_request_check = virtio_mem_unplug_request_check;
 
-    rdmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
-    rdmc->is_populated = virtio_mem_rdm_is_populated;
-    rdmc->replay_populated = virtio_mem_rdm_replay_populated;
-    rdmc->replay_discarded = virtio_mem_rdm_replay_discarded;
-    rdmc->register_listener = virtio_mem_rdm_register_listener;
-    rdmc->unregister_listener = virtio_mem_rdm_unregister_listener;
+    gsmc->get_min_granularity = virtio_mem_rdm_get_min_granularity;
+    gsmc->is_state_set = virtio_mem_rdm_is_populated;
+    gsmc->replay_on_state_set = virtio_mem_rdm_replay_populated;
+    gsmc->replay_on_state_clear = virtio_mem_rdm_replay_discarded;
+    gsmc->register_listener = virtio_mem_rdm_register_listener;
+    gsmc->unregister_listener = virtio_mem_rdm_unregister_listener;
 }
 
 static const TypeInfo virtio_mem_info = {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index a3243ee218..652d71ddf0 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -43,6 +43,12 @@ typedef struct IOMMUMemoryRegionClass IOMMUMemoryRegionClass;
 DECLARE_OBJ_CHECKERS(IOMMUMemoryRegion, IOMMUMemoryRegionClass,
                      IOMMU_MEMORY_REGION, TYPE_IOMMU_MEMORY_REGION)
 
+#define TYPE_GENERIC_STATE_MANAGER "generic-state-manager"
+typedef struct GenericStateManagerClass GenericStateManagerClass;
+typedef struct GenericStateManager GenericStateManager;
+DECLARE_OBJ_CHECKERS(GenericStateManager, GenericStateManagerClass,
+                     GENERIC_STATE_MANAGER, TYPE_GENERIC_STATE_MANAGER)
+
 #define TYPE_RAM_DISCARD_MANAGER "qemu:ram-discard-manager"
 typedef struct RamDiscardManagerClass RamDiscardManagerClass;
 typedef struct RamDiscardManager RamDiscardManager;
@@ -563,103 +569,59 @@ struct IOMMUMemoryRegionClass {
                                   Error **errp);
 };
 
-typedef struct RamDiscardListener RamDiscardListener;
-typedef int (*NotifyRamPopulate)(RamDiscardListener *rdl,
-                                 MemoryRegionSection *section);
-typedef void (*NotifyRamDiscard)(RamDiscardListener *rdl,
+typedef int (*ReplayStateChange)(MemoryRegionSection *section, void *opaque);
+
+typedef struct StateChangeListener StateChangeListener;
+typedef int (*NotifyStateSet)(StateChangeListener *scl,
+                              MemoryRegionSection *section);
+typedef void (*NotifyStateClear)(StateChangeListener *scl,
                                  MemoryRegionSection *section);
 
-struct RamDiscardListener {
+struct StateChangeListener {
     /*
-     * @notify_populate:
+     * @notify_to_state_set:
      *
-     * Notification that previously discarded memory is about to get populated.
-     * Listeners are able to object. If any listener objects, already
-     * successfully notified listeners are notified about a discard again.
+     * Notification that a previously state-clear part is about to be set.
      *
-     * @rdl: the #RamDiscardListener getting notified
-     * @section: the #MemoryRegionSection to get populated. The section
+     * @scl: the #StateChangeListener getting notified
+     * @section: the #MemoryRegionSection to be state-set. The section
      *           is aligned within the memory region to the minimum granularity
      *           unless it would exceed the registered section.
      *
      * Returns 0 on success. If the notification is rejected by the listener,
      * an error is returned.
      */
-    NotifyRamPopulate notify_populate;
+    NotifyStateSet notify_to_state_set;
 
     /*
-     * @notify_discard:
+     * @notify_to_state_clear:
      *
-     * Notification that previously populated memory was discarded successfully
-     * and listeners should drop all references to such memory and prevent
-     * new population (e.g., unmap).
+     * Notification that a previously state-set part is about to be cleared.
      *
-     * @rdl: the #RamDiscardListener getting notified
-     * @section: the #MemoryRegionSection to get populated. The section
+     * @scl: the #StateChangeListener getting notified
+     * @section: the #MemoryRegionSection to be state-cleared. The section
      *           is aligned within the memory region to the minimum granularity
      *           unless it would exceed the registered section.
-     */
-    NotifyRamDiscard notify_discard;
-
-    /*
-     * @double_discard_supported:
      *
-     * The listener suppors getting @notify_discard notifications that span
-     * already discarded parts.
+     * The callback returns no value; a state-clear notification cannot be
+     * rejected by the listener.
      */
-    bool double_discard_supported;
+    NotifyStateClear notify_to_state_clear;
 
     MemoryRegionSection *section;
-    QLIST_ENTRY(RamDiscardListener) next;
 };
 
-static inline void ram_discard_listener_init(RamDiscardListener *rdl,
-                                             NotifyRamPopulate populate_fn,
-                                             NotifyRamDiscard discard_fn,
-                                             bool double_discard_supported)
-{
-    rdl->notify_populate = populate_fn;
-    rdl->notify_discard = discard_fn;
-    rdl->double_discard_supported = double_discard_supported;
-}
-
-typedef int (*ReplayStateChange)(MemoryRegionSection *section, void *opaque);
-
 /*
- * RamDiscardManagerClass:
- *
- * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion
- * regions are currently populated to be used/accessed by the VM, notifying
- * after parts were discarded (freeing up memory) and before parts will be
- * populated (consuming memory), to be used/accessed by the VM.
- *
- * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
- * #MemoryRegion isn't mapped into an address space yet (either directly
- * or via an alias); it cannot change while the #MemoryRegion is
- * mapped into an address space.
+ * GenericStateManagerClass:
  *
- * The #RamDiscardManager is intended to be used by technologies that are
- * incompatible with discarding of RAM (e.g., VFIO, which may pin all
- * memory inside a #MemoryRegion), and require proper coordination to only
- * map the currently populated parts, to hinder parts that are expected to
- * remain discarded from silently getting populated and consuming memory.
- * Technologies that support discarding of RAM don't have to bother and can
- * simply map the whole #MemoryRegion.
- *
- * An example #RamDiscardManager is virtio-mem, which logically (un)plugs
- * memory within an assigned RAM #MemoryRegion, coordinated with the VM.
- * Logically unplugging memory consists of discarding RAM. The VM agreed to not
- * access unplugged (discarded) memory - especially via DMA. virtio-mem will
- * properly coordinate with listeners before memory is plugged (populated),
- * and after memory is unplugged (discarded).
+ * A #GenericStateManager is a common interface used to manage the state of
+ * a #MemoryRegion. The managed states are a pair of opposite states, such as
+ * populated and discarded, or private and shared. They are abstracted as
+ * "set" and "clear" in the callbacks below; the actual state is managed by
+ * the implementation.
  *
- * Listeners are called in multiples of the minimum granularity (unless it
- * would exceed the registered range) and changes are aligned to the minimum
- * granularity within the #MemoryRegion. Listeners have to prepare for memory
- * becoming discarded in a different granularity than it was populated and the
- * other way around.
  */
-struct RamDiscardManagerClass {
+struct GenericStateManagerClass {
     /* private */
     InterfaceClass parent_class;
 
@@ -669,122 +631,188 @@ struct RamDiscardManagerClass {
      * @get_min_granularity:
      *
      * Get the minimum granularity in which listeners will get notified
-     * about changes within the #MemoryRegion via the #RamDiscardManager.
+     * about changes within the #MemoryRegion via the #GenericStateManager.
      *
-     * @rdm: the #RamDiscardManager
+     * @gsm: the #GenericStateManager
      * @mr: the #MemoryRegion
      *
      * Returns the minimum granularity.
      */
-    uint64_t (*get_min_granularity)(const RamDiscardManager *rdm,
+    uint64_t (*get_min_granularity)(const GenericStateManager *gsm,
                                     const MemoryRegion *mr);
 
     /**
-     * @is_populated:
+     * @is_state_set:
      *
-     * Check whether the given #MemoryRegionSection is completely populated
-     * (i.e., no parts are currently discarded) via the #RamDiscardManager.
-     * There are no alignment requirements.
+     * Check whether the given #MemoryRegionSection is completely in the set
+     * state via the #GenericStateManager.
      *
-     * @rdm: the #RamDiscardManager
+     * @gsm: the #GenericStateManager
      * @section: the #MemoryRegionSection
      *
-     * Returns whether the given range is completely populated.
+     * Returns whether the given range is completely set.
      */
-    bool (*is_populated)(const RamDiscardManager *rdm,
+    bool (*is_state_set)(const GenericStateManager *gsm,
                          const MemoryRegionSection *section);
 
     /**
-     * @replay_populated:
+     * @replay_on_state_set:
      *
-     * Call the #ReplayStateChange callback for all populated parts within the
-     * #MemoryRegionSection via the #RamDiscardManager.
+     * Call the #ReplayStateChange callback for all state set parts within the
+     * #MemoryRegionSection via the #GenericStateManager.
      *
      * In case any call fails, no further calls are made.
      *
-     * @rdm: the #RamDiscardManager
+     * @gsm: the #GenericStateManager
      * @section: the #MemoryRegionSection
      * @replay_fn: the #ReplayStateChange callback
      * @opaque: pointer to forward to the callback
      *
      * Returns 0 on success, or a negative error if any notification failed.
      */
-    int (*replay_populated)(const RamDiscardManager *rdm,
-                            MemoryRegionSection *section,
-                            ReplayStateChange replay_fn, void *opaque);
+    int (*replay_on_state_set)(const GenericStateManager *gsm,
+                               MemoryRegionSection *section,
+                               ReplayStateChange replay_fn, void *opaque);
 
     /**
-     * @replay_discarded:
+     * @replay_on_state_clear:
      *
-     * Call the #ReplayStateChange callback for all discarded parts within the
-     * #MemoryRegionSection via the #RamDiscardManager.
+     * Call the #ReplayStateChange callback for all state clear parts within the
+     * #MemoryRegionSection via the #GenericStateManager.
+     *
+     * In case any call fails, no further calls are made.
      *
-     * @rdm: the #RamDiscardManager
+     * @gsm: the #GenericStateManager
      * @section: the #MemoryRegionSection
      * @replay_fn: the #ReplayStateChange callback
      * @opaque: pointer to forward to the callback
      *
      * Returns 0 on success, or a negative error if any notification failed.
      */
-    int (*replay_discarded)(const RamDiscardManager *rdm,
-                            MemoryRegionSection *section,
-                            ReplayStateChange replay_fn, void *opaque);
+    int (*replay_on_state_clear)(const GenericStateManager *gsm,
+                                 MemoryRegionSection *section,
+                                 ReplayStateChange replay_fn, void *opaque);
 
     /**
      * @register_listener:
      *
-     * Register a #RamDiscardListener for the given #MemoryRegionSection and
-     * immediately notify the #RamDiscardListener about all populated parts
-     * within the #MemoryRegionSection via the #RamDiscardManager.
+     * Register a #StateChangeListener for the given #MemoryRegionSection and
+     * immediately notify the #StateChangeListener about all state-set parts
+     * within the #MemoryRegionSection via the #GenericStateManager.
      *
      * In case any notification fails, no further notifications are triggered
      * and an error is logged.
      *
-     * @rdm: the #RamDiscardManager
-     * @rdl: the #RamDiscardListener
+     * @gsm: the #GenericStateManager
+     * @scl: the #StateChangeListener
      * @section: the #MemoryRegionSection
      */
-    void (*register_listener)(RamDiscardManager *rdm,
-                              RamDiscardListener *rdl,
+    void (*register_listener)(GenericStateManager *gsm,
+                              StateChangeListener *scl,
                               MemoryRegionSection *section);
 
     /**
      * @unregister_listener:
      *
-     * Unregister a previously registered #RamDiscardListener via the
-     * #RamDiscardManager after notifying the #RamDiscardListener about all
-     * populated parts becoming unpopulated within the registered
+     * Unregister a previously registered #StateChangeListener via the
+     * #GenericStateManager after notifying the #StateChangeListener about all
+     * state-set parts becoming state-cleared within the registered
      * #MemoryRegionSection.
      *
-     * @rdm: the #RamDiscardManager
-     * @rdl: the #RamDiscardListener
+     * @gsm: the #GenericStateManager
+     * @scl: the #StateChangeListener
      */
-    void (*unregister_listener)(RamDiscardManager *rdm,
-                                RamDiscardListener *rdl);
+    void (*unregister_listener)(GenericStateManager *gsm,
+                                StateChangeListener *scl);
 };
 
-uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
-                                                 const MemoryRegion *mr);
+uint64_t generic_state_manager_get_min_granularity(const GenericStateManager *gsm,
+                                                   const MemoryRegion *mr);
 
-bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
-                                      const MemoryRegionSection *section);
+bool generic_state_manager_is_state_set(const GenericStateManager *gsm,
+                                        const MemoryRegionSection *section);
 
-int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
-                                         MemoryRegionSection *section,
-                                         ReplayStateChange replay_fn,
-                                         void *opaque);
+int generic_state_manager_replay_on_state_set(const GenericStateManager *gsm,
+                                           MemoryRegionSection *section,
+                                           ReplayStateChange replay_fn,
+                                           void *opaque);
 
-int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
-                                         MemoryRegionSection *section,
-                                         ReplayStateChange replay_fn,
-                                         void *opaque);
+int generic_state_manager_replay_on_state_clear(const GenericStateManager *gsm,
+                                                MemoryRegionSection *section,
+                                                ReplayStateChange replay_fn,
+                                                void *opaque);
 
-void ram_discard_manager_register_listener(RamDiscardManager *rdm,
-                                           RamDiscardListener *rdl,
-                                           MemoryRegionSection *section);
+void generic_state_manager_register_listener(GenericStateManager *gsm,
+                                             StateChangeListener *scl,
+                                             MemoryRegionSection *section);
 
-void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
-                                             RamDiscardListener *rdl);
+void generic_state_manager_unregister_listener(GenericStateManager *gsm,
+                                               StateChangeListener *scl);
+
+typedef struct RamDiscardListener RamDiscardListener;
+
+struct RamDiscardListener {
+    struct StateChangeListener scl;
+
+    /*
+     * @double_discard_supported:
+     *
+     * The listener supports getting @notify_to_state_clear notifications that
+     * span already discarded parts.
+     */
+    bool double_discard_supported;
+
+    QLIST_ENTRY(RamDiscardListener) next;
+};
+
+static inline void ram_discard_listener_init(RamDiscardListener *rdl,
+                                             NotifyStateSet populate_fn,
+                                             NotifyStateClear discard_fn,
+                                             bool double_discard_supported)
+{
+    rdl->scl.notify_to_state_set = populate_fn;
+    rdl->scl.notify_to_state_clear = discard_fn;
+    rdl->double_discard_supported = double_discard_supported;
+}
+
+/*
+ * RamDiscardManagerClass:
+ *
+ * A #RamDiscardManager coordinates which parts of specific RAM #MemoryRegion
+ * regions are currently populated to be used/accessed by the VM, notifying
+ * after parts were discarded (freeing up memory) and before parts will be
+ * populated (consuming memory), to be used/accessed by the VM.
+ *
+ * A #RamDiscardManager can only be set for a RAM #MemoryRegion while the
+ * #MemoryRegion isn't mapped into an address space yet (either directly
+ * or via an alias); it cannot change while the #MemoryRegion is
+ * mapped into an address space.
+ *
+ * The #RamDiscardManager is intended to be used by technologies that are
+ * incompatible with discarding of RAM (e.g., VFIO, which may pin all
+ * memory inside a #MemoryRegion), and require proper coordination to only
+ * map the currently populated parts, to hinder parts that are expected to
+ * remain discarded from silently getting populated and consuming memory.
+ * Technologies that support discarding of RAM don't have to bother and can
+ * simply map the whole #MemoryRegion.
+ *
+ * An example #RamDiscardManager is virtio-mem, which logically (un)plugs
+ * memory within an assigned RAM #MemoryRegion, coordinated with the VM.
+ * Logically unplugging memory consists of discarding RAM. The VM agreed to not
+ * access unplugged (discarded) memory - especially via DMA. virtio-mem will
+ * properly coordinate with listeners before memory is plugged (populated),
+ * and after memory is unplugged (discarded).
+ *
+ * Listeners are called in multiples of the minimum granularity (unless it
+ * would exceed the registered range) and changes are aligned to the minimum
+ * granularity within the #MemoryRegion. Listeners have to prepare for memory
+ * becoming discarded in a different granularity than it was populated and the
+ * other way around.
+ */
+struct RamDiscardManagerClass {
+    /* private */
+    GenericStateManagerClass parent_class;
+};
 
 bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
                           ram_addr_t *ram_addr, bool *read_only,
@@ -851,7 +879,7 @@ struct MemoryRegion {
     const char *name;
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
-    RamDiscardManager *rdm; /* Only for RAM */
+    GenericStateManager *gsm; /* Only for RAM */
 
     /* For devices designed to perform re-entrant IO into their own IO MRs */
     bool disable_reentrancy_guard;
@@ -2529,39 +2557,36 @@ bool memory_region_present(MemoryRegion *container, hwaddr addr);
 bool memory_region_is_mapped(MemoryRegion *mr);
 
 /**
- * memory_region_get_ram_discard_manager: get the #RamDiscardManager for a
+ * memory_region_get_generic_state_manager: get the #GenericStateManager for a
  * #MemoryRegion
  *
- * The #RamDiscardManager cannot change while a memory region is mapped.
+ * The #GenericStateManager cannot change while a memory region is mapped.
  *
  * @mr: the #MemoryRegion
  */
-RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr);
+GenericStateManager *memory_region_get_generic_state_manager(MemoryRegion *mr);
 
 /**
- * memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a
- * #RamDiscardManager assigned
+ * memory_region_set_generic_state_manager: set the #GenericStateManager for a
+ * #MemoryRegion
+ *
+ * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion
+ * that does not cover RAM, or a #MemoryRegion that already has a
+ * #GenericStateManager assigned. Return 0 if the gsm is set successfully.
  *
  * @mr: the #MemoryRegion
+ * @gsm: #GenericStateManager to set
  */
-static inline bool memory_region_has_ram_discard_manager(MemoryRegion *mr)
-{
-    return !!memory_region_get_ram_discard_manager(mr);
-}
+int memory_region_set_generic_state_manager(MemoryRegion *mr,
+                                            GenericStateManager *gsm);
 
 /**
- * memory_region_set_ram_discard_manager: set the #RamDiscardManager for a
- * #MemoryRegion
- *
- * This function must not be called for a mapped #MemoryRegion, a #MemoryRegion
- * that does not cover RAM, or a #MemoryRegion that already has a
- * #RamDiscardManager assigned. Return 0 if the rdm is set successfully.
+ * memory_region_has_ram_discard_manager: check whether a #MemoryRegion has a
+ * #RamDiscardManager assigned
  *
  * @mr: the #MemoryRegion
- * @rdm: #RamDiscardManager to set
  */
-int memory_region_set_ram_discard_manager(MemoryRegion *mr,
-                                          RamDiscardManager *rdm);
+bool memory_region_has_ram_discard_manager(MemoryRegion *mr);
 
 /**
  * memory_region_find: translate an address/size relative to a
diff --git a/migration/ram.c b/migration/ram.c
index 083a8a8073..e6baecf143 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -882,14 +882,14 @@ static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
     uint64_t cleared_bits = 0;
 
     if (rb->mr && rb->bmap && memory_region_has_ram_discard_manager(rb->mr)) {
-        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr);
         MemoryRegionSection section = {
             .mr = rb->mr,
             .offset_within_region = 0,
             .size = int128_make64(qemu_ram_get_used_length(rb)),
         };
 
-        ram_discard_manager_replay_discarded(rdm, &section,
+        generic_state_manager_replay_on_state_clear(gsm, &section,
                                              dirty_bitmap_clear_section,
                                              &cleared_bits);
     }
@@ -905,14 +905,14 @@ static uint64_t ramblock_dirty_bitmap_clear_discarded_pages(RAMBlock *rb)
 bool ramblock_page_is_discarded(RAMBlock *rb, ram_addr_t start)
 {
     if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
-        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr);
         MemoryRegionSection section = {
             .mr = rb->mr,
             .offset_within_region = start,
             .size = int128_make64(qemu_ram_pagesize(rb)),
         };
 
-        return !ram_discard_manager_is_populated(rdm, &section);
+        return !generic_state_manager_is_state_set(gsm, &section);
     }
     return false;
 }
@@ -1732,14 +1732,14 @@ static void ram_block_populate_read(RAMBlock *rb)
      * Note: The result is only stable while migrating (precopy/postcopy).
      */
     if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
-        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr);
         MemoryRegionSection section = {
             .mr = rb->mr,
             .offset_within_region = 0,
             .size = rb->mr->size,
         };
 
-        ram_discard_manager_replay_populated(rdm, &section,
+        generic_state_manager_replay_on_state_set(gsm, &section,
                                              populate_read_section, NULL);
     } else {
         populate_read_range(rb, 0, rb->used_length);
@@ -1791,14 +1791,14 @@ static int ram_block_uffd_protect(RAMBlock *rb, int uffd_fd)
 
     /* See ram_block_populate_read() */
     if (rb->mr && memory_region_has_ram_discard_manager(rb->mr)) {
-        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(rb->mr);
+        GenericStateManager *gsm = memory_region_get_generic_state_manager(rb->mr);
         MemoryRegionSection section = {
             .mr = rb->mr,
             .offset_within_region = 0,
             .size = rb->mr->size,
         };
 
-        return ram_discard_manager_replay_populated(rdm, &section,
+        return generic_state_manager_replay_on_state_set(gsm, &section,
                                                     uffd_protect_section,
                                                     (void *)(uintptr_t)uffd_fd);
     }
diff --git a/system/memory.c b/system/memory.c
index ace79b0f59..38f73eb48b 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2113,83 +2113,93 @@ int memory_region_iommu_num_indexes(IOMMUMemoryRegion *iommu_mr)
     return imrc->num_indexes(iommu_mr);
 }
 
-RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
+GenericStateManager *memory_region_get_generic_state_manager(MemoryRegion *mr)
 {
     if (!memory_region_is_ram(mr)) {
         return NULL;
     }
-    return mr->rdm;
+    return mr->gsm;
 }
 
-int memory_region_set_ram_discard_manager(MemoryRegion *mr,
-                                          RamDiscardManager *rdm)
+int memory_region_set_generic_state_manager(MemoryRegion *mr,
+                                            GenericStateManager *gsm)
 {
     g_assert(memory_region_is_ram(mr));
-    if (mr->rdm && rdm) {
+    if (mr->gsm && gsm) {
         return -EBUSY;
     }
 
-    mr->rdm = rdm;
+    mr->gsm = gsm;
     return 0;
 }
 
-uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
-                                                 const MemoryRegion *mr)
+bool memory_region_has_ram_discard_manager(MemoryRegion *mr)
 {
-    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+    if (!memory_region_is_ram(mr) ||
+        !object_dynamic_cast(OBJECT(mr->gsm), TYPE_RAM_DISCARD_MANAGER)) {
+        return false;
+    }
+
+    return true;
+}
+
+uint64_t generic_state_manager_get_min_granularity(const GenericStateManager *gsm,
+                                                   const MemoryRegion *mr)
+{
+    GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm);
 
-    g_assert(rdmc->get_min_granularity);
-    return rdmc->get_min_granularity(rdm, mr);
+    g_assert(gsmc->get_min_granularity);
+    return gsmc->get_min_granularity(gsm, mr);
 }
 
-bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
-                                      const MemoryRegionSection *section)
+bool generic_state_manager_is_state_set(const GenericStateManager *gsm,
+                                        const MemoryRegionSection *section)
 {
-    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+    GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm);
 
-    g_assert(rdmc->is_populated);
-    return rdmc->is_populated(rdm, section);
+    g_assert(gsmc->is_state_set);
+    return gsmc->is_state_set(gsm, section);
 }
 
-int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
-                                         MemoryRegionSection *section,
-                                         ReplayStateChange replay_fn,
-                                         void *opaque)
+int generic_state_manager_replay_on_state_set(const GenericStateManager *gsm,
+                                              MemoryRegionSection *section,
+                                              ReplayStateChange replay_fn,
+                                              void *opaque)
 {
-    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+    GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm);
 
-    g_assert(rdmc->replay_populated);
-    return rdmc->replay_populated(rdm, section, replay_fn, opaque);
+    g_assert(gsmc->replay_on_state_set);
+    return gsmc->replay_on_state_set(gsm, section, replay_fn, opaque);
 }
 
-int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
-                                         MemoryRegionSection *section,
-                                         ReplayStateChange replay_fn,
-                                         void *opaque)
+int generic_state_manager_replay_on_state_clear(const GenericStateManager *gsm,
+                                                MemoryRegionSection *section,
+                                                ReplayStateChange replay_fn,
+                                                void *opaque)
 {
-    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+    GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm);
 
-    g_assert(rdmc->replay_discarded);
-    return rdmc->replay_discarded(rdm, section, replay_fn, opaque);
+    g_assert(gsmc->replay_on_state_clear);
+    return gsmc->replay_on_state_clear(gsm, section, replay_fn, opaque);
 }
 
-void ram_discard_manager_register_listener(RamDiscardManager *rdm,
-                                           RamDiscardListener *rdl,
-                                           MemoryRegionSection *section)
+void generic_state_manager_register_listener(GenericStateManager *gsm,
+                                             StateChangeListener *scl,
+                                             MemoryRegionSection *section)
 {
-    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+    GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm);
 
-    g_assert(rdmc->register_listener);
-    rdmc->register_listener(rdm, rdl, section);
+    g_assert(gsmc->register_listener);
+    gsmc->register_listener(gsm, scl, section);
 }
 
-void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
-                                             RamDiscardListener *rdl)
+void generic_state_manager_unregister_listener(GenericStateManager *gsm,
+                                               StateChangeListener *scl)
 {
-    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
+    GenericStateManagerClass *gsmc = GENERIC_STATE_MANAGER_GET_CLASS(gsm);
 
-    g_assert(rdmc->unregister_listener);
-    rdmc->unregister_listener(rdm, rdl);
+    g_assert(gsmc->unregister_listener);
+    gsmc->unregister_listener(gsm, scl);
 }
 
 /* Called with rcu_read_lock held.  */
@@ -2216,7 +2226,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
         error_report("iommu map to non memory area %" HWADDR_PRIx "", xlat);
         return false;
     } else if (memory_region_has_ram_discard_manager(mr)) {
-        RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
+        GenericStateManager *gsm = memory_region_get_generic_state_manager(mr);
         MemoryRegionSection tmp = {
             .mr = mr,
             .offset_within_region = xlat,
@@ -2231,7 +2241,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
          * Disallow that. vmstate priorities make sure any RamDiscardManager
          * were already restored before IOMMUs are restored.
          */
-        if (!ram_discard_manager_is_populated(rdm, &tmp)) {
+        if (!generic_state_manager_is_state_set(gsm, &tmp)) {
             error_report("iommu map to discarded memory (e.g., unplugged via"
                          " virtio-mem): %" HWADDR_PRIx "",
                          iotlb->translated_addr);
@@ -3737,8 +3747,15 @@ static const TypeInfo iommu_memory_region_info = {
     .abstract           = true,
 };
 
-static const TypeInfo ram_discard_manager_info = {
+static const TypeInfo generic_state_manager_info = {
     .parent             = TYPE_INTERFACE,
+    .name               = TYPE_GENERIC_STATE_MANAGER,
+    .class_size         = sizeof(GenericStateManagerClass),
+    .abstract           = true,
+};
+
+static const TypeInfo ram_discard_manager_info = {
+    .parent             = TYPE_GENERIC_STATE_MANAGER,
     .name               = TYPE_RAM_DISCARD_MANAGER,
     .class_size         = sizeof(RamDiscardManagerClass),
 };
@@ -3747,6 +3764,7 @@ static void memory_register_types(void)
 {
     type_register_static(&memory_region_info);
     type_register_static(&iommu_memory_region_info);
+    type_register_static(&generic_state_manager_info);
     type_register_static(&ram_discard_manager_info);
 }
 
diff --git a/system/memory_mapping.c b/system/memory_mapping.c
index 6f884c5b90..7bd8972b55 100644
--- a/system/memory_mapping.c
+++ b/system/memory_mapping.c
@@ -270,10 +270,8 @@ static void guest_phys_blocks_region_add(MemoryListener *listener,
 
     /* for special sparse regions, only add populated parts */
     if (memory_region_has_ram_discard_manager(section->mr)) {
-        RamDiscardManager *rdm;
-
-        rdm = memory_region_get_ram_discard_manager(section->mr);
-        ram_discard_manager_replay_populated(rdm, section,
+        GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
+        generic_state_manager_replay_on_state_set(gsm, section,
                                              guest_phys_ram_populate_cb, g);
         return;
     }
-- 
Gitee


From 8d2a28564e7642b156d2a8d7351c5a70011c4529 Mon Sep 17 00:00:00 2001
From: Chenyi Qiang <chenyi.qiang@intel.com>
Date: Mon, 7 Apr 2025 15:49:25 +0800
Subject: [PATCH 05/40] memory: Introduce PrivateSharedManager Interface as
 child of GenericStateManager

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/dd9686d946fcd8ebd5d5e7dec1fb8b1c05f8b980

To manage the private and shared RAM states in confidential VMs,
introduce a new class of PrivateSharedManager as a child of
GenericStateManager, which inherits the six interface callbacks. With a
different interface type, it can be distinguished from the
RamDiscardManager object and provide the flexibility for addressing
specific requirements of confidential VMs in the future.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Conflicts:
      include/exec/memory.h
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 include/exec/memory.h | 44 +++++++++++++++++++++++++++++++++++++++++--
 system/memory.c       | 17 +++++++++++++++++
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 652d71ddf0..964ec53afc 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -55,6 +55,12 @@ typedef struct RamDiscardManager RamDiscardManager;
 DECLARE_OBJ_CHECKERS(RamDiscardManager, RamDiscardManagerClass,
                      RAM_DISCARD_MANAGER, TYPE_RAM_DISCARD_MANAGER);
 
+#define TYPE_PRIVATE_SHARED_MANAGER "private-shared-manager"
+typedef struct PrivateSharedManagerClass PrivateSharedManagerClass;
+typedef struct PrivateSharedManager PrivateSharedManager;
+DECLARE_OBJ_CHECKERS(PrivateSharedManager, PrivateSharedManagerClass,
+                     PRIVATE_SHARED_MANAGER, TYPE_PRIVATE_SHARED_MANAGER)
+
 #ifdef CONFIG_FUZZ
 void fuzz_dma_read_cb(size_t addr,
                       size_t len,
@@ -749,6 +755,14 @@ void generic_state_manager_register_listener(GenericStateManager *gsm,
 void generic_state_manager_unregister_listener(GenericStateManager *gsm,
                                                StateChangeListener *scl);
 
+static inline void state_change_listener_init(StateChangeListener *scl,
+                                              NotifyStateSet state_set_fn,
+                                              NotifyStateClear state_clear_fn)
+{
+    scl->notify_to_state_set = state_set_fn;
+    scl->notify_to_state_clear = state_clear_fn;
+}
+
 typedef struct RamDiscardListener RamDiscardListener;
 
 struct RamDiscardListener {
@@ -770,8 +784,7 @@ static inline void ram_discard_listener_init(RamDiscardListener *rdl,
                                              NotifyStateClear discard_fn,
                                              bool double_discard_supported)
 {
-    rdl->scl.notify_to_state_set = populate_fn;
-    rdl->scl.notify_to_state_clear = discard_fn;
+    state_change_listener_init(&rdl->scl, populate_fn, discard_fn);
     rdl->double_discard_supported = double_discard_supported;
 }
 
@@ -814,6 +827,25 @@ struct RamDiscardManagerClass {
     GenericStateManagerClass parent_class;
 };
 
+typedef struct PrivateSharedListener PrivateSharedListener;
+struct PrivateSharedListener {
+    struct StateChangeListener scl;
+
+    QLIST_ENTRY(PrivateSharedListener) next;
+};
+
+struct PrivateSharedManagerClass {
+    /* private */
+    GenericStateManagerClass parent_class;
+};
+
+static inline void private_shared_listener_init(PrivateSharedListener *psl,
+                                                NotifyStateSet populate_fn,
+                                                NotifyStateClear discard_fn)
+{
+    state_change_listener_init(&psl->scl, populate_fn, discard_fn);
+}
+
 bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
                           ram_addr_t *ram_addr, bool *read_only,
                           bool *mr_has_discard_manager);
@@ -2588,6 +2620,14 @@ int memory_region_set_generic_state_manager(MemoryRegion *mr,
  */
 bool memory_region_has_ram_discard_manager(MemoryRegion *mr);
 
+/**
+ * memory_region_has_private_shared_manager: check whether a #MemoryRegion has a
+ * #PrivateSharedManager assigned
+ *
+ * @mr: the #MemoryRegion
+ */
+bool memory_region_has_private_shared_manager(MemoryRegion *mr);
+
 /**
  * memory_region_find: translate an address/size relative to a
  * MemoryRegion into a #MemoryRegionSection.
diff --git a/system/memory.c b/system/memory.c
index 38f73eb48b..fa99009701 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2143,6 +2143,16 @@ bool memory_region_has_ram_discard_manager(MemoryRegion *mr)
     return true;
 }
 
+bool memory_region_has_private_shared_manager(MemoryRegion *mr)
+{
+    if (!memory_region_is_ram(mr) ||
+        !object_dynamic_cast(OBJECT(mr->gsm), TYPE_PRIVATE_SHARED_MANAGER)) {
+        return false;
+    }
+
+    return true;
+}
+
 uint64_t generic_state_manager_get_min_granularity(const GenericStateManager *gsm,
                                                    const MemoryRegion *mr)
 {
@@ -3760,12 +3770,19 @@ static const TypeInfo ram_discard_manager_info = {
     .class_size         = sizeof(RamDiscardManagerClass),
 };
 
+static const TypeInfo private_shared_manager_info = {
+    .parent             = TYPE_GENERIC_STATE_MANAGER,
+    .name               = TYPE_PRIVATE_SHARED_MANAGER,
+    .class_size         = sizeof(PrivateSharedManagerClass),
+};
+
 static void memory_register_types(void)
 {
     type_register_static(&memory_region_info);
     type_register_static(&iommu_memory_region_info);
     type_register_static(&generic_state_manager_info);
     type_register_static(&ram_discard_manager_info);
+    type_register_static(&private_shared_manager_info);
 }
 
 type_init(memory_register_types)
-- 
Gitee


From 2cf51bbf91b9409b411e0904cd3a2f4875646fec Mon Sep 17 00:00:00 2001
From: Chenyi Qiang <chenyi.qiang@intel.com>
Date: Mon, 7 Apr 2025 15:49:26 +0800
Subject: [PATCH 06/40] vfio: Add the support for PrivateSharedManager
 Interface

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/f301a300d981459e74387ee10de01e8589d35451

Subsystems like VFIO previously disabled ram block discard and only
allowed coordinated discarding via RamDiscardManager. However,
guest_memfd in confidential VMs relies on discard operations for page
conversion between private and shared memory. This can lead to stale
IOMMU mapping issues when assigning a hardware device to a confidential
VM via shared memory. With the introduction of PrivateSharedManager
interface to manage private and shared states and being distinct from
RamDiscardManager, include PrivateSharedManager in coordinated RAM
discard and add related support in VFIO.

Currently, migration support for confidential VMs is not available, so
vfio_sync_dirty_bitmap() handling for PrivateSharedListener can be
ignored. The register/unregister of PrivateSharedListener is necessary
during vfio_listener_region_add/del(). The listener callbacks are
similar between RamDiscardListener and PrivateSharedListener, allowing
for extraction of common parts opportunistically.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Conflicts:
      hw/vfio/container-base.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/vfio/common.c                      | 104 +++++++++++++++++++++++---
 hw/vfio/container-base.c              |   1 +
 include/hw/vfio/vfio-container-base.h |  10 +++
 3 files changed, 105 insertions(+), 10 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ab7450f3bd..62a2000acd 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -350,13 +350,9 @@ out:
     rcu_read_unlock();
 }
 
-static void vfio_ram_discard_notify_discard(StateChangeListener *scl,
-                                            MemoryRegionSection *section)
+static void vfio_state_change_notify_to_state_clear(VFIOContainerBase *bcontainer,
+                                                    MemoryRegionSection *section)
 {
-    RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl);
-    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
-                                                listener);
-    VFIOContainerBase *bcontainer = vrdl->bcontainer;
     const hwaddr size = int128_get64(section->size);
     const hwaddr iova = section->offset_within_address_space;
     int ret;
@@ -369,13 +365,28 @@ static void vfio_ram_discard_notify_discard(StateChangeListener *scl,
     }
 }
 
-static int vfio_ram_discard_notify_populate(StateChangeListener *scl,
+static void vfio_ram_discard_notify_discard(StateChangeListener *scl,
                                             MemoryRegionSection *section)
 {
     RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl);
     VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                 listener);
-    VFIOContainerBase *bcontainer = vrdl->bcontainer;
+    vfio_state_change_notify_to_state_clear(vrdl->bcontainer, section);
+}
+
+static void vfio_private_shared_notify_to_private(StateChangeListener *scl,
+                                                  MemoryRegionSection *section)
+{
+    PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl);
+    VFIOPrivateSharedListener *vpsl = container_of(psl, VFIOPrivateSharedListener,
+                                                   listener);
+    vfio_state_change_notify_to_state_clear(vpsl->bcontainer, section);
+}
+
+static int vfio_state_change_notify_to_state_set(VFIOContainerBase *bcontainer,
+                                                 MemoryRegionSection *section,
+                                                 uint64_t granularity)
+{
     const hwaddr end = section->offset_within_region +
                        int128_get64(section->size);
     hwaddr start, next, iova;
@@ -387,7 +398,7 @@ static int vfio_ram_discard_notify_populate(StateChangeListener *scl,
      * unmap in minimum granularity later.
      */
     for (start = section->offset_within_region; start < end; start = next) {
-        next = ROUND_UP(start + 1, vrdl->granularity);
+        next = ROUND_UP(start + 1, granularity);
         next = MIN(next, end);
 
         iova = start - section->offset_within_region +
@@ -398,13 +409,33 @@ static int vfio_ram_discard_notify_populate(StateChangeListener *scl,
                                      vaddr, section->readonly);
         if (ret) {
             /* Rollback */
-            vfio_ram_discard_notify_discard(scl, section);
+            vfio_state_change_notify_to_state_clear(bcontainer, section);
             return ret;
         }
     }
     return 0;
 }
 
+static int vfio_ram_discard_notify_populate(StateChangeListener *scl,
+                                            MemoryRegionSection *section)
+{
+    RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl);
+    VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
+                                                listener);
+    return vfio_state_change_notify_to_state_set(vrdl->bcontainer, section,
+                                                 vrdl->granularity);
+}
+
+static int vfio_private_shared_notify_to_shared(StateChangeListener *scl,
+                                                MemoryRegionSection *section)
+{
+    PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl);
+    VFIOPrivateSharedListener *vpsl = container_of(psl, VFIOPrivateSharedListener,
+                                                   listener);
+    return vfio_state_change_notify_to_state_set(vpsl->bcontainer, section,
+                                                 vpsl->granularity);
+}
+
 static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
                                                MemoryRegionSection *section)
 {
@@ -481,6 +512,27 @@ static void vfio_register_ram_discard_listener(VFIOContainerBase *bcontainer,
     }
 }
 
+static void vfio_register_private_shared_listener(VFIOContainerBase *bcontainer,
+                                                  MemoryRegionSection *section)
+{
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
+    VFIOPrivateSharedListener *vpsl;
+    PrivateSharedListener *psl;
+
+    vpsl = g_new0(VFIOPrivateSharedListener, 1);
+    vpsl->bcontainer = bcontainer;
+    vpsl->mr = section->mr;
+    vpsl->offset_within_address_space = section->offset_within_address_space;
+    vpsl->granularity = generic_state_manager_get_min_granularity(gsm,
+                                                                  section->mr);
+
+    psl = &vpsl->listener;
+    private_shared_listener_init(psl, vfio_private_shared_notify_to_shared,
+                                 vfio_private_shared_notify_to_private);
+    generic_state_manager_register_listener(gsm, &psl->scl, section);
+    QLIST_INSERT_HEAD(&bcontainer->vpsl_list, vpsl, next);
+}
+
 static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer,
                                                  MemoryRegionSection *section)
 {
@@ -506,6 +558,31 @@ static void vfio_unregister_ram_discard_listener(VFIOContainerBase *bcontainer,
     g_free(vrdl);
 }
 
+static void vfio_unregister_private_shared_listener(VFIOContainerBase *bcontainer,
+                                                    MemoryRegionSection *section)
+{
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
+    VFIOPrivateSharedListener *vpsl = NULL;
+    PrivateSharedListener *psl;
+
+    QLIST_FOREACH(vpsl, &bcontainer->vpsl_list, next) {
+        if (vpsl->mr == section->mr &&
+            vpsl->offset_within_address_space ==
+            section->offset_within_address_space) {
+            break;
+        }
+    }
+
+    if (!vpsl) {
+        hw_error("vfio: Trying to unregister missing RAM discard listener");
+    }
+
+    psl = &vpsl->listener;
+    generic_state_manager_unregister_listener(gsm, &psl->scl);
+    QLIST_REMOVE(vpsl, next);
+    g_free(vpsl);
+}
+
 static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
 {
     MemoryRegion *mr = section->mr;
@@ -677,6 +754,9 @@ static void vfio_listener_region_add(MemoryListener *listener,
     if (memory_region_has_ram_discard_manager(section->mr)) {
         vfio_register_ram_discard_listener(bcontainer, section);
         return;
+    } else if (memory_region_has_private_shared_manager(section->mr)) {
+        vfio_register_private_shared_listener(bcontainer, section);
+        return;
     }
 
     vaddr = memory_region_get_ram_ptr(section->mr) +
@@ -796,6 +876,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
         vfio_unregister_ram_discard_listener(bcontainer, section);
         /* Unregistering will trigger an unmap. */
         try_unmap = false;
+    } else if (memory_region_has_private_shared_manager(section->mr)) {
+        vfio_unregister_private_shared_listener(bcontainer, section);
+        /* Unregistering will trigger an unmap. */
+        try_unmap = false;
     }
 
     if (try_unmap) {
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 913ae49077..a356ae91a9 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -82,6 +82,7 @@ void vfio_container_init(VFIOContainerBase *bcontainer, VFIOAddressSpace *space,
     bcontainer->iova_ranges = NULL;
     QLIST_INIT(&bcontainer->giommu_list);
     QLIST_INIT(&bcontainer->vrdl_list);
+    QLIST_INIT(&bcontainer->vpsl_list);
 }
 
 void vfio_container_destroy(VFIOContainerBase *bcontainer)
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 7a4c575115..faed33bf92 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -46,6 +46,7 @@ typedef struct VFIOContainerBase {
     bool dirty_pages_supported;
     QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
     QLIST_HEAD(, VFIORamDiscardListener) vrdl_list;
+    QLIST_HEAD(, VFIOPrivateSharedListener) vpsl_list;
     QLIST_ENTRY(VFIOContainerBase) next;
     QLIST_HEAD(, VFIODevice) device_list;
     GList *iova_ranges;
@@ -69,6 +70,15 @@ typedef struct VFIORamDiscardListener {
     QLIST_ENTRY(VFIORamDiscardListener) next;
 } VFIORamDiscardListener;
 
+typedef struct VFIOPrivateSharedListener {
+    VFIOContainerBase *bcontainer;
+    MemoryRegion *mr;
+    hwaddr offset_within_address_space;
+    uint64_t granularity;
+    PrivateSharedListener listener;
+    QLIST_ENTRY(VFIOPrivateSharedListener) next;
+} VFIOPrivateSharedListener;
+
 int vfio_container_dma_map(VFIOContainerBase *bcontainer,
                            hwaddr iova, ram_addr_t size,
                            void *vaddr, bool readonly);
-- 
Gitee


From d99491bfe7983151fa8e2688f0b0aad591e36147 Mon Sep 17 00:00:00 2001
From: Chenyi Qiang <chenyi.qiang@intel.com>
Date: Mon, 7 Apr 2025 15:49:30 +0800
Subject: [PATCH 07/40] memory: Change NotifyStateClear() definition to return
 the result

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/13fd87aac38509ab07bacafa2e35eb528d4be365

Make the NotifyStateClear() handler return a result so that the caller
can check whether the operation failed.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/vfio/common.c      | 18 ++++++++++--------
 include/exec/memory.h |  4 ++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 62a2000acd..182874eccb 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -350,8 +350,8 @@ out:
     rcu_read_unlock();
 }
 
-static void vfio_state_change_notify_to_state_clear(VFIOContainerBase *bcontainer,
-                                                    MemoryRegionSection *section)
+static int vfio_state_change_notify_to_state_clear(VFIOContainerBase *bcontainer,
+                                                   MemoryRegionSection *section)
 {
     const hwaddr size = int128_get64(section->size);
     const hwaddr iova = section->offset_within_address_space;
@@ -363,24 +363,26 @@ static void vfio_state_change_notify_to_state_clear(VFIOContainerBase *bcontaine
         error_report("%s: vfio_container_dma_unmap() failed: %s", __func__,
                      strerror(-ret));
     }
+
+    return ret;
 }
 
-static void vfio_ram_discard_notify_discard(StateChangeListener *scl,
-                                            MemoryRegionSection *section)
+static int vfio_ram_discard_notify_discard(StateChangeListener *scl,
+                                           MemoryRegionSection *section)
 {
     RamDiscardListener *rdl = container_of(scl, RamDiscardListener, scl);
     VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
                                                 listener);
-    vfio_state_change_notify_to_state_clear(vrdl->bcontainer, section);
+    return vfio_state_change_notify_to_state_clear(vrdl->bcontainer, section);
 }
 
-static void vfio_private_shared_notify_to_private(StateChangeListener *scl,
-                                                  MemoryRegionSection *section)
+static int vfio_private_shared_notify_to_private(StateChangeListener *scl,
+                                                 MemoryRegionSection *section)
 {
     PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl);
     VFIOPrivateSharedListener *vpsl = container_of(psl, VFIOPrivateSharedListener,
                                                    listener);
-    vfio_state_change_notify_to_state_clear(vpsl->bcontainer, section);
+    return vfio_state_change_notify_to_state_clear(vpsl->bcontainer, section);
 }
 
 static int vfio_state_change_notify_to_state_set(VFIOContainerBase *bcontainer,
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 964ec53afc..b93ffb533e 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -580,8 +580,8 @@ typedef int (*ReplayStateChange)(MemoryRegionSection *section, void *opaque);
 typedef struct StateChangeListener StateChangeListener;
 typedef int (*NotifyStateSet)(StateChangeListener *scl,
                               MemoryRegionSection *section);
-typedef void (*NotifyStateClear)(StateChangeListener *scl,
-                                 MemoryRegionSection *section);
+typedef int (*NotifyStateClear)(StateChangeListener *scl,
+                                MemoryRegionSection *section);
 
 struct StateChangeListener {
     /*
-- 
Gitee


From 71e7d77e5724b77fdba7bab48ef44e92b8e0c1ee Mon Sep 17 00:00:00 2001
From: Chenyi Qiang <chenyi.qiang@intel.com>
Date: Mon, 7 Apr 2025 15:49:32 +0800
Subject: [PATCH 08/40] ram-block-attribute: Add priority listener support for
 PrivateSharedListener

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/ed4157b155b571b62c4d88ca297909dbcb3922ed

In-place page conversion requires operations to follow a specific
sequence: unmap-before-conversion-to-private and
map-after-conversion-to-shared. Currently, both attribute changes and
VFIO DMA map/unmap operations are handled by PrivateSharedListeners, so
they need to be invoked in a specific order.

For private to shared conversion:
- Change attribute to shared.
- VFIO populates the shared mappings into the IOMMU.
- Restore attribute if the operation fails.

For shared to private conversion:
- VFIO discards shared mapping from the IOMMU.
- Change attribute to private.

To facilitate this sequence, priority support is added to
PrivateSharedListener so that listeners are stored in a determined
order based on priority. A tail queue is used to store listeners,
allowing traversal in either direction.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Conflicts:
      include/exec/ramblock.h
      system/ram-block-attribute.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/vfio/common.c      |  3 ++-
 include/exec/memory.h | 19 +++++++++++++++++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 182874eccb..c0bc61fdee 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -530,7 +530,8 @@ static void vfio_register_private_shared_listener(VFIOContainerBase *bcontainer,
 
     psl = &vpsl->listener;
     private_shared_listener_init(psl, vfio_private_shared_notify_to_shared,
-                                 vfio_private_shared_notify_to_private);
+                                 vfio_private_shared_notify_to_private,
+                                 PRIVATE_SHARED_LISTENER_PRIORITY_COMMON);
     generic_state_manager_register_listener(gsm, &psl->scl, section);
     QLIST_INSERT_HEAD(&bcontainer->vpsl_list, vpsl, next);
 }
diff --git a/include/exec/memory.h b/include/exec/memory.h
index b93ffb533e..51fe10d4a0 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -827,11 +827,24 @@ struct RamDiscardManagerClass {
     GenericStateManagerClass parent_class;
 };
 
+#define PRIVATE_SHARED_LISTENER_PRIORITY_MIN       0
+#define PRIVATE_SHARED_LISTENER_PRIORITY_COMMON    10
+
 typedef struct PrivateSharedListener PrivateSharedListener;
 struct PrivateSharedListener {
     struct StateChangeListener scl;
 
-    QLIST_ENTRY(PrivateSharedListener) next;
+    /*
+     * @priority:
+     *
+     * Govern the order in which private/shared listeners are invoked. Lower
+     * priorities are invoked earlier.
+     * The listener priority can help to undo the effects of previous listeners in
+     * a reverse order in case of a failure callback.
+     */
+    int priority;
+
+    QTAILQ_ENTRY(PrivateSharedListener) next;
 };
 
 struct PrivateSharedManagerClass {
@@ -841,9 +854,11 @@ struct PrivateSharedManagerClass {
 
 static inline void private_shared_listener_init(PrivateSharedListener *psl,
                                                 NotifyStateSet populate_fn,
-                                                NotifyStateClear discard_fn)
+                                                NotifyStateClear discard_fn,
+                                                int priority)
 {
     state_change_listener_init(&psl->scl, populate_fn, discard_fn);
+    psl->priority = priority;
 }
 
 bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
-- 
Gitee


From d08cc1efcdf47b6cb3edece889cc36904ccf932d Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Fri, 13 May 2022 09:08:54 +0100
Subject: [PATCH 09/40] linux-headers: Add KVM Arm RME definitions to Linux
 headers

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/b1872e38b35f4e7b820880694ad876c41aabaa85

Copy the KVM definitions for Arm RME from the development branch.
Don't merge, they will be added from the periodic Linux header sync.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      linux-headers/asm-arm64/kvm.h
      linux-headers/linux/kvm.h
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 linux-headers/asm-arm64/kvm.h | 60 +++++++++++++++++++++++++++++++++++
 linux-headers/linux/kvm.h     | 28 +++++++++++++---
 2 files changed, 84 insertions(+), 4 deletions(-)

diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 552fdcb18f..aed56ef371 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -111,6 +111,8 @@ struct kvm_regs {
 #define KVM_ARM_VCPU_PTRAUTH_GENERIC	6 /* VCPU uses generic authentication */
 #define KVM_ARM_VCPU_HAS_EL2		7 /* Support nested virtualization */
 #define KVM_ARM_VCPU_TEC		8 /* VCPU TEC state as part of cvm */
+#define KVM_ARM_VCPU_HAS_EL2_E2H0	9 /* Limit NV support to E2H RES0 */
+#define KVM_ARM_VCPU_REC		10 /* VCPU REC state as part of Realm */
 
 struct kvm_vcpu_init {
 	__u32 target;
@@ -366,6 +368,7 @@ enum {
 	KVM_REG_ARM_STD_HYP_BIT_PV_TIME	= 0,
 };
 
+/* Vendor hyper call function numbers 0-63 */
 #define KVM_REG_ARM_VENDOR_HYP_BMAP		KVM_REG_ARM_FW_FEAT_BMAP_REG(2)
 
 enum {
@@ -373,6 +376,14 @@ enum {
 	KVM_REG_ARM_VENDOR_HYP_BIT_PTP		= 1,
 };
 
+/* Vendor hyper call function numbers 64-127 */
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2		KVM_REG_ARM_FW_FEAT_BMAP_REG(3)
+
+enum {
+	KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER	= 0,
+	KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS	= 1,
+};
+
 /* Device Control API on vm fd */
 #define KVM_ARM_VM_SMCCC_CTRL		0
 #define   KVM_ARM_VM_SMCCC_FILTER	0
@@ -395,6 +406,7 @@ enum {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ  9
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
@@ -407,6 +419,54 @@ enum {
 #define   KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES	3
 #define   KVM_DEV_ARM_ITS_CTRL_RESET		4
 
+/* KVM_CAP_ARM_RME on VM fd */
+#define KVM_CAP_ARM_RME_CONFIG_REALM		0
+#define KVM_CAP_ARM_RME_CREATE_REALM		1
+#define KVM_CAP_ARM_RME_INIT_RIPAS_REALM	2
+#define KVM_CAP_ARM_RME_POPULATE_REALM		3
+#define KVM_CAP_ARM_RME_ACTIVATE_REALM		4
+
+/* List of configuration items accepted for KVM_CAP_ARM_RME_CONFIG_REALM */
+#define ARM_RME_CONFIG_RPV			0
+#define ARM_RME_CONFIG_HASH_ALGO		1
+
+#define ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256		0
+#define ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512		1
+
+#define ARM_RME_CONFIG_RPV_SIZE 64
+
+struct arm_rme_config {
+	__u32 cfg;
+	union {
+		/* cfg == ARM_RME_CONFIG_RPV */
+		struct {
+			__u8	rpv[ARM_RME_CONFIG_RPV_SIZE];
+		};
+
+		/* cfg == ARM_RME_CONFIG_HASH_ALGO */
+		struct {
+			__u32	hash_algo;
+		};
+
+		/* Fix the size of the union */
+		__u8	reserved[256];
+	};
+};
+
+#define KVM_ARM_RME_POPULATE_FLAGS_MEASURE	(1 << 0)
+struct arm_rme_populate_realm {
+	__u64 base;
+	__u64 size;
+	__u32 flags;
+	__u32 reserved[3];
+};
+
+struct arm_rme_init_ripas {
+	__u64 base;
+	__u64 size;
+	__u64 reserved[2];
+};
+
 /* Device Control API on vcpu fd */
 #define KVM_ARM_VCPU_PMU_V3_CTRL	0
 #define   KVM_ARM_VCPU_PMU_V3_IRQ	0
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index d3bf7fac00..beb41f7433 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -924,14 +924,25 @@ struct kvm_ppc_resize_hpt {
 #define KVM_S390_SIE_PAGE_OFFSET 1
 
 /*
- * On arm64, machine type can be used to request the physical
- * address size for the VM. Bits[7-0] are reserved for the guest
- * PA size shift (i.e, log2(PA_Size)). For backward compatibility,
- * value 0 implies the default IPA size, 40bits.
+ * On arm64, machine type can be used to request both the machine type and
+ * the physical address size for the VM.
+ *
+ * Bits[11-8] are reserved for the ARM specific machine type.
+ *
+ * Bits[7-0] are reserved for the guest PA size shift (i.e, log2(PA_Size)).
+ * For backward compatibility, value 0 implies the default IPA size, 40bits.
  */
+#define KVM_VM_TYPE_ARM_SHIFT		8
+#define KVM_VM_TYPE_ARM_MASK		(0xfULL << KVM_VM_TYPE_ARM_SHIFT)
+#define KVM_VM_TYPE_ARM(_type)		\
+	(((_type) << KVM_VM_TYPE_ARM_SHIFT) & KVM_VM_TYPE_ARM_MASK)
+#define KVM_VM_TYPE_ARM_NORMAL		KVM_VM_TYPE_ARM(0)
+#define KVM_VM_TYPE_ARM_REALM		KVM_VM_TYPE_ARM(1)
+
 #define KVM_VM_TYPE_ARM_IPA_SIZE_MASK	0xffULL
 #define KVM_VM_TYPE_ARM_IPA_SIZE(x)		\
 	((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+
 /*
  * ioctls for /dev/kvm fds:
  */
@@ -1206,6 +1217,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230
+#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
+#define KVM_CAP_ARM_RME 240
 
 #define KVM_CAP_ARM_TMM 300
 
@@ -2451,4 +2464,11 @@ struct kvm_s390_zpci_op {
 #define KVM_GET_TMI_VERSION	_IOR(KVMIO, 0xd2, uint64_t)
 #define MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM  0x20001
 
+/* Available with KVM_CAP_ARM_RME, only for VMs with KVM_VM_TYPE_ARM_REALM  */
+struct kvm_arm_rmm_psci_complete {
+	__u64 target_mpidr;
+	__u32 psci_status;
+	__u32 padding[3];
+};
+
 #endif /* __LINUX_KVM_H */
-- 
Gitee


From 4242973f80d6779b2e4235bacc18d685bbfcfda8 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Wed, 4 Dec 2024 15:34:28 +0000
Subject: [PATCH 10/40] kvm: Use kvm_vm_check_extension() where necessary

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/97b19c96743303418578785a230019b8b26b0131

The Arm KVM code can return different values from KVM_CHECK_EXTENSION
depending on the VM type. Use kvm_vm_check_extension() where necessary
to ensure we get the right response from KVM.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      target/arm/kvm.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 accel/kvm/kvm-all.c | 6 +++---
 target/arm/kvm64.c  | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 7d175d3262..2cdd615025 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2363,13 +2363,13 @@ static int kvm_recommended_vcpus(KVMState *s)
 
 static int kvm_max_vcpus(KVMState *s)
 {
-    int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);
+    int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS);
     return (ret) ? ret : kvm_recommended_vcpus(s);
 }
 
 static int kvm_max_vcpu_id(KVMState *s)
 {
-    int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPU_ID);
+    int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPU_ID);
     return (ret) ? ret : kvm_max_vcpus(s);
 }
 
@@ -2625,7 +2625,7 @@ static int kvm_init(MachineState *ms)
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
     kvm_has_guest_debug =
-        (kvm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0);
+        (kvm_vm_check_extension(s, KVM_CAP_SET_GUEST_DEBUG) > 0);
 #endif
 
     kvm_sstep_flags = 0;
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index b099287ed0..651f603dd8 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -39,11 +39,11 @@ void kvm_arm_init_debug(KVMState *s)
     have_guest_debug = kvm_check_extension(s,
                                            KVM_CAP_SET_GUEST_DEBUG);
 
-    max_hw_wps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS);
+    max_hw_wps = kvm_vm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_WPS);
     hw_watchpoints = g_array_sized_new(true, true,
                                        sizeof(HWWatchpoint), max_hw_wps);
 
-    max_hw_bps = kvm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS);
+    max_hw_bps = kvm_vm_check_extension(s, KVM_CAP_GUEST_DEBUG_HW_BPS);
     hw_breakpoints = g_array_sized_new(true, true,
                                        sizeof(HWBreakpoint), max_hw_bps);
     return;
@@ -513,12 +513,12 @@ bool kvm_arm_aarch32_supported(void)
 
 bool kvm_arm_sve_supported(void)
 {
-    return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
+    return kvm_vm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
 }
 
 bool kvm_arm_steal_time_supported(void)
 {
-    return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
+    return kvm_vm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
 }
 
 QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
-- 
Gitee


From b1304358281cd973a8c7ef057e350e5e2028e005 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Tue, 20 Feb 2024 16:06:16 +0000
Subject: [PATCH 11/40] include/qom/object.h: New OBJECT_DEFINE_SIMPLE_TYPE{,
 _WITH_INTERFACES} macros
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reference:https://gitlab.com/qemu-project/qemu/-/commit/e54c24339f3e6533af0b0c4364c5c9c9f74e9273

We have an OBJECT_DEFINE_TYPE_EXTENDED macro, plus several variations
on it, which emits the boilerplate for the TypeInfo and ensures it is
registered with the type system.  However, all the existing macros
insist that the type being defined has its own FooClass struct, so
they aren't useful for the common case of a simple leaf class which
doesn't have any new methods or any other need for its own class
struct (that is, for the kind of type that OBJECT_DECLARE_SIMPLE_TYPE
declares).

Pull the actual implementation of OBJECT_DEFINE_TYPE_EXTENDED out
into a new DO_OBJECT_DEFINE_TYPE_EXTENDED which parameterizes the
value we use for the class_size field.  This lets us add a new
OBJECT_DEFINE_SIMPLE_TYPE which does the same job as the various
existing OBJECT_DEFINE_*_TYPE_* family macros for this kind of simple
type, and the variant OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES for
when the type will implement some interfaces.

Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20240220160622.114437-5-peter.maydell@linaro.org
Reviewed-by: Zhao Liu <zhao1.liu@intel.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 docs/devel/qom.rst   | 34 +++++++++++++---
 include/qom/object.h | 96 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 108 insertions(+), 22 deletions(-)

diff --git a/docs/devel/qom.rst b/docs/devel/qom.rst
index 9918fac7f2..0889ca949c 100644
--- a/docs/devel/qom.rst
+++ b/docs/devel/qom.rst
@@ -348,12 +348,14 @@ used. This does the same as OBJECT_DECLARE_SIMPLE_TYPE(), but without
 the 'struct MyDeviceClass' definition.
 
 To implement the type, the OBJECT_DEFINE macro family is available.
-In the simple case the OBJECT_DEFINE_TYPE macro is suitable:
+For the simplest case of a leaf class which doesn't need any of its
+own virtual functions (i.e. which was declared with OBJECT_DECLARE_SIMPLE_TYPE)
+the OBJECT_DEFINE_SIMPLE_TYPE macro is suitable:
 
 .. code-block:: c
    :caption: Defining a simple type
 
-   OBJECT_DEFINE_TYPE(MyDevice, my_device, MY_DEVICE, DEVICE)
+   OBJECT_DEFINE_SIMPLE_TYPE(MyDevice, my_device, MY_DEVICE, DEVICE)
 
 This is equivalent to the following:
 
@@ -370,7 +372,6 @@ This is equivalent to the following:
        .instance_size = sizeof(MyDevice),
        .instance_init = my_device_init,
        .instance_finalize = my_device_finalize,
-       .class_size = sizeof(MyDeviceClass),
        .class_init = my_device_class_init,
    };
 
@@ -385,13 +386,36 @@ This is sufficient to get the type registered with the type
 system, and the three standard methods now need to be implemented
 along with any other logic required for the type.
 
+If the class needs its own virtual methods, or has some other
+per-class state it needs to store in its own class struct,
+then you can use the OBJECT_DEFINE_TYPE macro. This does the
+same thing as OBJECT_DEFINE_SIMPLE_TYPE, but it also sets the
+class_size of the type to the size of the class struct.
+
+.. code-block:: c
+   :caption: Defining a type which needs a class struct
+
+   OBJECT_DEFINE_TYPE(MyDevice, my_device, MY_DEVICE, DEVICE)
+
 If the type needs to implement one or more interfaces, then the
-OBJECT_DEFINE_TYPE_WITH_INTERFACES() macro can be used instead.
-This accepts an array of interface type names.
+OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES() and
+OBJECT_DEFINE_TYPE_WITH_INTERFACES() macros can be used instead.
+These accept an array of interface type names. The difference between
+them is that the former is for simple leaf classes that don't need
+a class struct, and the latter is for when you will be defining
+a class struct.
 
 .. code-block:: c
    :caption: Defining a simple type implementing interfaces
 
+   OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(MyDevice, my_device,
+                                             MY_DEVICE, DEVICE,
+                                             { TYPE_USER_CREATABLE },
+                                             { NULL })
+
+.. code-block:: c
+   :caption: Defining a type implementing interfaces
+
    OBJECT_DEFINE_TYPE_WITH_INTERFACES(MyDevice, my_device,
                                       MY_DEVICE, DEVICE,
                                       { TYPE_USER_CREATABLE },
diff --git a/include/qom/object.h b/include/qom/object.h
index afccd24ca7..f52ab216cd 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -259,31 +259,23 @@ struct Object
 
 
 /**
- * OBJECT_DEFINE_TYPE_EXTENDED:
+ * DO_OBJECT_DEFINE_TYPE_EXTENDED:
  * @ModuleObjName: the object name with initial caps
  * @module_obj_name: the object name in lowercase with underscore separators
  * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators
  * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore
  *                          separators
  * @ABSTRACT: boolean flag to indicate whether the object can be instantiated
+ * @CLASS_SIZE: size of the type's class
  * @...: list of initializers for "InterfaceInfo" to declare implemented interfaces
  *
- * This macro is typically used in a source file, and will:
- *
- *   - declare prototypes for _finalize, _class_init and _init methods
- *   - declare the TypeInfo struct instance
- *   - provide the constructor to register the type
- *
- * After using this macro, implementations of the _finalize, _class_init,
- * and _init methods need to be written. Any of these can be zero-line
- * no-op impls if no special logic is required for a given type.
- *
- * This macro should rarely be used, instead one of the more specialized
- * macros is usually a better choice.
+ * This is the base macro used to implement all the OBJECT_DEFINE_*
+ * macros. It should never be used directly in a source file.
  */
-#define OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \
-                                    MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \
-                                    ABSTRACT, ...) \
+#define DO_OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \
+                                       MODULE_OBJ_NAME, \
+                                       PARENT_MODULE_OBJ_NAME, \
+                                       ABSTRACT, CLASS_SIZE, ...) \
     static void \
     module_obj_name##_finalize(Object *obj); \
     static void \
@@ -298,7 +290,7 @@ struct Object
         .instance_align = __alignof__(ModuleObjName), \
         .instance_init = module_obj_name##_init, \
         .instance_finalize = module_obj_name##_finalize, \
-        .class_size = sizeof(ModuleObjName##Class), \
+        .class_size = CLASS_SIZE, \
         .class_init = module_obj_name##_class_init, \
         .abstract = ABSTRACT, \
         .interfaces = (InterfaceInfo[]) { __VA_ARGS__ } , \
@@ -311,6 +303,37 @@ struct Object
     } \
     type_init(module_obj_name##_register_types);
 
+/**
+ * OBJECT_DEFINE_TYPE_EXTENDED:
+ * @ModuleObjName: the object name with initial caps
+ * @module_obj_name: the object name in lowercase with underscore separators
+ * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators
+ * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore
+ *                          separators
+ * @ABSTRACT: boolean flag to indicate whether the object can be instantiated
+ * @...: list of initializers for "InterfaceInfo" to declare implemented interfaces
+ *
+ * This macro is typically used in a source file, and will:
+ *
+ *   - declare prototypes for _finalize, _class_init and _init methods
+ *   - declare the TypeInfo struct instance
+ *   - provide the constructor to register the type
+ *
+ * After using this macro, implementations of the _finalize, _class_init,
+ * and _init methods need to be written. Any of these can be zero-line
+ * no-op impls if no special logic is required for a given type.
+ *
+ * This macro should rarely be used, instead one of the more specialized
+ * macros is usually a better choice.
+ */
+#define OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \
+                                    MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \
+                                    ABSTRACT, ...) \
+    DO_OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \
+                                   MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \
+                                   ABSTRACT, sizeof(ModuleObjName##Class), \
+                                   __VA_ARGS__)
+
 /**
  * OBJECT_DEFINE_TYPE:
  * @ModuleObjName: the object name with initial caps
@@ -368,6 +391,45 @@ struct Object
                                 MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \
                                 true, { NULL })
 
+/**
+ * OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES:
+ * @ModuleObjName: the object name with initial caps
+ * @module_obj_name: the object name in lowercase with underscore separators
+ * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators
+ * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore
+ *                          separators
+ *
+ * This is a variant of OBJECT_DEFINE_TYPE_EXTENDED, which is suitable for
+ * the case of a non-abstract type, with interfaces, and with no requirement
+ * for a class struct.
+ */
+#define OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(ModuleObjName, \
+                                                  module_obj_name, \
+                                                  MODULE_OBJ_NAME, \
+                                                  PARENT_MODULE_OBJ_NAME, ...) \
+    DO_OBJECT_DEFINE_TYPE_EXTENDED(ModuleObjName, module_obj_name, \
+                                   MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, \
+                                   false, 0, __VA_ARGS__)
+
+/**
+ * OBJECT_DEFINE_SIMPLE_TYPE:
+ * @ModuleObjName: the object name with initial caps
+ * @module_obj_name: the object name in lowercase with underscore separators
+ * @MODULE_OBJ_NAME: the object name in uppercase with underscore separators
+ * @PARENT_MODULE_OBJ_NAME: the parent object name in uppercase with underscore
+ *                          separators
+ *
+ * This is a variant of OBJECT_DEFINE_TYPE_EXTENDED, which is suitable for
+ * the common case of a non-abstract type, without any interfaces, and with
+ * no requirement for a class struct. If you declared your type with
+ * OBJECT_DECLARE_SIMPLE_TYPE then this is probably the right choice for
+ * defining it.
+ */
+#define OBJECT_DEFINE_SIMPLE_TYPE(ModuleObjName, module_obj_name, \
+                                  MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME) \
+    OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(ModuleObjName, module_obj_name, \
+        MODULE_OBJ_NAME, PARENT_MODULE_OBJ_NAME, { NULL })
+
 /**
  * struct TypeInfo:
  * @name: The name of the type.
-- 
Gitee


From 754c30c1d126357d60ea29a2c17428a0abdcca49 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 16 Jun 2022 18:24:55 +0100
Subject: [PATCH 12/40] target/arm: Add confidential guest support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/6353278a78f3942ff1b576aab77d79d926e8f9f0

Add a new RmeGuest object, inheriting from ConfidentialGuestSupport, to
support the Arm Realm Management Extension (RME). It is instantiated by
passing on the command-line:

  -M virt,confidential-guest-support=<id>
  -object rme-guest,id=<id>[,options...]

This is only the skeleton. Support will be added in following patches.

Cc: Eric Blake <eblake@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Daniel P. Berrangé <berrange@redhat.com>
Cc: Eduardo Habkost <eduardo@habkost.net>
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      target/arm/meson.build
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 docs/system/confidential-guest-support.rst |  1 +
 qapi/qom.json                              |  1 +
 target/arm/kvm-rme.c                       | 40 ++++++++++++++++++++++
 target/arm/meson.build                     |  2 +-
 4 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 target/arm/kvm-rme.c

diff --git a/docs/system/confidential-guest-support.rst b/docs/system/confidential-guest-support.rst
index 0c490dbda2..acf46d8856 100644
--- a/docs/system/confidential-guest-support.rst
+++ b/docs/system/confidential-guest-support.rst
@@ -40,5 +40,6 @@ Currently supported confidential guest mechanisms are:
 * AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`)
 * POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`)
 * s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`)
+* Arm Realm Management Extension (RME)
 
 Other mechanisms may be supported in future.
diff --git a/qapi/qom.json b/qapi/qom.json
index a5336e6b11..e405c51da3 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -999,6 +999,7 @@
     { 'name': 'pr-manager-helper',
       'if': 'CONFIG_LINUX' },
     'qtest',
+    'rme-guest',
     'rng-builtin',
     'rng-egd',
     { 'name': 'rng-random',
diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c
new file mode 100644
index 0000000000..1de65f2b1d
--- /dev/null
+++ b/target/arm/kvm-rme.c
@@ -0,0 +1,40 @@
+/*
+ * QEMU Arm RME support
+ *
+ * Copyright Linaro 2024
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/boards.h"
+#include "hw/core/cpu.h"
+#include "kvm_arm.h"
+#include "migration/blocker.h"
+#include "qapi/error.h"
+#include "qom/object_interfaces.h"
+#include "exec/confidential-guest-support.h"
+#include "sysemu/kvm.h"
+#include "sysemu/runstate.h"
+
+#define TYPE_RME_GUEST "rme-guest"
+OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST)
+
+struct RmeGuest {
+    ConfidentialGuestSupport parent_obj;
+};
+
+OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST,
+                                          CONFIDENTIAL_GUEST_SUPPORT,
+                                          { TYPE_USER_CREATABLE }, { })
+
+static void rme_guest_class_init(ObjectClass *oc, void *data)
+{
+}
+
+static void rme_guest_init(Object *obj)
+{
+}
+
+static void rme_guest_finalize(Object *obj)
+{
+}
diff --git a/target/arm/meson.build b/target/arm/meson.build
index 389ee54658..7973b35cca 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -8,7 +8,7 @@ arm_ss.add(files(
 ))
 arm_ss.add(zlib)
 
-arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c', 'kvm64.c'), if_false: files('kvm-stub.c'))
+arm_ss.add(when: 'CONFIG_KVM', if_true: files('hyp_gdbstub.c', 'kvm.c', 'kvm64.c', 'kvm-rme.c'), if_false: files('kvm-stub.c'))
 arm_ss.add(when: 'CONFIG_HVF', if_true: files('hyp_gdbstub.c'))
 arm_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c', 'kvm64.c', 'kvm-tmm.c'), if_false: files('kvm-stub.c'))
 
-- 
Gitee


From 06d0249f7fc42d05b8461e6b2675f8d1fddb0707 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Wed, 21 Feb 2024 15:50:42 +0000
Subject: [PATCH 13/40] target/arm/kvm: Return immediately on error in
 kvm_arch_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/1385e5d0517c42a8a3d18c4eb36db48e86370aa3

Returning an error to kvm_init() is fatal anyway, no need to continue
the initialization.

Leave the `ret` variable in the function scope because it will be reused
when adding RME support.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Conflicts:
      target/arm/kvm.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 target/arm/kvm.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index ab31515a2a..e32a064f94 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -276,7 +276,7 @@ static void kvm_update_ipiv_cap(KVMState *s)
 int kvm_arch_init(MachineState *ms, KVMState *s)
 {
     MachineClass *mc = MACHINE_GET_CLASS(ms);
-    int ret = 0;
+    int ret;
 
     /* For ARM interrupt delivery is always asynchronous,
      * whether we are using an in-kernel VGIC or not.
@@ -295,7 +295,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
         !kvm_check_extension(s, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) {
         error_report("Using more than 256 vcpus requires a host kernel "
                      "with KVM_CAP_ARM_IRQ_LINE_LAYOUT_2");
-        ret = -EINVAL;
+        return -EINVAL;
     }
 
     if (kvm_check_extension(s, KVM_CAP_ARM_NISV_TO_USER)) {
@@ -317,13 +317,14 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
             warn_report("Eager Page Split support not available");
         } else if (!(s->kvm_eager_split_size & sizes)) {
             error_report("Eager Page Split requested chunk size not valid");
-            ret = -EINVAL;
+            return -EINVAL;
         } else {
             ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE, 0,
                                     s->kvm_eager_split_size);
             if (ret < 0) {
                 error_report("Enabling of Eager Page Split failed: %s",
                              strerror(-ret));
+                return ret;
             }
         }
     }
@@ -348,7 +349,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     kvm_arm_init_debug(s);
     kvm_update_ipiv_cap(s);
 
-    return ret;
+    return 0;
 }
 
 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
-- 
Gitee


From 98c7d031289a52028656a64bd393a5b959209e19 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 18 Mar 2024 14:41:10 -0400
Subject: [PATCH 14/40] KVM: track whether guest state is encrypted
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reference:https://gitlab.com/qemu-project/qemu/-/commit/5c3131c392f84c660033d511ec39872d8beb4b1e

So far, KVM has allowed KVM_GET/SET_* ioctls to execute even if the
guest state is encrypted, in which case they do nothing.  For the new
API using VM types, instead, the ioctls will fail, which is a safer and
more robust approach.

The new API will be the only one available for SEV-SNP and TDX, but it
is also usable for SEV and SEV-ES.  In preparation for that, require
architecture-specific KVM code to communicate the point at which guest
state is protected (which must be after kvm_cpu_synchronize_post_init(),
though that might change in the future in order to support migration).
From that point, skip reading registers so that cpu->vcpu_dirty is
never true: if it ever becomes true, kvm_arch_put_registers() will
fail miserably.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Conflicts:
      include/sysemu/kvm.h
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 accel/kvm/kvm-all.c      | 17 ++++++++++++++---
 include/sysemu/kvm.h     |  3 +++
 include/sysemu/kvm_int.h |  1 +
 target/i386/sev.c        |  1 +
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 2cdd615025..50047b9b71 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2782,7 +2782,7 @@ bool kvm_cpu_check_are_resettable(void)
 
 static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 {
-    if (!cpu->vcpu_dirty) {
+    if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
         int ret = kvm_arch_get_registers(cpu);
         if (ret) {
             error_report("Failed to get registers: %s", strerror(-ret));
@@ -2796,7 +2796,7 @@ static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 
 void kvm_cpu_synchronize_state(CPUState *cpu)
 {
-    if (!cpu->vcpu_dirty) {
+    if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) {
         run_on_cpu(cpu, do_kvm_cpu_synchronize_state, RUN_ON_CPU_NULL);
     }
 }
@@ -2831,7 +2831,13 @@ static void do_kvm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 
 void kvm_cpu_synchronize_post_init(CPUState *cpu)
 {
-    run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+    if (!kvm_state->guest_state_protected) {
+        /*
+         * This runs before the machine_init_done notifiers, and is the last
+         * opportunity to synchronize the state of confidential guests.
+         */
+        run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+    }
 }
 
 static void do_kvm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
@@ -4223,3 +4229,8 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
         query_stats_schema_vcpu(first_cpu, &stats_args);
     }
 }
+
+void kvm_mark_guest_state_protected(void)
+{
+    kvm_state->guest_state_protected = true;
+}
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 098257e72f..5f3f779de4 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -604,4 +604,7 @@ int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_star
 int kvm_create_shadow_device(PCIDevice *dev);
 int kvm_delete_shadow_device(PCIDevice *dev);
 #endif
+
+void kvm_mark_guest_state_protected(void);
+
 #endif
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index b2d2c59477..9a7bc1a4b8 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -87,6 +87,7 @@ struct KVMState
     bool kernel_irqchip_required;
     OnOffAuto kernel_irqchip_split;
     bool sync_mmu;
+    bool guest_state_protected;
     uint64_t manual_dirty_log_protect;
     /* The man page (and posix) say ioctl numbers are signed int, but
      * they're not.  Linux, glibc and *BSD all treat ioctl numbers as
diff --git a/target/i386/sev.c b/target/i386/sev.c
index b4b42fd716..8c1f4d653e 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -936,6 +936,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused)
         if (ret) {
             exit(1);
         }
+        kvm_mark_guest_state_protected();
     }
 
     /* query the measurement blob length */
-- 
Gitee


From fa74508ed08091c350f431438f42a78b54896e3e Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 9 Jan 2023 10:45:27 +0000
Subject: [PATCH 15/40] target/arm/kvm-rme: Initialize realm

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/017b4eea65b93578831312e9548f8b3c6479fc08

The machine code calls kvm_arm_rme_vm_type() to get the VM flag and KVM
calls kvm_arm_rme_init() to prepare for launching a Realm. Once VM
creation is complete, create the Realm:

* Create the realm descriptor,
* load images into Realm RAM (in another patch),
* finalize the REC (vCPU) after the registers are reset,
* activate the realm, at which point the realm is sealed.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      target/arm/kvm.c
      target/arm/kvm_arm.h
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 target/arm/kvm-rme.c | 105 +++++++++++++++++++++++++++++++++++++++++++
 target/arm/kvm.c     |   7 ++-
 target/arm/kvm_arm.h |  53 ++++++++++++++++------
 3 files changed, 150 insertions(+), 15 deletions(-)

diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c
index 1de65f2b1d..3c6fecc741 100644
--- a/target/arm/kvm-rme.c
+++ b/target/arm/kvm-rme.c
@@ -11,6 +11,7 @@
 #include "kvm_arm.h"
 #include "migration/blocker.h"
 #include "qapi/error.h"
+#include "qemu/error-report.h"
 #include "qom/object_interfaces.h"
 #include "exec/confidential-guest-support.h"
 #include "sysemu/kvm.h"
@@ -27,14 +28,118 @@ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST,
                                           CONFIDENTIAL_GUEST_SUPPORT,
                                           { TYPE_USER_CREATABLE }, { })
 
+static RmeGuest *rme_guest;
+
+static int rme_init_cpus(Error **errp)
+{
+    int ret;
+    CPUState *cs;
+
+    /*
+     * Now that do_cpu_reset() initialized the boot PC and
+     * kvm_cpu_synchronize_post_reset() registered it, we can finalize the REC.
+     */
+    CPU_FOREACH(cs) {
+        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_REC);
+        if (ret) {
+            error_setg_errno(errp, -ret, "failed to finalize vCPU");
+            return ret;
+        }
+    }
+    return 0;
+}
+
+static int rme_create_realm(Error **errp)
+{
+    int ret;
+
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
+                            KVM_CAP_ARM_RME_CREATE_REALM);
+    if (ret) {
+        error_setg_errno(errp, -ret, "failed to create Realm Descriptor");
+        return -1;
+    }
+
+    if (rme_init_cpus(errp)) {
+        return -1;
+    }
+
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
+                            KVM_CAP_ARM_RME_ACTIVATE_REALM);
+    if (ret) {
+        error_setg_errno(errp, -ret, "failed to activate realm");
+        return -1;
+    }
+
+    kvm_mark_guest_state_protected();
+    return 0;
+}
+
+static void rme_vm_state_change(void *opaque, bool running, RunState state)
+{
+    Error *err = NULL;
+
+    if (!running) {
+        return;
+    }
+
+    if (rme_create_realm(&err)) {
+        error_propagate_prepend(&error_fatal, err, "RME: ");
+    }
+}
+
 static void rme_guest_class_init(ObjectClass *oc, void *data)
 {
 }
 
 static void rme_guest_init(Object *obj)
 {
+    if (rme_guest) {
+        error_report("a single instance of RmeGuest is supported");
+        exit(1);
+    }
+    rme_guest = RME_GUEST(obj);
 }
 
 static void rme_guest_finalize(Object *obj)
 {
 }
+
+int kvm_arm_rme_init(MachineState *ms)
+{
+    static Error *rme_mig_blocker;
+    ConfidentialGuestSupport *cgs = ms->cgs;
+
+    if (!rme_guest) {
+        return 0;
+    }
+
+    if (!cgs) {
+        error_report("missing -machine confidential-guest-support parameter");
+        return -EINVAL;
+    }
+
+    if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_RME)) {
+        return -ENODEV;
+    }
+
+    error_setg(&rme_mig_blocker, "RME: migration is not implemented");
+    migrate_add_blocker(&rme_mig_blocker, &error_fatal);
+
+    /*
+     * The realm activation is done last, when the VM starts, after all images
+     * have been loaded and all vcpus finalized.
+     */
+    qemu_add_vm_change_state_handler(rme_vm_state_change, NULL);
+
+    cgs->ready = true;
+    return 0;
+}
+
+int kvm_arm_rme_vm_type(MachineState *ms)
+{
+    if (rme_guest) {
+        return KVM_VM_TYPE_ARM_REALM;
+    }
+    return 0;
+}
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index e32a064f94..83462f3f62 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -349,7 +349,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     kvm_arm_init_debug(s);
     kvm_update_ipiv_cap(s);
 
-    return 0;
+    ret = kvm_arm_rme_init(ms);
+    if (ret) {
+        error_report("Failed to enable RME: %s", strerror(-ret));
+    }
+
+    return ret;
 }
 
 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index a29d4548f4..f17de8855a 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -38,20 +38,6 @@ void kvm_arm_init_debug(KVMState *s);
  */
 int kvm_arm_vcpu_init(CPUState *cs);
 
-/**
- * kvm_arm_vcpu_finalize:
- * @cs: CPUState
- * @feature: feature to finalize
- *
- * Finalizes the configuration of the specified VCPU feature by
- * invoking the KVM_ARM_VCPU_FINALIZE ioctl. Features requiring
- * this are documented in the "KVM_ARM_VCPU_FINALIZE" section of
- * KVM's API documentation.
- *
- * Returns: 0 if success else < 0 error code
- */
-int kvm_arm_vcpu_finalize(CPUState *cs, int feature);
-
 /**
  * kvm_arm_register_device:
  * @mr: memory region for this device
@@ -285,6 +271,14 @@ void kvm_arm_set_cpu_features_from_host(ARMCPU *cpu);
  */
 void kvm_arm_add_vcpu_properties(Object *obj);
 
+/**
+ * @cs: CPUState
+ * @feature: a KVM_ARM_VCPU_* feature
+ *
+ * Finalize the configuration of the given vcpu feature.
+ */
+int kvm_arm_vcpu_finalize(CPUState *cs, int feature);
+
 /**
  * kvm_arm_steal_time_finalize:
  * @cpu: ARMCPU for which to finalize kvm-steal-time
@@ -408,6 +402,22 @@ bool kvm_arm_tmm_enabled(void);
  */
 int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction);
 
+/**
+ * kvm_arm_rme_init
+ * @ms: the machine state
+ *
+ * Prepare the machine to be a Realm, if the user enabled it.
+ */
+int kvm_arm_rme_init(MachineState *ms);
+
+/**
+ * kvm_arm_rme_vm_type
+ * @ms: the machine state
+ *
+ * Returns the Realm KVM VM type if the user requested a Realm, 0 otherwise.
+ */
+int kvm_arm_rme_vm_type(MachineState *ms);
+
 #else
 
 /*
@@ -447,6 +457,11 @@ static inline void kvm_arm_add_vcpu_properties(Object *obj)
     g_assert_not_reached();
 }
 
+static inline int kvm_arm_vcpu_finalize(CPUState *cs, int feature)
+{
+    g_assert_not_reached();
+}
+
 static inline int kvm_arm_get_max_vm_ipa_size(MachineState *ms, bool *fixed_ipa)
 {
     g_assert_not_reached();
@@ -512,6 +527,16 @@ static inline int tmm_get_kae_num(void)
 {
     g_assert_not_reached();
 }
+
+static inline int kvm_arm_rme_init(MachineState *ms)
+{
+    g_assert_not_reached();
+}
+
+static inline int kvm_arm_rme_vm_type(MachineState *ms)
+{
+    g_assert_not_reached();
+}
 #endif
 
 /**
-- 
Gitee


From 4b69d18a5600e610d08584fafb87030e272ebb2b Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 21 Jun 2022 11:52:14 +0100
Subject: [PATCH 16/40] target/arm/kvm: Split kvm_arch_get/put_registers

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/a66c2761d7d6ba0f1f0db383cbad158e4cced72f

The confidential guest support in KVM limits the number of registers
that we can read and write. Split the kvm_arch_get/put_registers
functions to prepare for it.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      target/arm/kvm.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 target/arm/kvm64.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 651f603dd8..20a357061c 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -838,7 +838,7 @@ static int kvm_arch_put_sve(CPUState *cs)
     return 0;
 }
 
-int kvm_arch_put_registers(CPUState *cs, int level)
+static int kvm_arm_put_core_regs(CPUState *cs, int level)
 {
     uint64_t val;
     uint32_t fpr;
@@ -941,6 +941,19 @@ int kvm_arch_put_registers(CPUState *cs, int level)
         return ret;
     }
 
+    return 0;
+}
+
+int kvm_arch_put_registers(CPUState *cs, int level)
+{
+    int ret;
+    ARMCPU *cpu = ARM_CPU(cs);
+
+    ret = kvm_arm_put_core_regs(cs, level);
+    if (ret) {
+        return ret;
+    }
+
     write_cpustate_to_list(cpu, true);
 
     if (!write_list_to_kvmstate(cpu, level)) {
@@ -1024,7 +1037,7 @@ static int kvm_arch_get_sve(CPUState *cs)
     return 0;
 }
 
-int kvm_arch_get_registers(CPUState *cs)
+static int kvm_arm_get_core_regs(CPUState *cs)
 {
     uint64_t val;
     unsigned int el;
@@ -1127,6 +1140,19 @@ int kvm_arch_get_registers(CPUState *cs)
     }
     vfp_set_fpcr(env, fpr);
 
+    return 0;
+}
+
+int kvm_arch_get_registers(CPUState *cs)
+{
+    int ret;
+    ARMCPU *cpu = ARM_CPU(cs);
+
+    ret = kvm_arm_get_core_regs(cs);
+    if (ret) {
+        return ret;
+    }
+
     ret = kvm_get_vcpu_events(cpu);
     if (ret) {
         return ret;
-- 
Gitee


From 7f5d4809907044fd11fa040210f62b520f16ba02 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 9 Jan 2023 10:55:32 +0000
Subject: [PATCH 17/40] target/arm/kvm-rme: Initialize vCPU

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/0808c64a827150c4a8576e52101386df9c08c136

The target code calls kvm_arm_vcpu_init() to mark the vCPU as part of a
Realm. For a Realm vCPU, only x0-x7 can be set at runtime. Before boot,
the PC can also be set, and is ignored at runtime. KVM also accepts a
few system register changes during initial configuration, as returned by
KVM_GET_REG_LIST.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      target/arm/kvm.c
      target/arm/kvm_arm.h
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 target/arm/cpu.h     |  3 +++
 target/arm/kvm-rme.c | 11 +++++++++
 target/arm/kvm64.c   | 53 ++++++++++++++++++++++++++++++++++++++++++++
 target/arm/kvm_arm.h | 16 +++++++++++++
 4 files changed, 83 insertions(+)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index a5ba7f2a26..12305effd4 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -976,6 +976,9 @@ struct ArchCPU {
     bool kvm_sve_finalized;
 #endif /* CONFIG_KVM */
 
+    /* Realm Management Extension */
+    bool kvm_rme;
+
     /* Uniprocessor system with MP extensions */
     bool mp_is_up;
 
diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c
index 3c6fecc741..b080552076 100644
--- a/target/arm/kvm-rme.c
+++ b/target/arm/kvm-rme.c
@@ -136,6 +136,17 @@ int kvm_arm_rme_init(MachineState *ms)
     return 0;
 }
 
+int kvm_arm_rme_vcpu_init(CPUState *cs)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+
+    if (rme_guest) {
+        cpu->kvm_rme = true;
+        cpu->kvm_init_features[0] |= (1 << KVM_ARM_VCPU_REC);
+    }
+    return 0;
+}
+
 int kvm_arm_rme_vm_type(MachineState *ms)
 {
     if (rme_guest) {
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 20a357061c..d314927027 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -646,6 +646,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
                                       1 << KVM_ARM_VCPU_PTRAUTH_GENERIC);
     }
 
+    ret = kvm_arm_rme_vcpu_init(cs);
+    if (ret) {
+        return ret;
+    }
+
     /* Do KVM_ARM_VCPU_INIT ioctl */
     ret = kvm_arm_vcpu_init(cs);
     if (ret) {
@@ -838,6 +843,29 @@ static int kvm_arch_put_sve(CPUState *cs)
     return 0;
 }
 
+static int kvm_arm_rme_put_core_regs(CPUState *cs)
+{
+    int i, ret;
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    /* The RME ABI only allows us to set 8 GPRs and the PC */
+    for (i = 0; i < 8; i++) {
+        ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]),
+                              &env->xregs[i]);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    ret = kvm_set_one_reg(cs, AARCH64_CORE_REG(regs.pc), &env->pc);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
 static int kvm_arm_put_core_regs(CPUState *cs, int level)
 {
     uint64_t val;
@@ -848,6 +876,10 @@ static int kvm_arm_put_core_regs(CPUState *cs, int level)
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
 
+    if (cpu->kvm_rme) {
+        return kvm_arm_rme_put_core_regs(cs);
+    }
+
     /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
      * AArch64 registers before pushing them out to 64-bit KVM.
      */
@@ -1037,6 +1069,23 @@ static int kvm_arch_get_sve(CPUState *cs)
     return 0;
 }
 
+static int kvm_arm_rme_get_core_regs(CPUState *cs)
+{
+    int i, ret;
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    for (i = 0; i < 8; i++) {
+        ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]),
+                              &env->xregs[i]);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
 static int kvm_arm_get_core_regs(CPUState *cs)
 {
     uint64_t val;
@@ -1047,6 +1096,10 @@ static int kvm_arm_get_core_regs(CPUState *cs)
     ARMCPU *cpu = ARM_CPU(cs);
     CPUARMState *env = &cpu->env;
 
+    if (cpu->kvm_rme) {
+        return kvm_arm_rme_get_core_regs(cs);
+    }
+
     for (i = 0; i < 31; i++) {
         ret = kvm_get_one_reg(cs, AARCH64_CORE_REG(regs.regs[i]),
                               &env->xregs[i]);
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index f17de8855a..b6a07eb80f 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -418,6 +418,16 @@ int kvm_arm_rme_init(MachineState *ms);
  */
 int kvm_arm_rme_vm_type(MachineState *ms);
 
+/**
+ * kvm_arm_rme_vcpu_init
+ * @cs: the CPU
+ *
+ * If the user requested a Realm, setup the given vCPU accordingly. Realm vCPUs
+ * behave a little differently, for example most of their register state is
+ * hidden from the host.
+ */
+int kvm_arm_rme_vcpu_init(CPUState *cs);
+
 #else
 
 /*
@@ -537,6 +547,12 @@ static inline int kvm_arm_rme_vm_type(MachineState *ms)
 {
     g_assert_not_reached();
 }
+
+static inline int kvm_arm_rme_vcpu_init(CPUState *cs)
+{
+    g_assert_not_reached();
+}
+
 #endif
 
 /**
-- 
Gitee


From 64f88add04d798c28bfa5e61a134ccde67fcada9 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 4 Dec 2023 18:48:36 +0000
Subject: [PATCH 18/40] target/arm/kvm: Create scratch VM as Realm if necessary

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/bf7f456dfa60a022ac690004ddb08695b23ccde4

Some ID registers have a different value for a Realm VM, for example
ID_AA64DFR0_EL1 contains the number of breakpoints/watchpoints
implemented by RMM instead of the hardware.

Even though RMM is in charge of setting up most Realm registers, KVM
still provides the GET_ONE_REG interface on a Realm VM to probe the
VM's capabilities.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 target/arm/kvm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 83462f3f62..cec95483f3 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -73,6 +73,7 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
 {
     int ret = 0, kvmfd = -1, vmfd = -1, cpufd = -1;
     int max_vm_pa_size;
+    int vm_type;
 
     kvmfd = qemu_open_old("/dev/kvm", O_RDWR);
     if (kvmfd < 0) {
@@ -82,8 +83,9 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
     if (max_vm_pa_size < 0) {
         max_vm_pa_size = 0;
     }
+    vm_type = kvm_arm_rme_vm_type(MACHINE(qdev_get_machine()));
     do {
-        vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size);
+        vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size | vm_type);
     } while (vmfd == -1 && errno == EINTR);
     if (vmfd < 0) {
         goto err;
-- 
Gitee


From 9964f1260d5e67c2bc54031136629b10a4d81a2c Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 13 Jun 2023 18:01:50 +0100
Subject: [PATCH 19/40] hw/core/loader: Add ROM loader notifier

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/4575987ee573474185f8ad8c715dffa9a40494ed

Add a function to register a notifier that is invoked after a ROM gets
loaded into guest memory.

It will be used by Arm confidential guest support, in order to register
all blobs loaded into memory with KVM, so that their content is moved
into Realm state and measured into the initial VM state.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/core/loader.c    | 14 ++++++++++++++
 include/hw/loader.h | 15 +++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/hw/core/loader.c b/hw/core/loader.c
index e7a9b3775b..1627ef1976 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -67,6 +67,8 @@
 #include <zlib.h>
 
 static int roms_loaded;
+static NotifierList rom_loader_notifier =
+    NOTIFIER_LIST_INITIALIZER(rom_loader_notifier);
 
 /* return the size or -1 if error */
 int64_t get_image_size(const char *filename)
@@ -1209,6 +1211,11 @@ MemoryRegion *rom_add_blob(const char *name, const void *blob, size_t len,
     return mr;
 }
 
+void rom_add_load_notifier(Notifier *notifier)
+{
+    notifier_list_add(&rom_loader_notifier, notifier);
+}
+
 /* This function is specific for elf program because we don't need to allocate
  * all the rom. We just allocate the first part and the rest is just zeros. This
  * is why romsize and datasize are different. Also, this function takes its own
@@ -1250,6 +1257,7 @@ ssize_t rom_add_option(const char *file, int32_t bootindex)
 static void rom_reset(void *unused)
 {
     Rom *rom;
+    RomLoaderNotifyData notify;
 
     QTAILQ_FOREACH(rom, &roms, next) {
         if (rom->fw_file) {
@@ -1298,6 +1306,12 @@ static void rom_reset(void *unused)
         cpu_flush_icache_range(rom->addr, rom->datasize);
 
         trace_loader_write_rom(rom->name, rom->addr, rom->datasize, rom->isrom);
+
+        notify = (RomLoaderNotifyData) {
+            .addr = rom->addr,
+            .len = rom->datasize,
+        };
+        notifier_list_notify(&rom_loader_notifier, &notify);
     }
 }
 
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 8685e27334..5df632c5bd 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -356,6 +356,21 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict);
 ssize_t rom_add_vga(const char *file);
 ssize_t rom_add_option(const char *file, int32_t bootindex);
 
+typedef struct RomLoaderNotifyData {
+    /* Address of the blob in guest memory */
+    hwaddr addr;
+    /* Length of the blob */
+    size_t len;
+} RomLoaderNotifyData;
+
+/**
+ * rom_add_load_notifier - Add a notifier for loaded images
+ *
+ * Add a notifier that will be invoked with a RomLoaderNotifyData structure for
+ * each blob loaded into guest memory, after the blob is loaded.
+ */
+void rom_add_load_notifier(Notifier *notifier);
+
 /* This is the usual maximum in uboot, so if a uImage overflows this, it would
  * overflow on real hardware too. */
 #define UBOOT_MAX_GUNZIP_BYTES (64 << 20)
-- 
Gitee


From 113dda44a4857134af03ea8001a656dfea730f0e Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Wed, 14 Jun 2023 16:54:00 +0100
Subject: [PATCH 20/40] target/arm/kvm-rme: Initialize Realm memory

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/7f3408b58fee5e7aaf7cda65bd506f7b7ce4b789

Initialize the IPA state of RAM. Collect the images copied into guest
RAM into a sorted list, and issue POPULATE_REALM KVM ioctls once we've
created the Realm Descriptor. The images are part of the Realm Initial
Measurement.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      target/arm/kvm-rme.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 target/arm/kvm-rme.c | 127 +++++++++++++++++++++++++++++++++++++++++++
 target/arm/kvm_arm.h |  14 +++++
 2 files changed, 141 insertions(+)

diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c
index b080552076..1f42187699 100644
--- a/target/arm/kvm-rme.c
+++ b/target/arm/kvm-rme.c
@@ -8,6 +8,7 @@
 
 #include "hw/boards.h"
 #include "hw/core/cpu.h"
+#include "hw/loader.h"
 #include "kvm_arm.h"
 #include "migration/blocker.h"
 #include "qapi/error.h"
@@ -20,8 +21,19 @@
 #define TYPE_RME_GUEST "rme-guest"
 OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST)
 
+#define RME_PAGE_SIZE qemu_real_host_page_size()
+
+typedef struct {
+    hwaddr base;
+    hwaddr size;
+} RmeRamRegion;
+
 struct RmeGuest {
     ConfidentialGuestSupport parent_obj;
+    Notifier rom_load_notifier;
+    GSList *ram_regions;
+
+    RmeRamRegion init_ram;
 };
 
 OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST,
@@ -30,6 +42,63 @@ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST,
 
 static RmeGuest *rme_guest;
 
+static int rme_init_ram(RmeRamRegion *ram, Error **errp)
+{
+    int ret;
+    hwaddr start = QEMU_ALIGN_DOWN(ram->base, RME_PAGE_SIZE);
+    hwaddr end = QEMU_ALIGN_UP(ram->base + ram->size, RME_PAGE_SIZE);
+    struct arm_rme_init_ripas init_args = {
+        .base = start,
+        .size = end - start,
+    };
+
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
+                            KVM_CAP_ARM_RME_INIT_RIPAS_REALM,
+                            (intptr_t)&init_args);
+    if (ret) {
+        error_setg_errno(errp, -ret,
+                         "failed to init RAM [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")",
+                         start, end);
+    }
+
+    return ret;
+}
+
+static int rme_populate_range(hwaddr base, size_t size, bool measure,
+                              Error **errp)
+{
+    int ret;
+    hwaddr start = QEMU_ALIGN_DOWN(base, RME_PAGE_SIZE);
+    hwaddr end = QEMU_ALIGN_UP(base + size, RME_PAGE_SIZE);
+    struct arm_rme_populate_realm populate_args = {
+        .base = start,
+        .size = end - start,
+        .flags = measure ? KVM_ARM_RME_POPULATE_FLAGS_MEASURE : 0,
+    };
+
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
+                            KVM_CAP_ARM_RME_POPULATE_REALM,
+                            (intptr_t)&populate_args);
+    if (ret) {
+        error_setg_errno(errp, -ret,
+                   "failed to populate realm [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")",
+                   start, end);
+    }
+    return ret;
+}
+
+static void rme_populate_ram_region(gpointer data, gpointer err)
+{
+    Error **errp = err;
+    const RmeRamRegion *region = data;
+
+    if (*errp) {
+        return;
+    }
+
+    rme_populate_range(region->base, region->size, /* measure */ true, errp);
+}
+
 static int rme_init_cpus(Error **errp)
 {
     int ret;
@@ -60,6 +129,16 @@ static int rme_create_realm(Error **errp)
         return -1;
     }
 
+    if (rme_init_ram(&rme_guest->init_ram, errp)) {
+        return -1;
+    }
+
+    g_slist_foreach(rme_guest->ram_regions, rme_populate_ram_region, errp);
+    g_slist_free_full(g_steal_pointer(&rme_guest->ram_regions), g_free);
+    if (*errp) {
+        return -1;
+    }
+
     if (rme_init_cpus(errp)) {
         return -1;
     }
@@ -105,6 +184,43 @@ static void rme_guest_finalize(Object *obj)
 {
 }
 
+static gint rme_compare_ram_regions(gconstpointer a, gconstpointer b)
+{
+    const RmeRamRegion *ra = a;
+    const RmeRamRegion *rb = b;
+
+    g_assert(ra->base != rb->base);
+    return ra->base < rb->base ? -1 : 1;
+}
+
+static void rme_rom_load_notify(Notifier *notifier, void *data)
+{
+    RmeRamRegion *region;
+    RomLoaderNotifyData *rom = data;
+
+    if (rom->addr == -1) {
+        /*
+         * These blobs (ACPI tables) are not loaded into guest RAM at reset.
+         * Instead the firmware will load them via fw_cfg and measure them
+         * itself.
+         */
+        return;
+    }
+
+    region = g_new0(RmeRamRegion, 1);
+    region->base = rom->addr;
+    region->size = rom->len;
+
+    /*
+     * The Realm Initial Measurement (RIM) depends on the order in which we
+     * initialize and populate the RAM regions. To help a verifier
+     * independently calculate the RIM, sort regions by GPA.
+     */
+    rme_guest->ram_regions = g_slist_insert_sorted(rme_guest->ram_regions,
+                                                   region,
+                                                   rme_compare_ram_regions);
+}
+
 int kvm_arm_rme_init(MachineState *ms)
 {
     static Error *rme_mig_blocker;
@@ -132,10 +248,21 @@ int kvm_arm_rme_init(MachineState *ms)
      */
     qemu_add_vm_change_state_handler(rme_vm_state_change, NULL);
 
+    rme_guest->rom_load_notifier.notify = rme_rom_load_notify;
+    rom_add_load_notifier(&rme_guest->rom_load_notifier);
+
     cgs->ready = true;
     return 0;
 }
 
+void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size)
+{
+    if (rme_guest) {
+        rme_guest->init_ram.base = base;
+        rme_guest->init_ram.size = size;
+    }
+}
+
 int kvm_arm_rme_vcpu_init(CPUState *cs)
 {
     ARMCPU *cpu = ARM_CPU(cs);
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index b6a07eb80f..78ff8b7375 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -428,6 +428,16 @@ int kvm_arm_rme_vm_type(MachineState *ms);
  */
 int kvm_arm_rme_vcpu_init(CPUState *cs);
 
+/**
+ * kvm_arm_rme_init_guest_ram
+ * @base: base address of RAM
+ * @size: size of RAM
+ *
+ * If the user requested a Realm, set the base and size of guest RAM, in order
+ * to initialize the Realm IPA space.
+ */
+void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size);
+
 #else
 
 /*
@@ -454,6 +464,10 @@ static inline bool kvm_arm_steal_time_supported(void)
     return false;
 }
 
+static inline void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size)
+{
+}
+
 /*
  * These functions should never actually be called without KVM support.
  */
-- 
Gitee


From 853f2c56d022c88aff929824ed5278c958a47a6d Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 7 Feb 2023 18:55:22 +0000
Subject: [PATCH 21/40] target/arm/kvm-rme: Add Realm Personalization Value
 parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/c2659aa7e7fde76a3bc9914f348ee5c2d7b4d15d

The Realm Personalization Value (RPV) is provided by the user to
distinguish Realms that have the same initial measurement.

The user provides a base64 string encoding 64 bytes. They are stored
into the RPV in the same order.

Cc: Eric Blake <eblake@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Daniel P. Berrangé <berrange@redhat.com>
Cc: Eduardo Habkost <eduardo@habkost.net>
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 qapi/qom.json        | 15 ++++++++
 target/arm/kvm-rme.c | 85 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/qapi/qom.json b/qapi/qom.json
index e405c51da3..0120369454 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -952,6 +952,20 @@
             '*kae': 'uint32',
             '*measurement-algo': 'TmmGuestMeasurementAlgo' } }
 
+##
+# @RmeGuestProperties:
+#
+# Properties for rme-guest objects.
+#
+# @personalization-value: a base64 string encoding a 64-byte (512-bit) value.
+#     This optional parameter allows to uniquely identify the VM instance
+#     during attestation. (default: all-zero)
+#
+# Since: 10.0
+##
+{ 'struct': 'RmeGuestProperties',
+  'data': { '*personalization-value': 'str' } }
+
 ##
 # @ObjectType:
 #
@@ -1070,6 +1084,7 @@
       'pr-manager-helper':          { 'type': 'PrManagerHelperProperties',
                                       'if': 'CONFIG_LINUX' },
       'qtest':                      'QtestProperties',
+      'rme-guest':                  'RmeGuestProperties',
       'rng-builtin':                'RngProperties',
       'rng-egd':                    'RngEgdProperties',
       'rng-random':                 { 'type': 'RngRandomProperties',
diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c
index 1f42187699..e8976e4740 100644
--- a/target/arm/kvm-rme.c
+++ b/target/arm/kvm-rme.c
@@ -12,6 +12,7 @@
 #include "kvm_arm.h"
 #include "migration/blocker.h"
 #include "qapi/error.h"
+#include "qemu/base64.h"
 #include "qemu/error-report.h"
 #include "qom/object_interfaces.h"
 #include "exec/confidential-guest-support.h"
@@ -33,6 +34,9 @@ struct RmeGuest {
     Notifier rom_load_notifier;
     GSList *ram_regions;
 
+    char *personalization_value_str;
+    uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE];
+
     RmeRamRegion init_ram;
 };
 
@@ -42,6 +46,48 @@ OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST,
 
 static RmeGuest *rme_guest;
 
+static int rme_configure_one(RmeGuest *guest, uint32_t cfg, Error **errp)
+{
+    int ret;
+    const char *cfg_str;
+    struct arm_rme_config args = {
+        .cfg = cfg,
+    };
+
+    switch (cfg) {
+    case ARM_RME_CONFIG_RPV:
+        memcpy(args.rpv, guest->personalization_value, ARM_RME_CONFIG_RPV_SIZE);
+        cfg_str = "personalization value";
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
+                            KVM_CAP_ARM_RME_CONFIG_REALM, (intptr_t)&args);
+    if (ret) {
+        error_setg_errno(errp, -ret, "failed to configure %s", cfg_str);
+    }
+    return ret;
+}
+
+static int rme_configure(Error **errp)
+{
+    int ret;
+    size_t option;
+    const uint32_t config_options[] = {
+        ARM_RME_CONFIG_RPV,
+    };
+
+    for (option = 0; option < ARRAY_SIZE(config_options); option++) {
+        ret = rme_configure_one(rme_guest, config_options[option], errp);
+        if (ret) {
+            return ret;
+        }
+    }
+    return 0;
+}
+
 static int rme_init_ram(RmeRamRegion *ram, Error **errp)
 {
     int ret;
@@ -122,6 +168,10 @@ static int rme_create_realm(Error **errp)
 {
     int ret;
 
+    if (rme_configure(errp)) {
+        return -1;
+    }
+
     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
                             KVM_CAP_ARM_RME_CREATE_REALM);
     if (ret) {
@@ -167,8 +217,43 @@ static void rme_vm_state_change(void *opaque, bool running, RunState state)
     }
 }
 
+static char *rme_get_rpv(Object *obj, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+
+    return g_strdup(guest->personalization_value_str);
+}
+
+static void rme_set_rpv(Object *obj, const char *value, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+    g_autofree uint8_t *rpv = NULL;
+    size_t len;
+
+    rpv = qbase64_decode(value, -1, &len, errp);
+    if (!rpv) {
+        return;
+    }
+
+    if (len != sizeof(guest->personalization_value)) {
+        error_setg(errp,
+                   "expecting a Realm Personalization Value of size %zu, got %zu\n",
+                   sizeof(guest->personalization_value), len);
+        return;
+    }
+    memcpy(guest->personalization_value, rpv, len);
+
+    /* Save the value so we don't need to encode it in the getter */
+    g_free(guest->personalization_value_str);
+    guest->personalization_value_str = g_strdup(value);
+}
+
 static void rme_guest_class_init(ObjectClass *oc, void *data)
 {
+    object_class_property_add_str(oc, "personalization-value", rme_get_rpv,
+                                  rme_set_rpv);
+    object_class_property_set_description(oc, "personalization-value",
+            "Realm personalization value (64 bytes encodede in base64)");
 }
 
 static void rme_guest_init(Object *obj)
-- 
Gitee


From 82c8a1979a23a073c3ed8965de10f79e3a676b2c Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 27 Oct 2022 19:22:48 +0100
Subject: [PATCH 22/40] target/arm/kvm-rme: Add measurement algorithm property
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/26ed0bafc44a8d4d8fcc46a1ee7a2b8aa35b1c33

This option selects which measurement algorithm to use for attestation.
Supported values are SHA256 and SHA512. Default to SHA512 arbitrarily.

SHA512 is generally faster on 64-bit architectures. On a few arm64 CPUs
I tested SHA256 is much faster, but that's most likely because they only
support acceleration via FEAT_SHA256 (Armv8.0) and not FEAT_SHA512
(Armv8.2). Future CPUs supporting RME are likely to also support
FEAT_SHA512.

Cc: Eric Blake <eblake@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Daniel P. Berrangé <berrange@redhat.com>
Cc: Eduardo Habkost <eduardo@habkost.net>
Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 qapi/qom.json        | 20 +++++++++++++++++++-
 target/arm/kvm-rme.c | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/qapi/qom.json b/qapi/qom.json
index 0120369454..02b45e1068 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -952,6 +952,20 @@
             '*kae': 'uint32',
             '*measurement-algo': 'TmmGuestMeasurementAlgo' } }
 
+##
+# @RmeGuestMeasurementAlgorithm:
+#
+# @sha256: Use the SHA256 algorithm
+#
+# @sha512: Use the SHA512 algorithm
+#
+# Algorithm to use for realm measurements
+#
+# Since: 10.0
+##
+{ 'enum': 'RmeGuestMeasurementAlgorithm',
+  'data': ['sha256', 'sha512'] }
+
 ##
 # @RmeGuestProperties:
 #
@@ -961,10 +975,14 @@
 #     This optional parameter allows to uniquely identify the VM instance
 #     during attestation. (default: all-zero)
 #
+# @measurement-algorithm: Realm measurement algorithm
+#     (default: sha512)
+#
 # Since: 10.0
 ##
 { 'struct': 'RmeGuestProperties',
-  'data': { '*personalization-value': 'str' } }
+  'data': { '*personalization-value': 'str',
+            '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm' } }
 
 ##
 # @ObjectType:
diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c
index e8976e4740..5e785fa3b6 100644
--- a/target/arm/kvm-rme.c
+++ b/target/arm/kvm-rme.c
@@ -36,6 +36,7 @@ struct RmeGuest {
 
     char *personalization_value_str;
     uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE];
+    RmeGuestMeasurementAlgorithm measurement_algo;
 
     RmeRamRegion init_ram;
 };
@@ -59,6 +60,19 @@ static int rme_configure_one(RmeGuest *guest, uint32_t cfg, Error **errp)
         memcpy(args.rpv, guest->personalization_value, ARM_RME_CONFIG_RPV_SIZE);
         cfg_str = "personalization value";
         break;
+    case ARM_RME_CONFIG_HASH_ALGO:
+        switch (guest->measurement_algo) {
+        case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256:
+            args.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256;
+            break;
+        case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512:
+            args.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        cfg_str = "hash algorithm";
+        break;
     default:
         g_assert_not_reached();
     }
@@ -77,6 +91,7 @@ static int rme_configure(Error **errp)
     size_t option;
     const uint32_t config_options[] = {
         ARM_RME_CONFIG_RPV,
+        ARM_RME_CONFIG_HASH_ALGO,
     };
 
     for (option = 0; option < ARRAY_SIZE(config_options); option++) {
@@ -248,12 +263,34 @@ static void rme_set_rpv(Object *obj, const char *value, Error **errp)
     guest->personalization_value_str = g_strdup(value);
 }
 
+static int rme_get_measurement_algo(Object *obj, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+
+    return guest->measurement_algo;
+}
+
+static void rme_set_measurement_algo(Object *obj, int algo, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+
+    guest->measurement_algo = algo;
+}
+
 static void rme_guest_class_init(ObjectClass *oc, void *data)
 {
     object_class_property_add_str(oc, "personalization-value", rme_get_rpv,
                                   rme_set_rpv);
     object_class_property_set_description(oc, "personalization-value",
             "Realm personalization value (64 bytes encodede in base64)");
+
+    object_class_property_add_enum(oc, "measurement-algorithm",
+                                   "RmeGuestMeasurementAlgorithm",
+                                   &RmeGuestMeasurementAlgorithm_lookup,
+                                   rme_get_measurement_algo,
+                                   rme_set_measurement_algo);
+    object_class_property_set_description(oc, "measurement-algorithm",
+            "Realm measurement algorithm ('sha256', 'sha512')");
 }
 
 static void rme_guest_init(Object *obj)
@@ -263,6 +300,7 @@ static void rme_guest_init(Object *obj)
         exit(1);
     }
     rme_guest = RME_GUEST(obj);
+    rme_guest->measurement_algo = RME_GUEST_MEASUREMENT_ALGORITHM_SHA512;
 }
 
 static void rme_guest_finalize(Object *obj)
-- 
Gitee


From 3b881e82b73be727e783e1762084025233fba0cc Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 4 Dec 2023 18:48:19 +0000
Subject: [PATCH 23/40] target/arm/cpu: Set number of breakpoints and
 watchpoints in KVM

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/99082dee9c26b2b0f0f4d39bc9f6f99e73701e2f

Add "num-breakpoints" and "num-watchpoints" CPU parameters to configure
the debug features that KVM presents to the guest. The KVM vCPU
configuration is modified by calling SET_ONE_REG on the ID register.

This is needed for Realm VMs, whose parameters include the number of
breakpoints and watchpoints, both of which influence the Realm Initial
Measurement.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      target/arm/arm-qmp-cmds.c
      target/arm/kvm.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 target/arm/arm-qmp-cmds.c |  1 +
 target/arm/cpu.h          |  4 ++
 target/arm/cpu64.c        | 77 +++++++++++++++++++++++++++++++++++++++
 target/arm/kvm.c          | 54 +++++++++++++++++++++++++++
 target/arm/kvm64.c        |  2 +-
 target/arm/kvm_arm.h      |  2 +
 6 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
index b53d5efe13..98b3498428 100644
--- a/target/arm/arm-qmp-cmds.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -96,6 +96,7 @@ static const char *cpu_model_advertised_features[] = {
     "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048",
     "kvm-no-adjvtime", "kvm-steal-time",
     "pauth", "pauth-impdef", "pauth-qarma3",
+    "num-breakpoints", "num-watchpoints",
     NULL
 };
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 12305effd4..223d8abd8a 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1124,6 +1124,10 @@ struct ArchCPU {
 
     /* Generic timer counter frequency, in Hz */
     uint64_t gt_cntfrq_hz;
+
+    /* Allows to override the default configuration */
+    uint8_t num_bps;
+    uint8_t num_wps;
 };
 
 typedef struct ARMCPUInfo {
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 6eca55ac29..c0edffb679 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -576,6 +576,82 @@ void aarch64_add_pauth_properties(Object *obj)
     }
 }
 
+#if defined(CONFIG_KVM)
+static void arm_cpu_get_num_wps(Object *obj, Visitor *v, const char *name,
+                                void *opaque, Error **errp)
+{
+    uint8_t val;
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    val = cpu->num_wps;
+    if (val == 0) {
+        val = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1;
+    }
+
+    visit_type_uint8(v, name, &val, errp);
+}
+
+static void arm_cpu_set_num_wps(Object *obj, Visitor *v, const char *name,
+                                void *opaque, Error **errp)
+{
+    uint8_t val;
+    ARMCPU *cpu = ARM_CPU(obj);
+    uint8_t max_wps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS) + 1;
+
+    if (!visit_type_uint8(v, name, &val, errp)) {
+        return;
+    }
+
+    if (val < 2 || val > max_wps) {
+        error_setg(errp, "invalid number of watchpoints");
+        return;
+    }
+
+    cpu->num_wps = val;
+}
+
+static void arm_cpu_get_num_bps(Object *obj, Visitor *v, const char *name,
+                                void *opaque, Error **errp)
+{
+    uint8_t val;
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    val = cpu->num_bps;
+    if (val == 0) {
+        val = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1;
+    }
+
+    visit_type_uint8(v, name, &val, errp);
+}
+
+static void arm_cpu_set_num_bps(Object *obj, Visitor *v, const char *name,
+                                void *opaque, Error **errp)
+{
+    uint8_t val;
+    ARMCPU *cpu = ARM_CPU(obj);
+    uint8_t max_bps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS) + 1;
+
+    if (!visit_type_uint8(v, name, &val, errp)) {
+        return;
+    }
+
+    if (val < 2 || val > max_bps) {
+        error_setg(errp, "invalid number of breakpoints");
+        return;
+    }
+
+    cpu->num_bps = val;
+}
+
+static void aarch64_add_kvm_writable_properties(Object *obj)
+{
+    object_property_add(obj, "num-breakpoints", "uint8", arm_cpu_get_num_bps,
+                        arm_cpu_set_num_bps, NULL, NULL);
+    object_property_add(obj, "num-watchpoints", "uint8", arm_cpu_get_num_wps,
+                        arm_cpu_set_num_wps, NULL, NULL);
+}
+#endif /* CONFIG_KVM */
+
 void arm_cpu_lpa2_finalize(ARMCPU *cpu, Error **errp)
 {
     uint64_t t;
@@ -789,6 +865,7 @@ static void aarch64_host_initfn(Object *obj)
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
         aarch64_add_sve_properties(obj);
         aarch64_add_pauth_properties(obj);
+        aarch64_add_kvm_writable_properties(obj);
     }
 #elif defined(CONFIG_HVF)
     ARMCPU *cpu = ARM_CPU(obj);
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index cec95483f3..bf17da37e5 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -681,6 +681,54 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu)
     }
 }
 
+static void kvm_arm_configure_aa64dfr0(ARMCPU *cpu)
+{
+    int ret;
+    uint64_t val, newval;
+    CPUState *cs = CPU(cpu);
+
+    if (!cpu->num_bps && !cpu->num_wps) {
+        return;
+    }
+
+    newval = cpu->isar.id_aa64dfr0;
+    if (cpu->num_bps) {
+        uint64_t ctx_cmps = FIELD_EX64(newval, ID_AA64DFR0, CTX_CMPS);
+
+        /* CTX_CMPs is never greater than BRPs */
+        ctx_cmps = MIN(ctx_cmps, cpu->num_bps - 1);
+        newval = FIELD_DP64(newval, ID_AA64DFR0, BRPS, cpu->num_bps - 1);
+        newval = FIELD_DP64(newval, ID_AA64DFR0, CTX_CMPS, ctx_cmps);
+    }
+    if (cpu->num_wps) {
+        newval = FIELD_DP64(newval, ID_AA64DFR0, WRPS, cpu->num_wps - 1);
+    }
+    ret = kvm_set_one_reg(cs, KVM_REG_ARM_ID_AA64DFR0_EL1, &newval);
+    if (ret) {
+        error_report("Failed to set KVM_REG_ARM_ID_AA64DFR0_EL1");
+        return;
+    }
+
+    /*
+     * Check if the write succeeded. KVM does offer the writable mask for this
+     * register, but this way we also check if the value we wrote was sane.
+     */
+    ret = kvm_get_one_reg(cs, KVM_REG_ARM_ID_AA64DFR0_EL1, &val);
+    if (ret) {
+        error_report("Failed to get KVM_REG_ARM_ID_AA64DFR0_EL1");
+        return;
+    }
+
+    if (val != newval) {
+        error_report("Failed to update KVM_REG_ARM_ID_AA64DFR0_EL1");
+    }
+}
+
+static void kvm_arm_configure_vcpu_regs(ARMCPU *cpu)
+{
+    kvm_arm_configure_aa64dfr0(cpu);
+}
+
 void kvm_arm_reset_vcpu(ARMCPU *cpu)
 {
     int ret;
@@ -694,6 +742,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu)
         fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret));
         abort();
     }
+
+    /*
+     * Before loading the KVM values into CPUState, update the KVM configuration
+     */
+    kvm_arm_configure_vcpu_regs(cpu);
+
     if (!write_kvmstate_to_list(cpu)) {
         fprintf(stderr, "write_kvmstate_to_list failed\n");
         abort();
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index d314927027..e84bc9f94d 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -338,7 +338,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
         err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64smfr0,
                               ARM64_SYS_REG(3, 0, 0, 4, 5));
         err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0,
-                              ARM64_SYS_REG(3, 0, 0, 5, 0));
+                              KVM_REG_ARM_ID_AA64DFR0_EL1);
         err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1,
                               ARM64_SYS_REG(3, 0, 0, 5, 1));
         err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 78ff8b7375..63b5d9affd 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -18,6 +18,8 @@
 #define KVM_ARM_VGIC_V2   (1 << 0)
 #define KVM_ARM_VGIC_V3   (1 << 1)
 
+#define KVM_REG_ARM_ID_AA64DFR0_EL1     ARM64_SYS_REG(3, 0, 0, 5, 0)
+
 /**
  * kvm_arm_init_debug() - initialize guest debug capabilities
  * @s: KVMState
-- 
Gitee


From 4febb6917e0e09279c86ce1679566bb9bc63b0df Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 7 Dec 2023 17:32:13 +0000
Subject: [PATCH 24/40] target/arm/cpu: Set number of PMU counters in KVM

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/22f6eef79582fc88a779bc5baa502bcd6e592f8f

Add a "num-pmu-counters" CPU parameter to configure the number of PMU
counters that KVM presents to the guest. This is needed for Realm VMs,
whose parameters include the number of PMU counters, which influences
the Realm Initial Measurement.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      target/arm/arm-qmp-cmds.c
      target/arm/kvm.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 target/arm/arm-qmp-cmds.c |  2 +-
 target/arm/cpu.h          |  3 +++
 target/arm/cpu64.c        | 41 +++++++++++++++++++++++++++++++++++++++
 target/arm/kvm.c          | 32 ++++++++++++++++++++++++++++++
 target/arm/kvm64.c        |  2 +-
 target/arm/kvm_arm.h      |  1 +
 6 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
index 98b3498428..d201d319bd 100644
--- a/target/arm/arm-qmp-cmds.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -96,7 +96,7 @@ static const char *cpu_model_advertised_features[] = {
     "sve1408", "sve1536", "sve1664", "sve1792", "sve1920", "sve2048",
     "kvm-no-adjvtime", "kvm-steal-time",
     "pauth", "pauth-impdef", "pauth-qarma3",
-    "num-breakpoints", "num-watchpoints",
+    "num-breakpoints", "num-watchpoints", "num-pmu-counters",
     NULL
 };
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 223d8abd8a..cb546a93e2 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1128,6 +1128,7 @@ struct ArchCPU {
     /* Allows to override the default configuration */
     uint8_t num_bps;
     uint8_t num_wps;
+    int8_t num_pmu_ctrs;
 };
 
 typedef struct ARMCPUInfo {
@@ -2477,6 +2478,8 @@ FIELD(MFAR, FPA, 12, 40)
 FIELD(MFAR, NSE, 62, 1)
 FIELD(MFAR, NS, 63, 1)
 
+FIELD(PMCR, N, 11, 5)
+
 QEMU_BUILD_BUG_ON(ARRAY_SIZE(((ARMCPU *)0)->ccsidr) <= R_V7M_CSSELR_INDEX_MASK);
 
 /* If adding a feature bit which corresponds to a Linux ELF
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index c0edffb679..4cf8446b6e 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -643,12 +643,53 @@ static void arm_cpu_set_num_bps(Object *obj, Visitor *v, const char *name,
     cpu->num_bps = val;
 }
 
+static void arm_cpu_get_num_pmu_ctrs(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+{
+    uint8_t val;
+    ARMCPU *cpu = ARM_CPU(obj);
+
+    if (cpu->num_pmu_ctrs == -1) {
+        val = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N);
+    } else {
+        val = cpu->num_pmu_ctrs;
+    }
+
+    visit_type_uint8(v, name, &val, errp);
+}
+
+static void arm_cpu_set_num_pmu_ctrs(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+{
+    uint8_t val;
+    ARMCPU *cpu = ARM_CPU(obj);
+    uint8_t max_ctrs = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N);
+
+    if (!visit_type_uint8(v, name, &val, errp)) {
+        return;
+    }
+
+    if (val > max_ctrs) {
+        error_setg(errp, "invalid number of PMU counters");
+        return;
+    }
+
+    cpu->num_pmu_ctrs = val;
+}
+
 static void aarch64_add_kvm_writable_properties(Object *obj)
 {
+    ARMCPU *cpu = ARM_CPU(obj);
+
     object_property_add(obj, "num-breakpoints", "uint8", arm_cpu_get_num_bps,
                         arm_cpu_set_num_bps, NULL, NULL);
     object_property_add(obj, "num-watchpoints", "uint8", arm_cpu_get_num_wps,
                         arm_cpu_set_num_wps, NULL, NULL);
+
+    cpu->num_pmu_ctrs = -1;
+    object_property_add(obj, "num-pmu-counters", "uint8",
+                        arm_cpu_get_num_pmu_ctrs, arm_cpu_set_num_pmu_ctrs,
+                        NULL, NULL);
 }
 #endif /* CONFIG_KVM */
 
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index bf17da37e5..f45783a9da 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -724,9 +724,41 @@ static void kvm_arm_configure_aa64dfr0(ARMCPU *cpu)
     }
 }
 
+static void kvm_arm_configure_pmcr(ARMCPU *cpu)
+{
+    int ret;
+    uint64_t val, newval;
+    CPUState *cs = CPU(cpu);
+
+    if (cpu->num_pmu_ctrs == -1) {
+        return;
+    }
+
+    newval = FIELD_DP64(cpu->isar.reset_pmcr_el0, PMCR, N, cpu->num_pmu_ctrs);
+    ret = kvm_set_one_reg(cs, KVM_REG_ARM_PMCR_EL0, &newval);
+    if (ret) {
+        error_report("Failed to set KVM_REG_ARM_PMCR_EL0");
+        return;
+    }
+
+    /*
+     * Check if the write succeeded, since older versions of KVM ignore it.
+     */
+    ret = kvm_get_one_reg(cs, KVM_REG_ARM_PMCR_EL0, &val);
+    if (ret) {
+        error_report("Failed to get KVM_REG_ARM_PMCR_EL0");
+        return;
+    }
+
+    if (val != newval) {
+        error_report("Failed to update KVM_REG_ARM_PMCR_EL0");
+    }
+}
+
 static void kvm_arm_configure_vcpu_regs(ARMCPU *cpu)
 {
     kvm_arm_configure_aa64dfr0(cpu);
+    kvm_arm_configure_pmcr(cpu);
 }
 
 void kvm_arm_reset_vcpu(ARMCPU *cpu)
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index e84bc9f94d..6a8aad0f06 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -438,7 +438,7 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
         if (pmu_supported) {
             /* PMCR_EL0 is only accessible if the vCPU has feature PMU_V3 */
             err |= read_sys_reg64(fdarray[2], &ahcf->isar.reset_pmcr_el0,
-                                  ARM64_SYS_REG(3, 3, 9, 12, 0));
+                                  KVM_REG_ARM_PMCR_EL0);
         }
 
         if (sve_supported) {
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 63b5d9affd..4a9707a435 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -19,6 +19,7 @@
 #define KVM_ARM_VGIC_V3   (1 << 1)
 
 #define KVM_REG_ARM_ID_AA64DFR0_EL1     ARM64_SYS_REG(3, 0, 0, 5, 0)
+#define KVM_REG_ARM_PMCR_EL0            ARM64_SYS_REG(3, 3, 9, 12, 0)
 
 /**
  * kvm_arm_init_debug() - initialize guest debug capabilities
-- 
Gitee


From 21bfc55d5d2580bcf61e174c95cd3fe27c608b27 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Tue, 7 Feb 2023 13:05:40 +0000
Subject: [PATCH 25/40] target/arm/cpu: Inform about reading confidential CPU
 registers

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/f7dbc9b0e0677feabac408bed8fb9fcbd9b946c3

The host cannot access registers of a Realm. Instead of showing all
registers as zero in "info registers", display a message about this
restriction.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 target/arm/cpu.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 09d391bd34..3de2e1a3c3 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1082,6 +1082,11 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, int flags)
     const char *ns_status;
     bool sve;
 
+    if (cpu->kvm_rme) {
+        qemu_fprintf(f, "the CPU registers are confidential to the realm\n");
+        return;
+    }
+
     qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
     for (i = 0; i < 32; i++) {
         if (i == 31) {
-- 
Gitee


From 8f73dd3647c1ea8255c3fbd809ded08d30cbe746 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 6 Feb 2023 16:49:25 +0000
Subject: [PATCH 26/40] hw/arm/virt: Add support for Arm RME

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/6e0e9f49e9bdf22e4bd06d3506b9abc63c927b85

When confidential-guest-support is enabled for the virt machine, add the
RME flag to the VM type.

The HVC conduit for PSCI is not supported for Realms.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      hw/arm/virt.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/arm/virt.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a43f18020c..ec4faab9dc 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -260,6 +260,11 @@ static bool cpu_type_valid(const char *cpu)
     return false;
 }
 
+static bool virt_machine_is_confidential(VirtMachineState *vms)
+{
+    return MACHINE(vms)->cgs;
+}
+
 static void create_randomness(MachineState *ms, const char *node)
 {
     struct {
@@ -2610,10 +2615,12 @@ static void machvirt_init(MachineState *machine)
      * if the guest has EL2 then we will use SMC as the conduit,
      * and otherwise we will use HVC (for backwards compatibility and
      * because if we're using KVM then we must use HVC).
+     * Realm guests must also use SMC.
      */
     if (vms->secure && firmware_loaded) {
         vms->psci_conduit = QEMU_PSCI_CONDUIT_DISABLED;
-    } else if (vms->virt || virtcca_cvm_enabled()) {
+    } else if (vms->virt || virtcca_cvm_enabled() ||
+		    virt_machine_is_confidential(vms)) {
         vms->psci_conduit = QEMU_PSCI_CONDUIT_SMC;
     } else {
         vms->psci_conduit = QEMU_PSCI_CONDUIT_HVC;
@@ -3813,6 +3820,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
             virtcca_cvm_type = VIRTCCA_CVM_TYPE;
         }
     }
+    int rme_vm_type = kvm_arm_rme_vm_type(ms), type;
     int max_vm_pa_size, requested_pa_size;
     bool fixed_ipa;
 
@@ -3842,9 +3850,12 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
      * the implicit legacy 40b IPA setting, in which case the kvm_type
      * must be 0.
      */
-    return strcmp(type_str, "cvm") == 0 ?
-        ((fixed_ipa ? 0 : requested_pa_size) | virtcca_cvm_type) :
-        (fixed_ipa ? 0 : requested_pa_size);
+    type = strcmp(type_str, "cvm") == 0 ? virtcca_cvm_type : 0;
+    if (fixed_ipa) {
+        return type;
+    }
+
+    return requested_pa_size | rme_vm_type | type;
 }
 
 static void virt_machine_class_init(ObjectClass *oc, void *data)
-- 
Gitee


From 8796ed125a4e424df483e2059eab2b4fa7f88f8d Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 6 Feb 2023 16:52:37 +0000
Subject: [PATCH 27/40] hw/arm/virt: Disable DTB randomness for confidential
 VMs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/68a0501d8fbf67b2828c262e8aa296820a1b32a1

The dtb-randomness feature, which adds random seeds to the DTB, isn't
really compatible with confidential VMs since it randomizes the Realm
Initial Measurement. Enabling it is not an error, but it prevents
attestation. It also isn't useful to a Realm, which doesn't trust host
input.

Currently the feature is automatically enabled, unless the user disables
it on the command-line. Change it to OnOffAuto, and automatically
disable it for confidential VMs, unless the user explicitly enables it.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Conflicts:
      hw/arm/virt.c
      include/hw/arm/virt.h
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 docs/system/arm/virt.rst |  9 +++++----
 hw/arm/virt.c            | 41 +++++++++++++++++++++++++---------------
 include/hw/arm/virt.h    |  2 +-
 3 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index 7c4c80180c..0ba6d8610f 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -153,10 +153,11 @@ dtb-randomness
   rng-seed and kaslr-seed nodes (in both "/chosen" and
   "/secure-chosen") to use for features like the random number
   generator and address space randomisation. The default is
-  ``on``. You will want to disable it if your trusted boot chain
-  will verify the DTB it is passed, since this option causes the
-  DTB to be non-deterministic. It would be the responsibility of
-  the firmware to come up with a seed and pass it on if it wants to.
+  ``off`` for confidential VMs, and ``on`` otherwise. You will want
+  to disable it if your trusted boot chain will verify the DTB it is
+  passed, since this option causes the DTB to be non-deterministic.
+  It would be the responsibility of the firmware to come up with a
+  seed and pass it on if it wants to.
 
 dtb-kaslr-seed
   A deprecated synonym for dtb-randomness.
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index ec4faab9dc..66d2d68944 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -281,6 +281,7 @@ static void create_randomness(MachineState *ms, const char *node)
 
 static void create_fdt(VirtMachineState *vms)
 {
+    bool dtb_randomness = true;
     MachineState *ms = MACHINE(vms);
     int nb_numa_nodes = ms->numa_state->num_nodes;
     void *fdt = create_device_tree(&vms->fdt_size);
@@ -290,6 +291,16 @@ static void create_fdt(VirtMachineState *vms)
         exit(1);
     }
 
+    /*
+     * Random data in the DTB causes a random initial measurement on CCA,
+     * so disable it for confidential VMs.
+     */
+    if (vms->dtb_randomness == ON_OFF_AUTO_OFF ||
+        (vms->dtb_randomness == ON_OFF_AUTO_AUTO &&
+         virt_machine_is_confidential(vms))) {
+        dtb_randomness = false;
+    }
+
     ms->fdt = fdt;
 
     /* Header */
@@ -306,7 +317,7 @@ static void create_fdt(VirtMachineState *vms)
         kvm_type = object_property_get_str(OBJECT(current_machine),
                                            "kvm-type", &error_abort);
     }
-    if (vms->dtb_randomness) {
+    if (dtb_randomness) {
         if (!(kvm_type && !strcmp(kvm_type, "cvm"))) {
             create_randomness(ms, "/chosen");
         }
@@ -314,7 +325,7 @@ static void create_fdt(VirtMachineState *vms)
 
     if (vms->secure) {
         qemu_fdt_add_subnode(fdt, "/secure-chosen");
-        if (vms->dtb_randomness) {
+        if (dtb_randomness) {
             create_randomness(ms, "/secure-chosen");
         }
     }
@@ -2998,18 +3009,21 @@ static void virt_set_its(Object *obj, bool value, Error **errp)
     vms->its = value;
 }
 
-static bool virt_get_dtb_randomness(Object *obj, Error **errp)
+static void virt_get_dtb_randomness(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
 {
     VirtMachineState *vms = VIRT_MACHINE(obj);
+    OnOffAuto dtb_randomness = vms->dtb_randomness;
 
-    return vms->dtb_randomness;
+    visit_type_OnOffAuto(v, name, &dtb_randomness, errp);
 }
 
-static void virt_set_dtb_randomness(Object *obj, bool value, Error **errp)
+static void virt_set_dtb_randomness(Object *obj, Visitor *v, const char *name,
+                                    void *opaque, Error **errp)
 {
     VirtMachineState *vms = VIRT_MACHINE(obj);
 
-    vms->dtb_randomness = value;
+    visit_type_OnOffAuto(v, name, &vms->dtb_randomness, errp);
 }
 
 static char *virt_get_oem_id(Object *obj, Error **errp)
@@ -3996,16 +4010,16 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
                                           "Set on/off to enable/disable "
                                           "ITS instantiation");
 
-    object_class_property_add_bool(oc, "dtb-randomness",
-                                   virt_get_dtb_randomness,
-                                   virt_set_dtb_randomness);
+    object_class_property_add(oc, "dtb-randomness", "OnOffAuto",
+                              virt_get_dtb_randomness, virt_set_dtb_randomness,
+                              NULL, NULL);
     object_class_property_set_description(oc, "dtb-randomness",
                                           "Set off to disable passing random or "
                                           "non-deterministic dtb nodes to guest");
 
-    object_class_property_add_bool(oc, "dtb-kaslr-seed",
-                                   virt_get_dtb_randomness,
-                                   virt_set_dtb_randomness);
+    object_class_property_add(oc, "dtb-kaslr-seed", "OnOffAuto",
+                              virt_get_dtb_randomness, virt_set_dtb_randomness,
+                              NULL, NULL);
     object_class_property_set_description(oc, "dtb-kaslr-seed",
                                           "Deprecated synonym of dtb-randomness");
 
@@ -4092,9 +4106,6 @@ static void virt_instance_init(Object *obj)
     /* MTE is disabled by default.  */
     vms->mte = false;
 
-    /* Supply kaslr-seed and rng-seed by default */
-    vms->dtb_randomness = true;
-
     vms->irqmap = a15irqmap;
 
     virt_flash_create(vms);
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 3e2759d225..9b43e72aac 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -225,7 +225,7 @@ struct VirtMachineState {
     bool cpu_hotplug_enabled;
     bool ras;
     bool mte;
-    bool dtb_randomness;
+    OnOffAuto dtb_randomness;
     bool pmu;
     int smmu_accel_count;
     OnOffAuto acpi;
-- 
Gitee


From 726dbebf1dc71cf4ede0f0bf6ea049639d93c00d Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 6 Feb 2023 16:56:39 +0000
Subject: [PATCH 28/40] hw/arm/virt: Reserve one bit of guest-physical address
 for RME

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/ebffee632eb86b3423ac08a264ea0edc5cf97ead

When RME is enabled, the upper GPA bit is used to distinguish protected
from unprotected addresses. Reserve it when setting up the guest memory
map.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/arm/virt.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 66d2d68944..51f7c940f4 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3836,14 +3836,24 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
     }
     int rme_vm_type = kvm_arm_rme_vm_type(ms), type;
     int max_vm_pa_size, requested_pa_size;
+    int rme_reserve_bit = 0;
     bool fixed_ipa;
 
-    max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa);
+    if (rme_vm_type) {
+        /*
+         * With RME, the upper GPA bit differentiates Realm from NS memory.
+         * Reserve the upper bit to ensure that highmem devices will fit.
+         */
+        rme_reserve_bit = 1;
+    }
+
+    max_vm_pa_size = kvm_arm_get_max_vm_ipa_size(ms, &fixed_ipa) -
+                     rme_reserve_bit;
 
     /* we freeze the memory map to compute the highest gpa */
     virt_set_memmap(vms, max_vm_pa_size);
 
-    requested_pa_size = 64 - clz64(vms->highest_gpa);
+    requested_pa_size = 64 - clz64(vms->highest_gpa) + rme_reserve_bit;
 
     /*
      * KVM requires the IPA size to be at least 32 bits.
@@ -3852,11 +3862,11 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
         requested_pa_size = 32;
     }
 
-    if (requested_pa_size > max_vm_pa_size) {
+    if (requested_pa_size > max_vm_pa_size + rme_reserve_bit) {
         error_report("-m and ,maxmem option values "
                      "require an IPA range (%d bits) larger than "
                      "the one supported by the host (%d bits)",
-                     requested_pa_size, max_vm_pa_size);
+                     requested_pa_size, max_vm_pa_size + rme_reserve_bit);
         return -1;
     }
     /*
-- 
Gitee


From 080ba1535c68e2d819dc8e7597aa941f478d0296 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Wed, 14 Jun 2023 16:36:52 +0100
Subject: [PATCH 29/40] hw/arm/boot: Mark all guest memory as RIPAS_RAM.

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/7dd79c57dd097f2de2cb4c3ce428dad78ca452f3

All Realm IPA states are by default RIPAS_EMPTY, and accessing them in
that state causes injection of a synchronous exception. Either the loader
or the guest needs to set the IPA state to RIPAS_RAM before accessing it.
Since a Linux guest needs all memory ready at boot [1], initialize it
here.

[1] https://docs.kernel.org/arch/arm64/booting.html
    https://lore.kernel.org/all/20241004144307.66199-12-steven.price@arm.com/

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/arm/boot.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 9a33601d35..1e931d91d3 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -1330,6 +1330,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info)
         }
     }
 
+    /* Mark all Realm memory as RAM */
+    kvm_arm_rme_init_guest_ram(info->loader_start, info->ram_size);
+
     /* Load the kernel.  */
     if (!info->kernel_filename || info->firmware_loaded) {
         arm_setup_firmware_boot(cpu, info, ms->firmware);
-- 
Gitee


From 3b1146d0a9d5e7a31e84b1c26b7331c84d0b5b05 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Wed, 8 Jan 2025 17:34:11 +0000
Subject: [PATCH 30/40] target/arm/kvm-rme: Add DMA remapping for the shared
 memory region

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/1efc2744bf6ac5fc074baedd42d3d40ed73c6405

In Arm CCA, the guest-physical address space is split in half. The top
half represents memory shared between guest and host, and the bottom
half is private to the guest. From QEMU's point of view, the two halves
are merged into a single region, and pages within this region are either
shared or private.

Addresses used by device DMA can potentially target both halves.
Physical devices assigned to the VM access the top half, until they are
authenticated using features like PCIe CMA-SPDM at which point they can
also access memory private to the guest.

Virtual devices implemented by the host are only allowed to access the
top half. For emulated MMIO, KVM strips the GPA before returning to
QEMU, so the GPA already belongs to QEMU's merged view of guest memory.
However DMA addresses cannot be stripped this way and need special
handling by the VMM:

* When emulating DMA the VMM needs to translate the addresses into its
  merged view. Add an IOMMU memory region on the top half, that
  retargets DMA accesses to the merged sysmem.

* When creating IOMMU mappings for (unauthenticated) VFIO devices, the VMM
  needs to map the top half of guest-physical addresses to the shared pages.
  Install RAM discard listeners that issue IOMMU map and unmap requests
  to IOMMU listeners such as VFIO.

The resulting mtree looks like this:

  address-space: vfio-pci
    0000000000000000-ffffffffffffffff (prio 0, i/o): bus master container
      0000000000000000-000001ffffffffff (prio 0, i/o): alias bus master @realm-dma-region 0000000000000000-000001ffffffffff

  memory-region: realm-dma-region
    0000000000000000-000001ffffffffff (prio 0, i/o): realm-dma-region

There are at least two problems with this approach: given that we use
the PCI bus master address space, a vIOMMU cannot install its own
address space at the moment. And since sysbus devices can't have an
IOMMU at the moment, DMA from non-PCI devices isn't supported.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/arm/virt.c        |   2 +
 target/arm/kvm-rme.c | 222 +++++++++++++++++++++++++++++++++++++++++++
 target/arm/kvm_arm.h |  15 +++
 3 files changed, 239 insertions(+)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 51f7c940f4..95f6acf655 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2880,6 +2880,8 @@ static void machvirt_init(MachineState *machine)
                                vms->fw_cfg, OBJECT(vms));
     }
 
+    kvm_arm_rme_init_gpa_space(vms->highest_gpa, vms->bus);
+
     vms->bootinfo.ram_size = machine->ram_size;
     vms->bootinfo.board_id = -1;
     vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c
index 5e785fa3b6..299af009d9 100644
--- a/target/arm/kvm-rme.c
+++ b/target/arm/kvm-rme.c
@@ -9,6 +9,7 @@
 #include "hw/boards.h"
 #include "hw/core/cpu.h"
 #include "hw/loader.h"
+#include "hw/pci/pci.h"
 #include "kvm_arm.h"
 #include "migration/blocker.h"
 #include "qapi/error.h"
@@ -24,6 +25,35 @@ OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST)
 
 #define RME_PAGE_SIZE qemu_real_host_page_size()
 
+/*
+ * Realms have a split guest-physical address space: the bottom half is private
+ * to the realm, and the top half is shared with the host. Within QEMU, we use a
+ * merged view of both halves. Most of RAM is private to the guest and not
+ * accessible to us, but the guest shares some pages with us.
+ *
+ * For DMA, devices generally target the shared half (top) of the guest address
+ * space. Only the devices trusted by the guest (using mechanisms like TDISP for
+ * device authentication) can access the bottom half.
+ *
+ * RealmDmaRegion performs remapping of top-half accesses to system memory.
+ */
+struct RealmDmaRegion {
+    IOMMUMemoryRegion parent_obj;
+};
+
+#define TYPE_REALM_DMA_REGION "realm-dma-region"
+OBJECT_DECLARE_SIMPLE_TYPE(RealmDmaRegion, REALM_DMA_REGION)
+OBJECT_DEFINE_SIMPLE_TYPE(RealmDmaRegion, realm_dma_region,
+                          REALM_DMA_REGION, IOMMU_MEMORY_REGION);
+
+typedef struct RealmPrivateSharedListener {
+    MemoryRegion *mr;
+    hwaddr offset_within_region;
+    uint64_t granularity;
+    PrivateSharedListener listener;
+    QLIST_ENTRY(RealmPrivateSharedListener) rpsl_next;
+} RealmPrivateSharedListener;
+
 typedef struct {
     hwaddr base;
     hwaddr size;
@@ -39,6 +69,12 @@ struct RmeGuest {
     RmeGuestMeasurementAlgorithm measurement_algo;
 
     RmeRamRegion init_ram;
+    uint8_t ipa_bits;
+
+    RealmDmaRegion *dma_region;
+    QLIST_HEAD(, RealmPrivateSharedListener) ram_discard_list;
+    MemoryListener memory_listener;
+    AddressSpace dma_as;
 };
 
 OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST,
@@ -305,6 +341,7 @@ static void rme_guest_init(Object *obj)
 
 static void rme_guest_finalize(Object *obj)
 {
+    memory_listener_unregister(&rme_guest->memory_listener);
 }
 
 static gint rme_compare_ram_regions(gconstpointer a, gconstpointer b)
@@ -404,3 +441,188 @@ int kvm_arm_rme_vm_type(MachineState *ms)
     }
     return 0;
 }
+
+static int rme_ram_discard_notify(StateChangeListener *scl,
+                                  MemoryRegionSection *section,
+                                  bool populate)
+{
+    hwaddr gpa, next;
+    IOMMUTLBEvent event;
+    const hwaddr end = section->offset_within_address_space +
+                       int128_get64(section->size);
+    const hwaddr address_mask = MAKE_64BIT_MASK(0, rme_guest->ipa_bits - 1);
+    PrivateSharedListener *psl = container_of(scl, PrivateSharedListener, scl);
+    RealmPrivateSharedListener *rpsl = container_of(psl, RealmPrivateSharedListener,
+                                                 listener);
+
+    assert(rme_guest->dma_region != NULL);
+
+    event.type = populate ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP;
+    event.entry.target_as = &address_space_memory;
+    event.entry.perm = populate ? IOMMU_RW : IOMMU_NONE;
+    event.entry.addr_mask = rpsl->granularity - 1;
+
+    assert(end <= address_mask);
+
+    /*
+     * Send one map (populate) or unmap (discard) event per granule. Each
+     * IOVA is the top-half alias of the RAM address (upper IPA bit set).
+     */
+    for (gpa = section->offset_within_address_space; gpa < end; gpa = next) {
+        event.entry.iova = gpa + address_mask + 1;
+        event.entry.translated_addr = gpa;
+        memory_region_notify_iommu(IOMMU_MEMORY_REGION(rme_guest->dma_region),
+                                   0, event);
+
+        next = ROUND_UP(gpa + 1, rpsl->granularity);
+        next = MIN(next, end);
+    }
+
+    return 0;
+}
+
+static int rme_ram_discard_notify_populate(StateChangeListener *scl,
+                                           MemoryRegionSection *section)
+{
+    return rme_ram_discard_notify(scl, section, /* populate */ true);
+}
+
+static int rme_ram_discard_notify_discard(StateChangeListener *scl,
+                                           MemoryRegionSection *section)
+{
+    return rme_ram_discard_notify(scl, section, /* populate */ false);
+}
+
+/* Install a RAM discard listener */
+static void rme_listener_region_add(MemoryListener *listener,
+                                    MemoryRegionSection *section)
+{
+    RealmPrivateSharedListener *rpsl;
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
+
+    /* Skip regions that have no generic state manager */
+    if (!gsm) {
+        return;
+    }
+
+    rpsl = g_new0(RealmPrivateSharedListener, 1);
+    rpsl->mr = section->mr;
+    rpsl->offset_within_region = section->offset_within_region;
+    rpsl->granularity = generic_state_manager_get_min_granularity(gsm,
+                                                                  section->mr);
+    QLIST_INSERT_HEAD(&rme_guest->ram_discard_list, rpsl, rpsl_next);
+
+    private_shared_listener_init(&rpsl->listener,
+                                 rme_ram_discard_notify_populate,
+                                 rme_ram_discard_notify_discard, true);
+    generic_state_manager_register_listener(gsm, &rpsl->listener.scl, section);
+}
+
+/* Remove the RAM discard listener installed for @section */
+static void rme_listener_region_del(MemoryListener *listener,
+                                    MemoryRegionSection *section)
+{
+    RealmPrivateSharedListener *rpsl;
+    GenericStateManager *gsm = memory_region_get_generic_state_manager(section->mr);
+    if (!gsm) {
+        return;
+    }
+    QLIST_FOREACH(rpsl, &rme_guest->ram_discard_list, rpsl_next) {
+        if (MEMORY_REGION(rpsl->mr) == section->mr &&
+            rpsl->offset_within_region == section->offset_within_region) {
+            generic_state_manager_unregister_listener(gsm, &rpsl->listener.scl);
+            QLIST_REMOVE(rpsl, rpsl_next); /* unlink before freeing */
+            g_free(rpsl);
+            break; /* rpsl is gone: must not advance the iterator */
+        }
+    }
+}
+
+static AddressSpace *rme_dma_get_address_space(PCIBus *bus, void *opaque,
+                                               int devfn)
+{
+    return &rme_guest->dma_as;
+}
+
+static const PCIIOMMUOps rme_dma_ops = {
+    .get_address_space = rme_dma_get_address_space,
+};
+
+void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus)
+{
+    RealmDmaRegion *dma_region;
+    const unsigned int ipa_bits = 64 - clz64(highest_gpa) + 1;
+
+    if (!rme_guest) {
+        return;
+    }
+
+    assert(ipa_bits < 64);
+    /* Set first: the discard-notify and translate callbacks read ipa_bits */
+    rme_guest->ipa_bits = ipa_bits;
+
+    /*
+     * Setup a DMA translation from the shared top half of the guest-physical
+     * address space to our merged view of RAM.
+     */
+    dma_region = g_new0(RealmDmaRegion, 1);
+
+    memory_region_init_iommu(dma_region, sizeof(*dma_region),
+                             TYPE_REALM_DMA_REGION, OBJECT(rme_guest),
+                             "realm-dma-region", 1ULL << ipa_bits);
+    address_space_init(&rme_guest->dma_as, MEMORY_REGION(dma_region),
+                       TYPE_REALM_DMA_REGION);
+    rme_guest->dma_region = dma_region;
+
+    pci_setup_iommu(pci_bus, &rme_dma_ops, NULL);
+
+    /*
+     * Install notifiers to forward RAM discard changes to the IOMMU notifiers
+     * (ie. tell VFIO to map shared pages and unmap private ones).
+     */
+    rme_guest->memory_listener = (MemoryListener) {
+        .name = "rme",
+        .region_add = rme_listener_region_add,
+        .region_del = rme_listener_region_del,
+    };
+    memory_listener_register(&rme_guest->memory_listener,
+                             &address_space_memory);
+}
+
+static void realm_dma_region_init(Object *obj)
+{
+}
+
+static IOMMUTLBEntry realm_dma_region_translate(IOMMUMemoryRegion *mr,
+                                                hwaddr addr,
+                                                IOMMUAccessFlags flag,
+                                                int iommu_idx)
+{
+    const hwaddr address_mask = MAKE_64BIT_MASK(0, rme_guest->ipa_bits - 1);
+    IOMMUTLBEntry entry = {
+        .target_as = &address_space_memory,
+        .iova = addr,
+        .translated_addr = addr & address_mask,
+        .addr_mask = address_mask,
+        .perm = IOMMU_RW,
+    };
+
+    return entry;
+}
+
+static void realm_dma_region_replay(IOMMUMemoryRegion *mr, IOMMUNotifier *n)
+{
+    /* Nothing is shared at boot */
+}
+
+static void realm_dma_region_finalize(Object *obj)
+{
+}
+
+static void realm_dma_region_class_init(ObjectClass *oc, void *data)
+{
+    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(oc);
+
+    imrc->translate = realm_dma_region_translate;
+    imrc->replay = realm_dma_region_replay;
+}
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index 4a9707a435..b4d54e816f 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -441,6 +441,16 @@ int kvm_arm_rme_vcpu_init(CPUState *cs);
  */
 void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size);
 
+/**
+ * kvm_arm_rme_init_gpa_space
+ * @highest_gpa: highest address of the lower half of the guest address space
+ * @pci_bus: The main PCI bus, for which PCI queries DMA address spaces
+ *
+ * Set up the guest-physical address space for a Realm. Install a memory region
+ * and notifier to manage the shared upper half of the address space.
+ */
+void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus);
+
 #else
 
 /*
@@ -471,6 +481,11 @@ static inline void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size)
 {
 }
 
+static inline void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa,
+                                              PCIBus *pci_bus)
+{
+}
+
 /*
  * These functions should never actually be called without KVM support.
  */
-- 
Gitee


From ddf23b6f58d3c605a083ad3f09388dcb6edf729e Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Fri, 12 Aug 2022 11:53:11 +0100
Subject: [PATCH 31/40] hw/arm/virt: Move virt_flash_create() to
 machvirt_init()

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/b7d6407b658327eb0be8a3014a63f84f58406043

For confidential VMs we'll want to skip flash device creation.
Unfortunately, in virt_instance_init() the machine->cgs member has not
yet been initialized, so we cannot check whether confidential guest is
enabled. Move virt_flash_create() to machvirt_init(), where we can
access the machine->cgs member.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      hw/arm/virt.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/arm/virt.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 95f6acf655..116c3ddbf0 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2572,6 +2572,7 @@ static void machvirt_init(MachineState *machine)
     }
 
     finalize_gic_version(vms);
+    virt_flash_create(vms);
 
     possible_cpus = mc->possible_cpu_arch_ids(machine);
 
@@ -4120,8 +4121,6 @@ static void virt_instance_init(Object *obj)
 
     vms->irqmap = a15irqmap;
 
-    virt_flash_create(vms);
-
     vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
     vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
 
-- 
Gitee


From 2e0ea64c8643318f8824040b010f0b2421efbd33 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Fri, 12 Aug 2022 12:08:58 +0100
Subject: [PATCH 32/40] hw/arm/virt: Use RAM instead of flash for confidential
 guest firmware

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/a8d4685f97e63dac012647cc3f9b1d830b784f8c

The flash device that holds firmware code relies on read-only stage-2
mappings. Read accesses behave as RAM and write accesses as MMIO. Since
the RMM does not support read-only mappings we cannot use the flash
device as-is.

That isn't a problem because the firmware does not want to disclose any
information to the host, hence will not store its variables in clear
persistent memory. We can therefore replace the flash device with RAM,
and load the firmware there.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      hw/arm/boot.c
      hw/arm/virt.c
      include/hw/arm/boot.h
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/arm/virt.c         | 20 +++++++++++++++++++-
 include/hw/arm/boot.h |  5 +++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 116c3ddbf0..8423912c89 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1407,6 +1407,10 @@ static PFlashCFI01 *virt_flash_create1(VirtMachineState *vms,
 
 static void virt_flash_create(VirtMachineState *vms)
 {
+    if (virt_machine_is_confidential(vms)) {
+        return;
+    }
+
     vms->flash[0] = virt_flash_create1(vms, "virt.flash0", "pflash0");
     vms->flash[1] = virt_flash_create1(vms, "virt.flash1", "pflash1");
 }
@@ -1445,6 +1449,10 @@ static void virt_flash_map(VirtMachineState *vms,
     hwaddr flashsize = vms->memmap[VIRT_FLASH].size / 2;
     hwaddr flashbase = vms->memmap[VIRT_FLASH].base;
 
+    if (virt_machine_is_confidential(vms)) {
+        return;
+    }
+
     virt_flash_map1(vms->flash[0], flashbase, flashsize,
                     secure_sysmem);
     virt_flash_map1(vms->flash[1], flashbase + flashsize, flashsize,
@@ -1460,7 +1468,7 @@ static void virt_flash_fdt(VirtMachineState *vms,
     MachineState *ms = MACHINE(vms);
     char *nodename;
 
-    if (virtcca_cvm_enabled()) {
+    if (virtcca_cvm_enabled() || virt_machine_is_confidential(vms)) {
         return;
     }
 
@@ -1524,6 +1532,15 @@ static bool virt_firmware_init(VirtMachineState *vms,
     const char *bios_name;
     BlockBackend *pflash_blk0;
 
+    /*
+     * For a confidential VM, the firmware image and any boot information,
+     * including EFI variables, are stored in RAM in order to be measurable and
+     * private. Create a RAM region and load the firmware image there.
+     */
+    if (virt_machine_is_confidential(vms)) {
+        return virt_confidential_firmware_init(vms, sysmem);
+    }
+
     /* Map legacy -drive if=pflash to machine properties */
     for (i = 0; i < ARRAY_SIZE(vms->flash); i++) {
         pflash_cfi01_legacy_drive(vms->flash[i],
@@ -2893,6 +2910,7 @@ static void machvirt_init(MachineState *machine)
     vms->bootinfo.firmware_max_size = vms->memmap[VIRT_FLASH].size;
     vms->bootinfo.confidential = virtcca_cvm_enabled();
     vms->bootinfo.psci_conduit = vms->psci_conduit;
+    vms->bootinfo.confidential = virt_machine_is_confidential(vms);
     arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo);
 
     vms->machine_done.notify = virt_machine_done;
diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h
index 06ca1d90b2..0cbae4685b 100644
--- a/include/hw/arm/boot.h
+++ b/include/hw/arm/boot.h
@@ -133,8 +133,13 @@ struct arm_boot_info {
     bool secure_board_setup;
 
     arm_endianness endianness;
+
+    /* Used when loading firmware into RAM */
     hwaddr firmware_base;
     hwaddr firmware_max_size;
+    /*
+     * Confidential guest boot loads everything into RAM so it can be measured.
+     */
     bool confidential;
 };
 
-- 
Gitee


From e8055696aa1d0ee3fab298fb3605473f285c9cc6 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Wed, 16 Apr 2025 13:40:08 +0100
Subject: [PATCH 33/40] docs/interop/firmware.json: Add arm-rme firmware
 feature

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/b547ad23843a33030968a51e547d0e2ff875086b

Some distributions provide packages containing firmware to be run under
QEMU, such as "qemu-efi-aarch64" or "edk2-aarch64". Those packages also
contain descriptors in /usr/share/qemu/firmware/*.json listing the
firmware features, so that environments like libvirt can figure out
which firmware they can load.

Define an optional feature for arm64 firmware to indicate that a
firmware supports running in a Realm. Firmware implementations need
extra support for running in a Realm, in particular to distinguish
shared from private guest memory.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      docs/interop/firmware.json
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 docs/interop/firmware.json | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/interop/firmware.json b/docs/interop/firmware.json
index cc8f869186..08c2fbabe7 100644
--- a/docs/interop/firmware.json
+++ b/docs/interop/firmware.json
@@ -127,6 +127,9 @@
 #               options related to this feature are documented in
 #               "docs/system/i386/amd-memory-encryption.rst".
 #
+# @arm-rme: The firmware supports running in a Realm, under the Arm Realm
+#           Management Extension (RME).
+#
 # @intel-tdx: The firmware supports running under Intel Trust Domain
 #             Extensions (TDX).
 #
@@ -196,7 +199,7 @@
 { 'enum' : 'FirmwareFeature',
   'data' : [ 'acpi-s3', 'acpi-s4',
              'amd-sev', 'amd-sev-es', 'amd-sev-snp',
-             'intel-tdx',
+             'arm-rme', 'intel-tdx',
              'enrolled-keys', 'requires-smm', 'secure-boot',
              'verbose-dynamic', 'verbose-static' ] }
 
-- 
Gitee


From ac5a8a0a35b5f41a2b86f5b0681519123dc7da57 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Wed, 21 Feb 2024 13:58:14 +0000
Subject: [PATCH 34/40] hw/arm/boot: Load DTB as is for confidential VMs

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/2c85282f4b10b301880b5067834ef83ad368d50a

For confidential VMs it may be necessary to measure the DTB, to ensure a
malicious host does not insert harmful information in there. If an
external tool has already generated and measured the DTB, load it as is
without patching it.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/arm/boot.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 1e931d91d3..e2fbde1699 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -527,7 +527,14 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
     char **node_path;
     Error *err = NULL;
 
-    if (binfo->dtb_filename) {
+    if (binfo->dtb_filename && binfo->confidential) {
+        /*
+         * If the user is providing a DTB for a confidential VM, it is already
+         * tailored to this configuration and measured. Load it as is, without
+         * any modification.
+         */
+        return rom_add_file_fixed_as(binfo->dtb_filename, addr, -1, as);
+    } else if (binfo->dtb_filename) {
         char *filename;
         filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, binfo->dtb_filename);
         if (!filename) {
-- 
Gitee


From 215b18636f45a1ecdad8abba5db383075efa722b Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Fri, 26 Apr 2024 16:11:59 +0100
Subject: [PATCH 35/40] hw/arm/boot: Skip bootloader for confidential guests

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/79359e41a418cffbb2f2ae0314599a29d9f183a7

An independent verifier needs to reconstruct the content of guest memory
in order to attest that it is running trusted code. To avoid having to
reconstruct the bootloader generated by QEMU, skip this step and jump
directly to the kernel, with the DTB address in x0 as specified by the
Linux boot protocol [1].

[1] https://docs.kernel.org/arch/arm64/booting.html

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/arm/boot.c         | 23 +++++++++++++++++------
 hw/arm/virt.c         |  1 +
 include/hw/arm/boot.h |  6 ++++++
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index e2fbde1699..6980aebe1e 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -766,7 +766,13 @@ void do_cpu_reset(void *opaque)
             if (cs == first_cpu) {
                 AddressSpace *as = arm_boot_address_space(cpu, info);
 
-                cpu_set_pc(cs, info->loader_start);
+                if (info->skip_bootloader)  {
+                    assert(is_a64(env));
+                    env->xregs[0] = info->dtb_start;
+                    cpu_set_pc(cs, info->entry);
+                } else {
+                    cpu_set_pc(cs, info->loader_start);
+                }
 
                 if (!have_dtb(info)) {
                     if (old_param) {
@@ -858,7 +864,8 @@ static ssize_t arm_load_elf(struct arm_boot_info *info, uint64_t *pentry,
 }
 
 static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
-                                   hwaddr *entry, AddressSpace *as)
+                                   hwaddr *entry, AddressSpace *as,
+                                   bool skip_bootloader)
 {
     hwaddr kernel_load_offset = KERNEL64_LOAD_ADDR;
     uint64_t kernel_size = 0;
@@ -910,7 +917,8 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
              * bootloader, we can just load it starting at 2MB+offset rather
              * than 0MB + offset.
              */
-            if (kernel_load_offset < BOOTLOADER_MAX_SIZE) {
+            if (kernel_load_offset < BOOTLOADER_MAX_SIZE &&
+                !skip_bootloader) {
                 kernel_load_offset += 2 * MiB;
             }
         }
@@ -994,7 +1002,8 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
     }
     if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) && kernel_size < 0) {
         kernel_size = load_aarch64_image(info->kernel_filename,
-                                         info->loader_start, &entry, as);
+                                         info->loader_start, &entry, as,
+                                         info->skip_bootloader);
         is_linux = 1;
         if (kernel_size >= 0) {
             image_low_addr = entry;
@@ -1134,8 +1143,10 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
         fixupcontext[FIXUP_ENTRYPOINT_LO] = entry;
         fixupcontext[FIXUP_ENTRYPOINT_HI] = entry >> 32;
 
-        arm_write_bootloader("bootloader", as, info->loader_start,
-                             primary_loader, fixupcontext);
+        if (!info->skip_bootloader) {
+            arm_write_bootloader("bootloader", as, info->loader_start,
+                                 primary_loader, fixupcontext);
+        }
 
         if (info->write_board_setup) {
             info->write_board_setup(cpu, info);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 8423912c89..e6053acec6 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2911,6 +2911,7 @@ static void machvirt_init(MachineState *machine)
     vms->bootinfo.confidential = virtcca_cvm_enabled();
     vms->bootinfo.psci_conduit = vms->psci_conduit;
     vms->bootinfo.confidential = virt_machine_is_confidential(vms);
+    vms->bootinfo.skip_bootloader = vms->bootinfo.confidential;
     arm_load_kernel(ARM_CPU(first_cpu), machine, &vms->bootinfo);
 
     vms->machine_done.notify = virt_machine_done;
diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h
index 0cbae4685b..326c92782e 100644
--- a/include/hw/arm/boot.h
+++ b/include/hw/arm/boot.h
@@ -137,6 +137,12 @@ struct arm_boot_info {
     /* Used when loading firmware into RAM */
     hwaddr firmware_base;
     hwaddr firmware_max_size;
+    /*
+     * Instead of starting in a small bootloader that jumps to the kernel,
+     * immediately start in the kernel.
+     */
+    bool skip_bootloader;
+
     /*
      * Confidential guest boot loads everything into RAM so it can be measured.
      */
-- 
Gitee


From ace3d13d5db0b33fdda4c31549aed8e3f87ce47d Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 7 Nov 2024 13:11:56 +0000
Subject: [PATCH 36/40] hw/tpm: Add TPM event log

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/b8e00505df37d35bcbcb05abdca5819d616099f4

Provide a library allowing the VMM to create an event log that describes
what is loaded into memory. During remote attestation in confidential
computing this helps an independent verifier reconstruct the initial
measurements of a VM, which contain the initial state of memory and
CPUs.

We provide some definitions and structures described by the Trusted
Computing Group (TCG) in "TCG PC Client Platform Firmware Profile
Specification" Level 00 Version 1.06 Revision 52 [1]. This is the same
format as used by UEFI, and UEFI could reuse this log after finding it
in DT or ACPI tables, but can also copy its content into a new one.

[1] https://trustedcomputinggroup.org/resource/pc-client-specific-platform-firmware-profile-specification/

Cc: Stefan Berger <stefanb@linux.vnet.ibm.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/tpm/Kconfig           |   4 +
 hw/tpm/meson.build       |   1 +
 hw/tpm/tpm_log.c         | 325 +++++++++++++++++++++++++++++++++++++++
 include/hw/tpm/tpm_log.h |  89 +++++++++++
 qapi/tpm.json            |  14 ++
 5 files changed, 433 insertions(+)
 create mode 100644 hw/tpm/tpm_log.c
 create mode 100644 include/hw/tpm/tpm_log.h

diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig
index a46663288c..70694b14a3 100644
--- a/hw/tpm/Kconfig
+++ b/hw/tpm/Kconfig
@@ -30,3 +30,7 @@ config TPM_SPAPR
     default y
     depends on TPM && PSERIES
     select TPM_BACKEND
+
+config TPM_LOG
+    bool
+    default y
diff --git a/hw/tpm/meson.build b/hw/tpm/meson.build
index 6968e60b3f..81efb557f3 100644
--- a/hw/tpm/meson.build
+++ b/hw/tpm/meson.build
@@ -6,4 +6,5 @@ system_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_crb.c'))
 system_ss.add(when: 'CONFIG_TPM_TIS', if_true: files('tpm_ppi.c'))
 system_ss.add(when: 'CONFIG_TPM_CRB', if_true: files('tpm_ppi.c'))
 
+system_ss.add(when: 'CONFIG_TPM_LOG', if_true: files('tpm_log.c'))
 specific_ss.add(when: 'CONFIG_TPM_SPAPR', if_true: files('tpm_spapr.c'))
diff --git a/hw/tpm/tpm_log.c b/hw/tpm/tpm_log.c
new file mode 100644
index 0000000000..ab29d8569b
--- /dev/null
+++ b/hw/tpm/tpm_log.c
@@ -0,0 +1,325 @@
+/*
+ * tpm_log.c - Event log as described by the Trusted Computing Group (TCG)
+ *
+ * Copyright (c) 2024 Linaro Ltd.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * Create an event log in the format specified by:
+ *
+ *  TCG PC Client Platform Firmware Profile Specification
+ *  Level 00 Version 1.06 Revision 52
+ *  Family “2.0”
+ */
+
+#include "qemu/osdep.h"
+
+#include "crypto/hash.h"
+#include "exec/address-spaces.h"
+#include "exec/memory.h"
+#include "hw/tpm/tpm_log.h"
+#include "qapi/error.h"
+#include "qemu/bswap.h"
+#include "qom/object_interfaces.h"
+
+/*
+ * Legacy structure used only in the first event in the log, for compatibility
+ */
+struct TcgPcClientPcrEvent {
+        uint32_t pcr_index;
+        uint32_t event_type;
+        uint8_t  digest[20];
+        uint32_t event_data_size;
+        uint8_t  event[];
+} QEMU_PACKED;
+
+struct TcgEfiSpecIdEvent {
+        uint8_t  signature[16];
+        uint32_t platform_class;
+        uint8_t  family_version_minor;
+        uint8_t  family_version_major;
+        uint8_t  spec_revision;
+        uint8_t  uintn_size;
+        uint32_t number_of_algorithms; /* 1 */
+        /*
+         * For now we declare a single algo, but if we want UEFI to reuse this
+         * header then we'd need to add entries here for all algos supported by
+         * UEFI (and expand the digest field for EV_NO_ACTION).
+         */
+        uint16_t algorithm_id;
+        uint16_t digest_size;
+        uint8_t  vendor_info_size;
+        uint8_t  vendor_info[];
+} QEMU_PACKED;
+
+struct TcgPcrEvent2Head {
+        uint32_t pcr_index;
+        uint32_t event_type;
+        /* variable-sized digests */
+        uint8_t  digests[];
+} QEMU_PACKED;
+
+struct TcgPcrEvent2Tail {
+        uint32_t event_size;
+        uint8_t  event[];
+} QEMU_PACKED;
+
+struct TpmlDigestValues {
+        uint32_t count;     /* 1 */
+        uint16_t hash_alg;
+        uint8_t  digest[];
+} QEMU_PACKED;
+
+struct TpmLog {
+    Object parent_obj;
+
+    TpmLogDigestAlgo digest_algo;
+    size_t max_size;
+    uint64_t load_addr;
+
+    uint16_t tcg_algo;
+    GByteArray *content;
+    uint8_t *digest;
+    size_t digest_size;
+};
+
+OBJECT_DEFINE_SIMPLE_TYPE(TpmLog, tpm_log, TPM_LOG, OBJECT)
+
+static void tpm_log_init(Object *obj)
+{
+    TpmLog *log = TPM_LOG(obj);
+
+    log->digest_algo = TPM_LOG_DIGEST_ALGO_SHA256;
+}
+
+static void tpm_log_destroy(TpmLog *log)
+{
+    if (!log->content) {
+        return;
+    }
+    g_free(log->digest);
+    log->digest = NULL;
+    g_byte_array_free(log->content, /* free_segment */ true);
+    log->content = NULL;
+}
+
+static void tpm_log_finalize(Object *obj)
+{
+    tpm_log_destroy(TPM_LOG(obj));
+}
+
+static int tpm_log_get_digest_algo(Object *obj, Error **errp)
+{
+    TpmLog *log = TPM_LOG(obj);
+
+    return log->digest_algo;
+}
+
+static void tpm_log_set_digest_algo(Object *obj, int algo, Error **errp)
+{
+    TpmLog *log = TPM_LOG(obj);
+
+    if (log->content != NULL) {
+        error_setg(errp, "cannot set digest algo after log creation");
+        return;
+    }
+
+    log->digest_algo = algo;
+}
+
+static void tpm_log_get_max_size(Object *obj, Visitor *v, const char *name,
+                                void *opaque, Error **errp)
+{
+    TpmLog *log = TPM_LOG(obj);
+    uint64_t value = log->max_size;
+
+    visit_type_uint64(v, name, &value, errp);
+}
+
+static void tpm_log_get_load_addr(Object *obj, Visitor *v, const char *name,
+                                  void *opaque, Error **errp)
+{
+    TpmLog *log = TPM_LOG(obj);
+    uint64_t value = log->load_addr;
+
+    visit_type_uint64(v, name, &value, errp);
+}
+
+static void tpm_log_set_load_addr(Object *obj, Visitor *v, const char *name,
+                                  void *opaque, Error **errp)
+{
+    TpmLog *log = TPM_LOG(obj);
+    uint64_t value;
+
+    if (!visit_type_uint64(v, name, &value, errp)) {
+        return;
+    }
+
+    log->load_addr = value;
+}
+
+
+static void tpm_log_class_init(ObjectClass *oc, void *data)
+{
+    object_class_property_add_enum(oc, "digest-algo",
+                                   "TpmLogDigestAlgo",
+                                   &TpmLogDigestAlgo_lookup,
+                                   tpm_log_get_digest_algo,
+                                   tpm_log_set_digest_algo);
+    object_class_property_set_description(oc, "digest-algo",
+            "Algorithm used to hash blobs added as events ('sha256', 'sha512')");
+
+    /* max_size is set while allocating the log in tpm_log_create */
+    object_class_property_add(oc, "max-size", "uint64", tpm_log_get_max_size,
+                              NULL, NULL, NULL);
+    object_class_property_set_description(oc, "max-size",
+            "Maximum size of the log, reserved in guest memory");
+
+    object_class_property_add(oc, "load-addr", "uint64", tpm_log_get_load_addr,
+                              tpm_log_set_load_addr, NULL, NULL);
+    object_class_property_set_description(oc, "load-addr",
+            "Base address of the log in guest memory");
+}
+
+int tpm_log_create(TpmLog *log, size_t max_size, Error **errp)
+{
+    struct TcgEfiSpecIdEvent event;
+    struct TcgPcClientPcrEvent header = {
+        .pcr_index = 0,
+        .event_type = cpu_to_le32(TCG_EV_NO_ACTION),
+        .digest = {0},
+        .event_data_size = cpu_to_le32(sizeof(event)),
+    };
+
+    log->content = g_byte_array_sized_new(max_size);
+    log->max_size = max_size;
+
+    switch (log->digest_algo) {
+    case TPM_LOG_DIGEST_ALGO_SHA256:
+        log->tcg_algo = TCG_ALG_SHA256;
+        log->digest_size = TCG_ALG_SHA256_DIGEST_SIZE;
+        break;
+    case TPM_LOG_DIGEST_ALGO_SHA512:
+        log->tcg_algo = TCG_ALG_SHA512;
+        log->digest_size = TCG_ALG_SHA512_DIGEST_SIZE;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    log->digest = g_malloc0(log->digest_size);
+
+    event = (struct TcgEfiSpecIdEvent) {
+        .signature = "Spec ID Event03",
+        .platform_class = 0,
+        .family_version_minor = 0,
+        .family_version_major = 2,
+        .spec_revision = 106,
+        .uintn_size = 2, /* UINT64 */
+        .number_of_algorithms = cpu_to_le32(1),
+        .algorithm_id = cpu_to_le16(log->tcg_algo),
+        .digest_size = cpu_to_le16(log->digest_size),
+        .vendor_info_size = 0,
+    };
+
+    g_byte_array_append(log->content, (guint8 *)&header, sizeof(header));
+    g_byte_array_append(log->content, (guint8 *)&event, sizeof(event));
+    return 0;
+}
+
+int tpm_log_add_event(TpmLog *log, uint32_t event_type, const uint8_t *event,
+                      size_t event_size, const uint8_t *data, size_t data_size,
+                      Error **errp)
+{
+    int digests = 0;
+    size_t rollback_len;
+    struct TcgPcrEvent2Head header = {
+        .pcr_index = 0,
+        .event_type = cpu_to_le32(event_type),
+    };
+    struct TpmlDigestValues digest_header = {0};
+    struct TcgPcrEvent2Tail tail = {
+        .event_size = cpu_to_le32(event_size),
+    };
+
+    if (log->content == NULL) {
+        error_setg(errp, "event log is not initialized");
+        return -1;
+    }
+    rollback_len = log->content->len;
+
+    g_byte_array_append(log->content, (guint8 *)&header, sizeof(header));
+
+    if (data) {
+        QCryptoHashAlgorithm qc_algo;
+
+        digest_header.hash_alg = cpu_to_le16(log->tcg_algo);
+        switch (log->digest_algo) {
+        case TPM_LOG_DIGEST_ALGO_SHA256:
+            qc_algo = QCRYPTO_HASH_ALG_SHA256;
+            break;
+        case TPM_LOG_DIGEST_ALGO_SHA512:
+            qc_algo = QCRYPTO_HASH_ALG_SHA512;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        if (qcrypto_hash_bytes(qc_algo, (const char *)data, data_size,
+                               &log->digest, &log->digest_size, errp)) {
+            goto err_rollback;
+        }
+        digests = 1;
+    } else if (event_type == TCG_EV_NO_ACTION) {
+        /* EV_NO_ACTION contains empty digests for each supported algo */
+        memset(log->digest, 0, log->digest_size);
+        digest_header.hash_alg = 0;
+        digests = 1;
+    }
+
+    if (digests) {
+        digest_header.count = cpu_to_le32(digests);
+        g_byte_array_append(log->content, (guint8 *)&digest_header,
+                            sizeof(digest_header));
+        g_byte_array_append(log->content, log->digest, log->digest_size);
+    } else {
+        /* Add an empty digests list */
+        g_byte_array_append(log->content, (guint8 *)&digest_header.count,
+                            sizeof(digest_header.count));
+    }
+
+    g_byte_array_append(log->content, (guint8 *)&tail, sizeof(tail));
+    g_byte_array_append(log->content, event, event_size);
+
+    if (log->content->len > log->max_size) {
+        error_setg(errp, "event log exceeds max size");
+        goto err_rollback;
+    }
+
+    return 0;
+
+err_rollback:
+    g_byte_array_set_size(log->content, rollback_len);
+    return -1;
+}
+
+int tpm_log_write_and_close(TpmLog *log, Error **errp)
+{
+    int ret;
+
+    if (!log->content) {
+        error_setg(errp, "event log is not initialized");
+        return -1;
+    }
+
+    ret = address_space_write_rom(&address_space_memory, log->load_addr,
+                                  MEMTXATTRS_UNSPECIFIED, log->content->data,
+                                  log->content->len);
+    if (ret) {
+        error_setg(errp, "cannot load log into memory");
+        return -1;
+    }
+
+    tpm_log_destroy(log);
+    return ret;
+}
diff --git a/include/hw/tpm/tpm_log.h b/include/hw/tpm/tpm_log.h
new file mode 100644
index 0000000000..b3cd2e7563
--- /dev/null
+++ b/include/hw/tpm/tpm_log.h
@@ -0,0 +1,89 @@
+#ifndef QEMU_TPM_LOG_H
+#define QEMU_TPM_LOG_H
+
+#include "qom/object.h"
+#include "sysemu/tpm.h"
+
+/*
+ * Defined in: TCG Algorithm Registry
+ * Family 2.0 Level 00 Revision 01.34
+ *
+ * (Here TCG stands for Trusted Computing Group)
+ */
+#define TCG_ALG_SHA256  0xB
+#define TCG_ALG_SHA512  0xD
+
+/* Size of a digest in bytes */
+#define TCG_ALG_SHA256_DIGEST_SIZE      32
+#define TCG_ALG_SHA512_DIGEST_SIZE      64
+
+/*
+ * Defined in: TCG PC Client Platform Firmware Profile Specification
+ * Version 1.06 revision 52
+ */
+#define TCG_EV_NO_ACTION                        0x00000003
+#define TCG_EV_EVENT_TAG                        0x00000006
+#define TCG_EV_POST_CODE2                       0x00000013
+#define TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2      0x8000000A
+
+struct UefiPlatformFirmwareBlob2Head {
+        uint8_t blob_description_size;
+        uint8_t blob_description[];
+} __attribute__((packed));
+
+struct UefiPlatformFirmwareBlob2Tail {
+        uint64_t blob_base;
+        uint64_t blob_size;
+} __attribute__((packed));
+
+#define TYPE_TPM_LOG "tpm-log"
+
+OBJECT_DECLARE_SIMPLE_TYPE(TpmLog, TPM_LOG)
+
+/**
+ * tpm_log_create - Create the event log
+ * @log: the log object
+ * @max_size: maximum size of the log. Adding an event past that size will
+ *            return an error
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Allocate the event log and create the initial entry (Spec ID Event03)
+ * describing the log format.
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int tpm_log_create(TpmLog *log, size_t max_size, Error **errp);
+
+/**
+ * tpm_log_add_event - Append an event to the log
+ * @log: the log object
+ * @event_type: the `eventType` field in TCG_PCR_EVENT2
+ * @event: the `event` field in TCG_PCR_EVENT2
+ * @event_size: the `eventSize` field in TCG_PCR_EVENT2
+ * @data: content to be hashed into the event digest. May be NULL.
+ * @data_size: size of @data. Should be zero when @data is NULL.
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Add a TCG_PCR_EVENT2 event to the event log. Depending on the event type, a
+ * data buffer may be hashed into the event digest (for example
+ * TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2 contains a digest of the blob.)
+ *
+ * Returns: 0 on success, -1 on error
+ */
+int tpm_log_add_event(TpmLog *log, uint32_t event_type, const uint8_t *event,
+                      size_t event_size, const uint8_t *data, size_t data_size,
+                      Error **errp);
+
+/**
+ * tpm_log_write_and_close - Move the log to guest memory
+ * @log: the log object
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Write the log into memory, at the address set in the load-addr property.
+ * After this operation, the log is not writable anymore.
+ *
+ * Return: 0 on success, -1 on error
+ */
+int tpm_log_write_and_close(TpmLog *log, Error **errp);
+
+#endif
diff --git a/qapi/tpm.json b/qapi/tpm.json
index a754455ca5..a051d7bf5c 100644
--- a/qapi/tpm.json
+++ b/qapi/tpm.json
@@ -186,3 +186,17 @@
 ##
 { 'command': 'query-tpm', 'returns': ['TPMInfo'],
   'if': 'CONFIG_TPM' }
+
+##
+# @TpmLogDigestAlgo:
+#
+# Algorithm to use for event log digests
+#
+# @sha256: Use the SHA256 algorithm
+#
+# @sha512: Use the SHA512 algorithm
+#
+# Since: 9.3
+##
+{ 'enum': 'TpmLogDigestAlgo',
+  'data': ['sha256', 'sha512'] }
-- 
Gitee


From b398484a5425336c57256dde48b1ee6630be1552 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 7 Nov 2024 14:03:34 +0000
Subject: [PATCH 37/40] hw/core/loader: Add fields to RomLoaderNotify

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/3bf3a64142d22868078d191d5ff0e6a3ddf0644c

In order to write an event log, the ROM load notification handler needs
two more fields.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/core/loader.c    | 2 ++
 include/hw/loader.h | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/hw/core/loader.c b/hw/core/loader.c
index 1627ef1976..7990147ade 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1308,6 +1308,8 @@ static void rom_reset(void *unused)
         trace_loader_write_rom(rom->name, rom->addr, rom->datasize, rom->isrom);
 
         notify = (RomLoaderNotifyData) {
+            .name = rom->name,
+            .blob_ptr = rom->data,
             .addr = rom->addr,
             .len = rom->datasize,
         };
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 5df632c5bd..3a5212b897 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -357,6 +357,10 @@ ssize_t rom_add_vga(const char *file);
 ssize_t rom_add_option(const char *file, int32_t bootindex);
 
 typedef struct RomLoaderNotifyData {
+    /* Description of the loaded ROM */
+    const char *name;
+    /* Blob */
+    void *blob_ptr;
     /* Address of the blob in guest memory */
     hwaddr addr;
     /* Length of the blob */
-- 
Gitee


From 58bb383f608b5f4f58f9fac365efe742c1f0335c Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 7 Nov 2024 17:38:11 +0000
Subject: [PATCH 38/40] target/arm/kvm-rme: Add measurement log

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/4a2fc9b28becfdae3d5662218b921f8970825bd6

Create an event log in the format defined by Trusted Computing Group
for TPM2. It contains information about the VMM, the Realm parameters,
any data loaded into guest memory before boot, and the initial vCPU
state.

The guest can access this log from RAM and send it to a verifier, to
help the verifier independently compute the Realm Initial Measurement,
and check that the data we load into guest RAM is known-good images.
Without this log, in order to end up with the right Measurement, the
verifier needs to guess what is loaded, where and in what order.

Cc: Stefan Berger <stefanb@linux.vnet.ibm.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      target/arm/Kconfig
      target/arm/kvm-rme.c
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 qapi/qom.json        |   9 +-
 target/arm/Kconfig   |   1 +
 target/arm/kvm-rme.c | 403 ++++++++++++++++++++++++++++++++++++++++++-
 target/arm/kvm_arm.h |  15 ++
 4 files changed, 426 insertions(+), 2 deletions(-)

diff --git a/qapi/qom.json b/qapi/qom.json
index 02b45e1068..e0590a6019 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -978,11 +978,18 @@
 # @measurement-algorithm: Realm measurement algorithm
 #     (default: sha512)
 #
+# @measurement-log: Enable a measurement log for the Realm. All events
+#     that contribute to the Realm Initial Measurement (RIM) are added
+#     to a log in TCG TPM2 format, which is itself loaded into Realm
+#     memory (unmeasured) and can then be read by a verifier to
+#     reconstruct the RIM.
+#
 # Since: 10.0
 ##
 { 'struct': 'RmeGuestProperties',
   'data': { '*personalization-value': 'str',
-            '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm' } }
+            '*measurement-algorithm': 'RmeGuestMeasurementAlgorithm',
+            '*measurement-log': 'bool'} }
 
 ##
 # @ObjectType:
diff --git a/target/arm/Kconfig b/target/arm/Kconfig
index bf57d739cd..14977f1d83 100644
--- a/target/arm/Kconfig
+++ b/target/arm/Kconfig
@@ -9,3 +9,4 @@ config ARM
 config AARCH64
     bool
     select ARM
+    select TPM_LOG if KVM
diff --git a/target/arm/kvm-rme.c b/target/arm/kvm-rme.c
index 299af009d9..26dda39df6 100644
--- a/target/arm/kvm-rme.c
+++ b/target/arm/kvm-rme.c
@@ -10,11 +10,13 @@
 #include "hw/core/cpu.h"
 #include "hw/loader.h"
 #include "hw/pci/pci.h"
+#include "hw/tpm/tpm_log.h"
 #include "kvm_arm.h"
 #include "migration/blocker.h"
 #include "qapi/error.h"
 #include "qemu/base64.h"
 #include "qemu/error-report.h"
+#include "qemu/units.h"
 #include "qom/object_interfaces.h"
 #include "exec/confidential-guest-support.h"
 #include "sysemu/kvm.h"
@@ -25,6 +27,14 @@ OBJECT_DECLARE_SIMPLE_TYPE(RmeGuest, RME_GUEST)
 
 #define RME_PAGE_SIZE qemu_real_host_page_size()
 
+#define RME_MEASUREMENT_LOG_SIZE    (64 * KiB)
+
+typedef struct RmeLogFiletype {
+    uint32_t event_type;
+    /* Description copied into the log event */
+    const char *desc;
+} RmeLogFiletype;
+
 /*
  * Realms have a split guest-physical address space: the bottom half is private
  * to the realm, and the top half is shared with the host. Within QEMU, we use a
@@ -57,6 +67,8 @@ typedef struct RealmPrivateSharedListener {
 typedef struct {
     hwaddr base;
     hwaddr size;
+    uint8_t *blob_ptr;
+    RmeLogFiletype *filetype;
 } RmeRamRegion;
 
 struct RmeGuest {
@@ -67,22 +79,335 @@ struct RmeGuest {
     char *personalization_value_str;
     uint8_t personalization_value[ARM_RME_CONFIG_RPV_SIZE];
     RmeGuestMeasurementAlgorithm measurement_algo;
+    bool use_measurement_log;
 
     RmeRamRegion init_ram;
     uint8_t ipa_bits;
+    size_t num_cpus;
 
     RealmDmaRegion *dma_region;
     QLIST_HEAD(, RealmPrivateSharedListener) ram_discard_list;
     MemoryListener memory_listener;
     AddressSpace dma_as;
+
+    TpmLog *log;
+    GHashTable *images;
 };
 
 OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RmeGuest, rme_guest, RME_GUEST,
                                           CONFIDENTIAL_GUEST_SUPPORT,
                                           { TYPE_USER_CREATABLE }, { })
 
+typedef struct {
+    char        signature[16];
+    char        name[32];
+    char        version[40];
+    uint64_t    ram_size;
+    uint32_t    num_cpus;
+    uint64_t    flags;
+} EventLogVmmVersion;
+
+typedef struct {
+    uint32_t    id;
+    uint32_t    data_size;
+    uint8_t     data[];
+} EventLogTagged;
+
+#define EVENT_LOG_TAG_REALM_CREATE  1
+#define EVENT_LOG_TAG_INIT_RIPAS    2
+#define EVENT_LOG_TAG_REC_CREATE    3
+
+#define REALM_PARAMS_FLAG_SVE       (1 << 1)
+#define REALM_PARAMS_FLAG_PMU       (1 << 2)
+
+#define REC_CREATE_FLAG_RUNNABLE    (1 << 0)
+
 static RmeGuest *rme_guest;
 
+static int rme_init_measurement_log(MachineState *ms)
+{
+    Object *log;
+    gpointer filename;
+    TpmLogDigestAlgo algo;
+    RmeLogFiletype *filetype;
+
+    if (!rme_guest->use_measurement_log) {
+        return 0;
+    }
+
+    switch (rme_guest->measurement_algo) {
+    case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256:
+        algo = TPM_LOG_DIGEST_ALGO_SHA256;
+        break;
+    case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512:
+        algo = TPM_LOG_DIGEST_ALGO_SHA512;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    log = object_new_with_props(TYPE_TPM_LOG, OBJECT(rme_guest),
+                                "log", &error_fatal,
+                                "digest-algo", TpmLogDigestAlgo_str(algo),
+                                NULL);
+
+    tpm_log_create(TPM_LOG(log), RME_MEASUREMENT_LOG_SIZE, &error_fatal);
+    rme_guest->log = TPM_LOG(log);
+
+    /*
+     * Write down the image names we're expecting to encounter when handling the
+     * ROM load notifications, so we can record the type of image being loaded
+     * to help the verifier.
+     */
+    rme_guest->images = g_hash_table_new_full(g_str_hash, g_str_equal, g_free,
+                                              g_free);
+
+    filename = g_strdup(ms->kernel_filename);
+    if (filename) {
+        filetype = g_new0(RmeLogFiletype, 1);
+        filetype->event_type = TCG_EV_POST_CODE2;
+        filetype->desc = "KERNEL";
+        g_hash_table_insert(rme_guest->images, filename, (gpointer)filetype);
+    }
+
+    filename = g_strdup(ms->initrd_filename);
+    if (filename) {
+        filetype = g_new0(RmeLogFiletype, 1);
+        filetype->event_type = TCG_EV_POST_CODE2;
+        filetype->desc = "INITRD";
+        g_hash_table_insert(rme_guest->images, filename, (gpointer)filetype);
+    }
+
+    filename = g_strdup(ms->firmware);
+    if (filename) {
+        filetype = g_new0(RmeLogFiletype, 1);
+        filetype->event_type = TCG_EV_EFI_PLATFORM_FIRMWARE_BLOB2;
+        filetype->desc = "FIRMWARE";
+        g_hash_table_insert(rme_guest->images, filename, filetype);
+    }
+
+    filename = g_strdup(ms->dtb);
+    if (!filename) {
+        filename = g_strdup("dtb");
+    }
+    filetype = g_new0(RmeLogFiletype, 1);
+    filetype->event_type = TCG_EV_POST_CODE2;
+    filetype->desc = "DTB";
+    g_hash_table_insert(rme_guest->images, filename, filetype);
+
+    return 0;
+}
+
+/* Add a TCG_EV_EVENT_TAG event: an {id, data_size} header then @data */
+static int rme_log_event_tag(uint32_t id, uint8_t *data, size_t size,
+                             Error **errp)
+{
+    EventLogTagged event = {
+        .id = cpu_to_le32(id),
+        .data_size = cpu_to_le32(size),
+    };
+    g_autoptr(GByteArray) bytes = NULL;
+
+    if (!rme_guest->log) {
+        return 0;
+    }
+
+    /* Allocate only once the log is known to exist; freed on scope exit */
+    bytes = g_byte_array_new();
+    g_byte_array_append(bytes, (uint8_t *)&event, sizeof(event));
+    g_byte_array_append(bytes, data, size);
+    return tpm_log_add_event(rme_guest->log, TCG_EV_EVENT_TAG, bytes->data,
+                             bytes->len, NULL, 0, errp);
+}
+
+/* Log VM type and Realm Descriptor create */
+static int rme_log_realm_create(Error **errp)
+{
+    int ret;
+    ARMCPU *cpu;
+    EventLogVmmVersion vmm_version = {
+        .signature = "VM VERSION",
+        .name = "QEMU",
+        .version = QEMU_VERSION,
+        .ram_size = cpu_to_le64(rme_guest->init_ram.size),
+        .num_cpus = cpu_to_le32(rme_guest->num_cpus),
+        .flags = 0,
+    };
+    struct {
+        uint64_t    flags;
+        uint8_t     s2sz;
+        uint8_t     sve_vl;
+        uint8_t     num_bps;
+        uint8_t     num_wps;
+        uint8_t     pmu_num_ctrs;
+        uint8_t     hash_algo;
+    } params = {
+        .s2sz = rme_guest->ipa_bits,
+    };
+
+    if (!rme_guest->log) {
+        return 0;
+    }
+
+    ret = tpm_log_add_event(rme_guest->log, TCG_EV_NO_ACTION,
+                            (uint8_t *)&vmm_version, sizeof(vmm_version),
+                            NULL, 0, errp);
+    if (ret) {
+        return ret;
+    }
+
+    /* With KVM all CPUs have the same capability */
+    cpu = ARM_CPU(first_cpu);
+    if (cpu->has_pmu) {
+        params.flags |= REALM_PARAMS_FLAG_PMU;
+        params.pmu_num_ctrs = FIELD_EX64(cpu->isar.reset_pmcr_el0, PMCR, N);
+    }
+
+    if (cpu->sve_max_vq) {
+        params.flags |= REALM_PARAMS_FLAG_SVE;
+        params.sve_vl = cpu->sve_max_vq - 1;
+    }
+    params.num_bps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, BRPS);
+    params.num_wps = FIELD_EX64(cpu->isar.id_aa64dfr0, ID_AA64DFR0, WRPS);
+
+    switch (rme_guest->measurement_algo) {
+    case RME_GUEST_MEASUREMENT_ALGORITHM_SHA256:
+        params.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA256;
+        break;
+    case RME_GUEST_MEASUREMENT_ALGORITHM_SHA512:
+        params.hash_algo = ARM_RME_CONFIG_MEASUREMENT_ALGO_SHA512;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    return rme_log_event_tag(EVENT_LOG_TAG_REALM_CREATE, (uint8_t *)&params,
+                             sizeof(params), errp);
+}
+
+/*
+ * Log a loaded image: description size, description, then base and size.
+ * Unmeasured images are logged with @data == NULL.
+ */
+static int rme_log_image(RmeLogFiletype *filetype, uint8_t *data, hwaddr base,
+                          size_t size, Error **errp)
+{
+    size_t desc_size;
+    g_autoptr(GByteArray) event = NULL;
+    struct UefiPlatformFirmwareBlob2Head head = {0};
+    struct UefiPlatformFirmwareBlob2Tail tail = {0};
+
+    if (!rme_guest->log) {
+        return 0;
+    }
+
+    if (!filetype) {
+        error_setg(errp, "cannot log image without a filetype");
+        return -1;
+    }
+
+    /* EV_POST_CODE2 strings are not NUL-terminated */
+    desc_size = strlen(filetype->desc);
+    head.blob_description_size = desc_size;
+    tail.blob_base = cpu_to_le64(base);
+    tail.blob_size = cpu_to_le64(size);
+
+    event = g_byte_array_new();
+    g_byte_array_append(event, (guint8 *)&head, sizeof(head));
+    g_byte_array_append(event, (guint8 *)filetype->desc, desc_size);
+    g_byte_array_append(event, (guint8 *)&tail, sizeof(tail));
+    return tpm_log_add_event(rme_guest->log, filetype->event_type, event->data,
+                             event->len, data, size, errp);
+}
+
+static int rme_log_ripas(hwaddr base, size_t size, Error **errp)
+{
+    struct {
+        uint64_t base;
+        uint64_t size;
+    } init_ripas = {
+        .base = cpu_to_le64(base),
+        .size = cpu_to_le64(size),
+    };
+
+    return rme_log_event_tag(EVENT_LOG_TAG_INIT_RIPAS, (uint8_t *)&init_ripas,
+                             sizeof(init_ripas), errp);
+}
+
+static int rme_log_rec(uint64_t flags, uint64_t pc, uint64_t gprs[8], Error **errp)
+{
+    struct {
+        uint64_t flags;
+        uint64_t pc;
+        uint64_t gprs[8];
+    } rec_create = {
+        .flags = cpu_to_le64(flags),
+        .pc = cpu_to_le64(pc),
+        .gprs[0] = cpu_to_le64(gprs[0]),
+        .gprs[1] = cpu_to_le64(gprs[1]),
+        .gprs[2] = cpu_to_le64(gprs[2]),
+        .gprs[3] = cpu_to_le64(gprs[3]),
+        .gprs[4] = cpu_to_le64(gprs[4]),
+        .gprs[5] = cpu_to_le64(gprs[5]),
+        .gprs[6] = cpu_to_le64(gprs[6]),
+        .gprs[7] = cpu_to_le64(gprs[7]),
+    };
+
+    return rme_log_event_tag(EVENT_LOG_TAG_REC_CREATE, (uint8_t *)&rec_create,
+                             sizeof(rec_create), errp);
+}
+
+static int rme_populate_range(hwaddr base, size_t size, bool measure,
+                              Error **errp);
+
+static int rme_close_measurement_log(Error **errp)
+{
+    ERRP_GUARD();
+    int ret;
+    hwaddr base;
+    size_t size;
+    RmeLogFiletype filetype = {
+        .event_type = TCG_EV_POST_CODE2,
+        .desc = "LOG",
+    };
+
+    if (!rme_guest->log) {
+        return 0;
+    }
+
+    base = object_property_get_uint(OBJECT(rme_guest->log), "load-addr", errp);
+    if (*errp) {
+        return -1;
+    }
+    size = object_property_get_uint(OBJECT(rme_guest->log), "max-size", errp);
+    if (*errp) {
+        return -1;
+    }
+
+    /* Log the log itself */
+    ret = rme_log_image(&filetype, NULL, base, size, errp);
+    if (ret) {
+        return ret;
+    }
+
+    ret = tpm_log_write_and_close(rme_guest->log, errp);
+    if (ret) {
+        return ret;
+    }
+
+    ret = rme_populate_range(base, size, /* measure */ false, errp);
+    if (ret) {
+        return ret;
+    }
+
+    /* The log is now in the guest. Free the image table and this object */
+    g_hash_table_destroy(rme_guest->images);
+    rme_guest->images = NULL;
+    object_unparent(OBJECT(rme_guest->log));
+    rme_guest->log = NULL;
+    return 0;
+}
+
 static int rme_configure_one(RmeGuest *guest, uint32_t cfg, Error **errp)
 {
     int ret;
@@ -156,9 +481,10 @@ static int rme_init_ram(RmeRamRegion *ram, Error **errp)
         error_setg_errno(errp, -ret,
                          "failed to init RAM [0x%"HWADDR_PRIx", 0x%"HWADDR_PRIx")",
                          start, end);
+        return ret;
     }
 
-    return ret;
+    return rme_log_ripas(ram->base, ram->size, errp);
 }
 
 static int rme_populate_range(hwaddr base, size_t size, bool measure,
@@ -194,23 +520,42 @@ static void rme_populate_ram_region(gpointer data, gpointer err)
     }
 
     rme_populate_range(region->base, region->size, /* measure */ true, errp);
+    if (*errp) {
+        return;
+    }
+
+    rme_log_image(region->filetype, region->blob_ptr, region->base,
+                  region->size, errp);
 }
 
 static int rme_init_cpus(Error **errp)
 {
     int ret;
     CPUState *cs;
+    bool logged_primary_cpu = false;
 
     /*
      * Now that do_cpu_reset() initialized the boot PC and
      * kvm_cpu_synchronize_post_reset() registered it, we can finalize the REC.
      */
     CPU_FOREACH(cs) {
+        ARMCPU *cpu = ARM_CPU(cs);
+
         ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_REC);
         if (ret) {
             error_setg_errno(errp, -ret, "failed to finalize vCPU");
             return ret;
         }
+
+        if (!logged_primary_cpu) {
+            ret = rme_log_rec(REC_CREATE_FLAG_RUNNABLE, cpu->env.pc,
+                              cpu->env.xregs, errp);
+            if (ret) {
+                return ret;
+            }
+
+            logged_primary_cpu = true;
+        }
     }
     return 0;
 }
@@ -230,6 +575,10 @@ static int rme_create_realm(Error **errp)
         return -1;
     }
 
+    if (rme_log_realm_create(errp)) {
+        return -1;
+    }
+
     if (rme_init_ram(&rme_guest->init_ram, errp)) {
         return -1;
     }
@@ -244,6 +593,10 @@ static int rme_create_realm(Error **errp)
         return -1;
     }
 
+    if (rme_close_measurement_log(errp)) {
+        return -1;
+    }
+
     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
                             KVM_CAP_ARM_RME_ACTIVATE_REALM);
     if (ret) {
@@ -313,6 +666,20 @@ static void rme_set_measurement_algo(Object *obj, int algo, Error **errp)
     guest->measurement_algo = algo;
 }
 
+static bool rme_get_measurement_log(Object *obj, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+
+    return guest->use_measurement_log;
+}
+
+static void rme_set_measurement_log(Object *obj, bool value, Error **errp)
+{
+    RmeGuest *guest = RME_GUEST(obj);
+
+    guest->use_measurement_log = value;
+}
+
 static void rme_guest_class_init(ObjectClass *oc, void *data)
 {
     object_class_property_add_str(oc, "personalization-value", rme_get_rpv,
@@ -327,6 +694,12 @@ static void rme_guest_class_init(ObjectClass *oc, void *data)
                                    rme_set_measurement_algo);
     object_class_property_set_description(oc, "measurement-algorithm",
             "Realm measurement algorithm ('sha256', 'sha512')");
+
+    object_class_property_add_bool(oc, "measurement-log",
+                                   rme_get_measurement_log,
+                                   rme_set_measurement_log);
+    object_class_property_set_description(oc, "measurement-log",
+            "Enable/disable Realm measurement log");
 }
 
 static void rme_guest_init(Object *obj)
@@ -370,6 +743,20 @@ static void rme_rom_load_notify(Notifier *notifier, void *data)
     region = g_new0(RmeRamRegion, 1);
     region->base = rom->addr;
     region->size = rom->len;
+    /*
+     * TODO: double-check lifetime. Is data still available when we measure
+     * it, while writing the log? Should be fine since data is kept for the
+     * next reset.
+     */
+    region->blob_ptr = rom->blob_ptr;
+
+    /*
+     * rme_guest->images is destroyed after ram_regions, so we can store
+     * filetype even if we don't own the struct.
+     */
+    if (rme_guest->images) {
+        region->filetype = g_hash_table_lookup(rme_guest->images, rom->name);
+    }
 
     /*
      * The Realm Initial Measurement (RIM) depends on the order in which we
@@ -399,6 +786,12 @@ int kvm_arm_rme_init(MachineState *ms)
         return -ENODEV;
     }
 
+    if (rme_init_measurement_log(ms)) {
+        return -ENODEV;
+    }
+
+    rme_guest->num_cpus = ms->smp.max_cpus;
+
     error_setg(&rme_mig_blocker, "RME: migration is not implemented");
     migrate_add_blocker(&rme_mig_blocker, &error_fatal);
 
@@ -626,3 +1019,11 @@ static void realm_dma_region_class_init(ObjectClass *oc, void *data)
     imrc->translate = realm_dma_region_translate;
     imrc->replay = realm_dma_region_replay;
 }
+
+Object *kvm_arm_rme_get_measurement_log(void)
+{
+    if (rme_guest && rme_guest->log) {
+        return OBJECT(rme_guest->log);
+    }
+    return NULL;
+}
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index b4d54e816f..8e9b2039c4 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -451,6 +451,16 @@ void kvm_arm_rme_init_guest_ram(hwaddr base, size_t size);
  */
 void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa, PCIBus *pci_bus);
 
+/**
+ * kvm_arm_rme_get_measurement_log
+ *
+ * Obtain the measurement log object if enabled, in order to get its size and
+ * set its base address.
+ *
+ * Returns NULL if measurement log is disabled.
+ */
+Object *kvm_arm_rme_get_measurement_log(void);
+
 #else
 
 /*
@@ -486,6 +496,11 @@ static inline void kvm_arm_rme_init_gpa_space(hwaddr highest_gpa,
 {
 }
 
+static inline Object *kvm_arm_rme_get_measurement_log(void)
+{
+    return NULL;
+}
+
 /*
  * These functions should never actually be called without KVM support.
  */
-- 
Gitee


From f22ae2af5af021521084e40c848e5a0505ab7955 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 7 Nov 2024 17:42:02 +0000
Subject: [PATCH 39/40] hw/arm/virt: Add measurement log for confidential boot

Reference:https://git.codelinaro.org/linaro/dcap/qemu/-/commit/7905fe583633f1246a50324c77c39026136fac29

Create a measurement log describing operations performed by QEMU to
initialize the guest, and load it into guest memory above the DTB.

Cc: Stefan Berger <stefanb@linux.vnet.ibm.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Conflicts:
      hw/arm/virt.c
      include/hw/arm/virt.h
Signed-off-by: frankyj915 <yangjieyj.yang@huawei.com>
Signed-off-by: houmingyong <houmingyong@huawei.com>
---
 hw/arm/boot.c         | 47 +++++++++++++++++++++++++++++++++++++++++++
 hw/arm/virt.c         | 22 ++++++++++++++++++++
 include/hw/arm/boot.h |  3 +++
 include/hw/arm/virt.h |  1 +
 4 files changed, 73 insertions(+)

diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 6980aebe1e..4f5bf6e77c 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -669,6 +669,24 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
 
     fdt_add_psci_node(fdt);
 
+    /* Add a reserved-memory node for the event log */
+    if (binfo->log_size) {
+        char *nodename;
+
+        qemu_fdt_add_subnode(fdt, "/reserved-memory");
+        qemu_fdt_setprop_cell(fdt, "/reserved-memory", "#address-cells", 0x2);
+        qemu_fdt_setprop_cell(fdt, "/reserved-memory", "#size-cells", 0x2);
+        qemu_fdt_setprop(fdt, "/reserved-memory", "ranges", NULL, 0);
+
+        nodename = g_strdup_printf("/reserved-memory/event-log@%" PRIx64,
+                                   binfo->log_paddr);
+        qemu_fdt_add_subnode(fdt, nodename);
+        qemu_fdt_setprop_string(fdt, nodename, "compatible", "cc-event-log");
+        qemu_fdt_setprop_sized_cells(fdt, nodename, "reg", 2, binfo->log_paddr,
+                                           2, binfo->log_size);
+        g_free(nodename);
+    }
+
     if (binfo->modify_dtb) {
         binfo->modify_dtb(binfo, fdt);
     }
@@ -941,6 +959,30 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
     return kernel_size;
 }
 
+static void add_event_log(struct arm_boot_info *info)
+{
+    if (!info->log_size) {
+        return;
+    }
+
+    if (!info->dtb_limit) {
+        int dtb_size = 0;
+
+        if (!info->get_dtb(info, &dtb_size) || dtb_size == 0) {
+            error_report("Board does not have a DTB");
+            exit(1);
+        }
+        info->dtb_limit = info->dtb_start + dtb_size;
+    }
+
+    info->log_paddr = info->dtb_limit;
+    if (info->log_paddr + info->log_size >
+        info->loader_start + info->ram_size) {
+        error_report("Not enough space for measurement log and DTB");
+        exit(1);
+    }
+}
+
 static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
                                          struct arm_boot_info *info)
 {
@@ -988,6 +1030,7 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
             }
             info->dtb_start = info->loader_start;
             info->dtb_limit = image_low_addr;
+            add_event_log(info);
         }
     }
     entry = elf_entry;
@@ -1126,6 +1169,8 @@ static void arm_setup_direct_kernel_boot(ARMCPU *cpu,
                 error_report("Not enough space for DTB after kernel/initrd");
                 exit(1);
             }
+            add_event_log(info);
+
             fixupcontext[FIXUP_ARGPTR_LO] = info->dtb_start;
             fixupcontext[FIXUP_ARGPTR_HI] = info->dtb_start >> 32;
         } else {
@@ -1212,6 +1257,8 @@ static void arm_setup_confidential_firmware_boot(ARMCPU *cpu,
         error_report("could not load firmware '%s'", firmware_filename);
         exit(EXIT_FAILURE);
     }
+
+    add_event_log(info);
 }
 
 static void arm_setup_firmware_boot(ARMCPU *cpu, struct arm_boot_info *info, const char *firmware_filename)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index e6053acec6..52789a3782 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -1989,6 +1989,11 @@ void virt_machine_done(Notifier *notifier, void *data)
         exit(1);
     }
 
+    if (vms->event_log) {
+        object_property_set_uint(vms->event_log, "load-addr",
+                                 vms->bootinfo.log_paddr, &error_fatal);
+    }
+
     fw_cfg_add_extra_pci_roots(vms->bus, vms->fw_cfg);
 
     virt_acpi_setup(vms);
@@ -2398,6 +2403,21 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
     }
 }
 
+static void create_measurement_log(VirtMachineState *vms)
+{
+    Error *err = NULL;
+
+    vms->event_log = kvm_arm_rme_get_measurement_log();
+    if (vms->event_log == NULL) {
+        return;
+    }
+    vms->bootinfo.log_size = object_property_get_uint(vms->event_log,
+                                                      "max-size", &err);
+    if (err != NULL) {
+        error_report_err(err);
+    }
+}
+
 static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot,
                                     Error **errp)
 {
@@ -2900,6 +2920,8 @@ static void machvirt_init(MachineState *machine)
 
     kvm_arm_rme_init_gpa_space(vms->highest_gpa, vms->bus);
 
+    create_measurement_log(vms);
+
     vms->bootinfo.ram_size = machine->ram_size;
     vms->bootinfo.board_id = -1;
     vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h
index 326c92782e..8fed25706b 100644
--- a/include/hw/arm/boot.h
+++ b/include/hw/arm/boot.h
@@ -147,6 +147,9 @@ struct arm_boot_info {
      * Confidential guest boot loads everything into RAM so it can be measured.
      */
     bool confidential;
+    /* location and size of the measurement log in guest memory */
+    hwaddr log_paddr;
+    size_t log_size;
 };
 
 /**
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 9b43e72aac..fee7c27e0c 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -254,6 +254,7 @@ struct VirtMachineState {
     char *oem_table_id;
     char *kvm_type;
     NotifierList cpuhp_notifiers;
+    Object *event_log;
 };
 
 #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)
-- 
Gitee


From 7916c32580dd8e887466fe597ba64dc6e212685f Mon Sep 17 00:00:00 2001
From: yxk <yangxiangkai@huawei.com>
Date: Wed, 16 Jul 2025 18:47:39 +0800
Subject: [PATCH 40/40] On the Adaptation of CCA and virtCCA.

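virtCCA now uses the same macros as CCA (KVM_CAP_ARM_RME and
KVM_ARM_VCPU_REC instead of KVM_CAP_ARM_TMM and KVM_ARM_VCPU_TEC). The
macros keep the values virtCCA already used (300 and 8), to keep the
change compact.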

Signed-off-by: yxk <yangxiangkai@huawei.com>
---
 accel/kvm/kvm-all.c           |  4 ----
 hw/arm/virt.c                 |  1 +
 linux-headers/asm-arm64/kvm.h |  3 +--
 linux-headers/linux/kvm.h     |  4 +---
 target/arm/kvm-tmm.c          | 12 ++++++------
 5 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 50047b9b71..f472fc4f69 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2491,10 +2491,6 @@ static int kvm_init(MachineState *ms)
         goto err;
     }
 
-    if (kvm_is_virtcca_cvm_type(type)) {
-        virtcca_cvm_allowed = true;
-    }
-
     do {
         ret = kvm_ioctl(s, KVM_CREATE_VM, type);
     } while (ret == -EINTR);
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 52789a3782..f12bc645d2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3876,6 +3876,7 @@ static int virt_kvm_type(MachineState *ms, const char *type_str)
 
         if (!strcmp(kvm_type, "cvm")) {
             virtcca_cvm_type = VIRTCCA_CVM_TYPE;
+            virtcca_cvm_allowed = true;
         }
     }
     int rme_vm_type = kvm_arm_rme_vm_type(ms), type;
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index aed56ef371..777b668851 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -110,9 +110,8 @@ struct kvm_regs {
 #define KVM_ARM_VCPU_PTRAUTH_ADDRESS	5 /* VCPU uses address authentication */
 #define KVM_ARM_VCPU_PTRAUTH_GENERIC	6 /* VCPU uses generic authentication */
 #define KVM_ARM_VCPU_HAS_EL2		7 /* Support nested virtualization */
-#define KVM_ARM_VCPU_TEC		8 /* VCPU TEC state as part of cvm */
+#define KVM_ARM_VCPU_REC		8 /* VCPU REC state as part of Realm */
 #define KVM_ARM_VCPU_HAS_EL2_E2H0	9 /* Limit NV support to E2H RES0 */
-#define KVM_ARM_VCPU_REC		10 /* VCPU REC state as part of Realm */
 
 struct kvm_vcpu_init {
 	__u32 target;
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index beb41f7433..96bc60475e 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1218,9 +1218,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
 #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230
 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
-#define KVM_CAP_ARM_RME 240
-
-#define KVM_CAP_ARM_TMM 300
+#define KVM_CAP_ARM_RME 300
 
 #define KVM_CAP_SEV_ES_GHCB 500
 #define KVM_CAP_HYGON_COCO_EXT 501
diff --git a/target/arm/kvm-tmm.c b/target/arm/kvm-tmm.c
index d18ac10896..d6dc8342c4 100644
--- a/target/arm/kvm-tmm.c
+++ b/target/arm/kvm-tmm.c
@@ -118,7 +118,7 @@ static int tmm_configure_one(TmmGuest *guest, uint32_t cfg, Error **errp)
             g_assert_not_reached();
     }
  
-    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0,
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
                             KVM_CAP_ARM_TMM_CONFIG_CVM, (intptr_t)&args);
     if (ret) {
         error_setg_errno(errp, -ret, "TMM: failed to configure %s", cfg_str);
@@ -167,7 +167,7 @@ static void tmm_populate_region(gpointer data, gpointer unused)
         return;
     }
 
-    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0,
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
                             KVM_CAP_ARM_TMM_POPULATE_CVM,
                             (intptr_t)&populate_args);
     if (ret) {
@@ -179,7 +179,7 @@ static void tmm_populate_region(gpointer data, gpointer unused)
 
 static int tmm_create_rd(Error **errp)
 {
-    int ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0,
+    int ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
                                 KVM_CAP_ARM_TMM_CREATE_RD);
     if (ret) {
         error_setg_errno(errp, -ret, "TMM: failed to create tmm Descriptor");
@@ -200,14 +200,14 @@ static void tmm_vm_state_change(void *opaque, bool running, RunState state)
     g_slist_free_full(g_steal_pointer(&tmm_guest->ram_regions), g_free);
 
     CPU_FOREACH(cs) {
-        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_TEC);
+        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_REC);
         if (ret) {
             error_report("TMM: failed to finalize vCPU: %s", strerror(-ret));
             exit(1);
         }
     }
 
-    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_TMM, 0,
+    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_RME, 0,
                             KVM_CAP_ARM_TMM_ACTIVATE_CVM);
     if (ret) {
         error_report("TMM: failed to activate cvm: %s", strerror(-ret));
@@ -224,7 +224,7 @@ int kvm_arm_tmm_init(ConfidentialGuestSupport *cgs, Error **errp)
         return -ENODEV;
     }
  
-    if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_TMM)) {
+    if (!kvm_check_extension(kvm_state, KVM_CAP_ARM_RME)) {
         error_setg(errp, "KVM does not support TMM");
         return -ENODEV;
     }
-- 
Gitee