From fd4d603a453fa3f76e5ae415fda6c00f8a590421 Mon Sep 17 00:00:00 2001 From: xu_lei_123 Date: Sun, 12 Nov 2023 23:50:27 +0800 Subject: [PATCH] support intel qat migration optimization --- qemu.spec | 24 +- ...ort-intel-qat-migration-optimization.patch | 2204 +++++++++++++++++ 2 files changed, 2224 insertions(+), 4 deletions(-) create mode 100644 support-intel-qat-migration-optimization.patch diff --git a/qemu.spec b/qemu.spec index 7e42c71..4029251 100644 --- a/qemu.spec +++ b/qemu.spec @@ -1,9 +1,10 @@ # Whether to support Ceph rbd storage backend %bcond_without rbd +%bcond_with qat_migration Name: qemu Version: 6.2.0 -Release: 83 +Release: 84 Epoch: 10 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 @@ -599,7 +600,11 @@ Patch0584: disas-riscv-Fix-the-typo-of-inverted-order-of-pmpadd.patch Patch0585: softmmu-dirtylimit-Add-parameter-check-for-hmp-set_v.patch Patch0586: tests-Fix-printf-format-string-in-acpi-utils.c.patch Patch0587: hw-virtio-virtio-pmem-Replace-impossible-check-by-as.patch - +Patch0588: support-intel-qat-migration-optimization.patch +%if %{with qat_migration} +BuildRequires: QAT-devel +Requires: QAT-devel +%endif BuildRequires: flex BuildRequires: gcc BuildRequires: make @@ -866,8 +871,10 @@ cd ../ --enable-zstd \ --disable-brlapi \ --disable-plugins \ +%if %{with qat_migration} + --enable-qat-migration \ +%endif --enable-debug - make %{?_smp_mflags} $buildldflags V=1 cp ${qemubuilddir}/${buildarch}/qemu-system-* qemu-kvm @@ -973,7 +980,13 @@ getent group qemu >/dev/null || groupadd -g 107 -r qemu getent passwd qemu >/dev/null || \ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ -c "qemu user" qemu - +%if %{with qat_migration} +if [ ! `getent group qat` ]; then + echo "Creating qat group." 
+ groupadd qat +fi +usermod -a -G qat qemu +%endif %post guest-agent %systemd_post qemu-guest-agent.service %preun guest-agent @@ -1172,6 +1185,9 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Sat Nov 11 2023 - 10:6.2.0-84 +- migration:support qat migration feature + * Mon Oct 30 2023 - 10:6.2.0-83 - hw/virtio/virtio-pmem: Replace impossible check by assertion - tests: Fix printf format string in acpi-utils.c diff --git a/support-intel-qat-migration-optimization.patch b/support-intel-qat-migration-optimization.patch new file mode 100644 index 0000000..6581113 --- /dev/null +++ b/support-intel-qat-migration-optimization.patch @@ -0,0 +1,2204 @@ +From 8e12a445387aa3360431b620372f46583e14dcef Mon Sep 17 00:00:00 2001 +From: lishaohui +Date: Fri, 10 Nov 2023 18:03:21 +0800 +Subject: [PATCH] intel qat migration optimization + +Signed-off-by: lishaohui +Signed-off-by: Yi Wang +Signed-off-by: Zeng Guang +Signed-off-by: Wei Wang +--- + configure | 13 +- + migration/meson.build | 1 + + migration/migration.c | 30 +- + migration/migration.h | 4 +- + migration/qat.c | 1050 ++++++++++++++++++++++++++++++ + migration/qat.h | 20 + + migration/ram.c | 493 ++++++++++++-- + migration/ram.h | 21 +- + monitor/hmp-cmds.c | 12 + + qapi/migration.json | 11 +- + 10 files changed, 1588 insertions(+), 67 deletions(-) + create mode 100644 migration/qat.c + create mode 100644 migration/qat.h + +diff --git a/configure b/configure +index d7a4502a8..9c9b557e3 100755 +--- a/configure ++++ b/configure +@@ -357,6 +357,7 @@ debug_mutex="no" + plugins="$default_feature" + rng_none="no" + secret_keyring="$default_feature" ++qat_migration_opt="" + meson="" + ninja="" + gio="$default_feature" +@@ -1133,6 +1134,8 @@ for opt do + --disable-vhost-kernel) vhost_kernel="no" + ;; + --enable-vhost-kernel) vhost_kernel="yes" ++ ;; ++ --enable-qat-migration) qat_migration_opt="yes" + ;; + --disable-capstone) capstone="disabled" + ;; +@@ -3474,6 +3477,9 @@ fi + if test "$vhost_kernel" = "yes" ; then + echo "CONFIG_VHOST_KERNEL=y" >> $config_host_mak + fi ++if test "$qat_migration_opt" = "yes" ; then ++ echo "CONFIG_QAT_MIGRATION=y" >> $config_host_mak ++fi + if test "$vhost_user" = "yes" ; then + echo "CONFIG_VHOST_USER=y" >> $config_host_mak + fi +@@ -3626,7 +3632,10 @@ fi + if test "$secret_keyring" = "yes" ; then + echo "CONFIG_SECRET_KEYRING=y" >> $config_host_mak + fi +- ++if test "$qat_migration_opt" = "yes" ; then ++glib_cflags="$glib_cflags -I/usr/include/quickassist -I/usr/include/quickassist/include -I/usr/include/quickassist/include/dc -I/usr/include/quickassist/utilities/osal/include -I/usr/include/quickassist/utilities/osal/src/linux/user_space/include -I/usr/include/quickassist/utilities/libusdm_drv/include -I/usr/include/quickassist/lookaside/access_layer/include/ -I/usr/include/quickassist/utilities/libusdm_drv/" ++glib_libs="$glib_libs -lqat_s -lusdm_drv_s " ++fi + echo "ROMS=$roms" >> $config_host_mak + echo "MAKE=$make" >> $config_host_mak + echo "PYTHON=$python" >> $config_host_mak +diff --git a/migration/meson.build b/migration/meson.build +index f8714dcb1..f5d49d0af 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -33,3 +33,4 @@ softmmu_ss.add(when: zstd, if_true: files('multifd-zstd.c')) + + specific_ss.add(when: 'CONFIG_SOFTMMU', + if_true: files('dirtyrate.c', 'ram.c', 'target.c')) ++specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_QAT_MIGRATION'], if_true: files('qat.c')) +\ No newline at end of file +diff --git a/migration/migration.c b/migration/migration.c +index 
2ec116f90..3c943eee7 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -852,6 +852,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->compress_level = s->parameters.compress_level; + params->has_compress_threads = true; + params->compress_threads = s->parameters.compress_threads; ++ params->has_compress_with_qat = true; ++ params->compress_with_qat = s->parameters.compress_with_qat; + params->has_compress_wait_thread = true; + params->compress_wait_thread = s->parameters.compress_wait_thread; + params->has_decompress_threads = true; +@@ -1516,6 +1518,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + dest->compress_wait_thread = params->compress_wait_thread; + } + ++ if (params->has_compress_with_qat) { ++ dest->compress_with_qat = params->compress_with_qat; ++ } ++ + if (params->has_decompress_threads) { + dest->decompress_threads = params->decompress_threads; + } +@@ -1617,6 +1623,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + s->parameters.compress_wait_thread = params->compress_wait_thread; + } + ++ if (params->has_compress_with_qat) { ++ s->parameters.compress_with_qat = params->compress_with_qat; ++ } ++ + if (params->has_decompress_threads) { + s->parameters.decompress_threads = params->decompress_threads; + } +@@ -2477,6 +2487,15 @@ int migrate_compress_wait_thread(void) + return s->parameters.compress_wait_thread; + } + ++bool migrate_compress_with_qat(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_with_qat; ++} ++ + int migrate_decompress_threads(void) + { + MigrationState *s; +@@ -3009,9 +3028,9 @@ static int postcopy_start(MigrationState *ms) + if (ret < 0) { + goto fail; + } +- ++ + ret = bdrv_inactivate_all(); +- if (ret < 0) { ++ if (ret < 0) { + goto fail; + } + restart_block = true; +@@ -3272,7 +3291,7 @@ static void migration_completion(MigrationState *s) + migrate_set_state(&s->state, current_active_state, + MIGRATION_STATUS_COMPLETED); + } +- ++ + return; + + fail_invalidate: +@@ -3494,7 +3513,7 @@ static MigThrError migration_detect_error(MigrationState *s) + assert(!local_error); + return MIG_THR_ERR_NONE; + } +- ++ + if (local_error) { + migrate_set_error(s, local_error); + error_free(local_error); +@@ -4205,6 +4224,8 @@ static Property migration_properties[] = { + DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), + DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, + parameters.compress_wait_thread, true), ++ DEFINE_PROP_BOOL("x-compress-with-qat", MigrationState, ++ parameters.compress_with_qat, false), + DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, + parameters.decompress_threads, + DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), +@@ -4326,6 +4347,7 @@ static void migration_instance_init(Object *obj) + /* Set has_* up only for parameter checks */ + params->has_compress_level = true; + params->has_compress_threads = true; ++ params->has_compress_with_qat = true; + params->has_decompress_threads = true; + params->has_compress_method = true; + params->has_throttle_trigger_threshold = true; +diff --git a/migration/migration.h b/migration/migration.h +index 4ed4f555d..f74857105 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -392,5 +392,7 @@ bool migration_rate_limit(void); + void migration_cancel(const Error *error); + + void populate_vfio_info(MigrationInfo *info); +- ++#ifdef CONFIG_QAT_MIGRATION ++bool migrate_compress_with_qat(void); ++#endif + #endif +diff --git 
a/migration/qat.c b/migration/qat.c +new file mode 100644 +index 000000000..8c4b33eec +--- /dev/null ++++ b/migration/qat.c +@@ -0,0 +1,1050 @@ ++#include ++#include ++#include ++#include ++#include "qemu/osdep.h" ++#include "cpu.h" ++#include "qemu/thread.h" ++#include "qemu/log.h" ++#include "exec/ram_addr.h" ++#include "migration.h" ++#include "qemu-file.h" ++#include ++#include ++#include ++#include ++#include ++#include ++#include "qat.h" ++#include "qemu-common.h" ++#define QAT_COMP_REQ_BUF_SIZE (RAM_SAVE_MULTI_PAGE_NUM << TARGET_PAGE_BITS) ++#define QAT_DECOMP_REQ_BUF_SIZE (RAM_SAVE_MAX_PAGE_NUM << TARGET_PAGE_BITS) ++ ++#define MAX_PROCESS_NUM 4 ++static int g_instReqCacheNum = 32; ++ ++typedef struct _qat_inst_t { ++ uint16_t id; // debug purpose ++ int fd; ++ uint32_t node_affinity; ++ uint32_t req_cache_num; ++ CpaInstanceHandle inst_handle; ++ CpaDcSessionHandle sess_handle; ++ QLIST_HEAD(, qat_req_t) req_cache_list; ++ int src_buf_num; ++ int src_buf_size; ++ int dst_buf_num; ++ int dst_buf_size; ++ int queue_depth; ++} qat_inst_t; ++typedef struct qat_req_t { ++ /* ++ * For decompression, stores the checkum passed from the compression side. ++ * For compresssion, not used. ++ */ ++ uint32_t checksum; ++ uint64_t id; // debug purpose ++ qat_inst_t *inst; ++ RAMBlock *block; ++ ram_addr_t offset; ++ MultiPageAddr mpa; ++ CpaBufferList *src_buf_list; ++ CpaBufferList *dst_buf_list; ++ CpaDcRqResults result; ++ uint32_t expect; ++ QLIST_ENTRY(qat_req_t) node; ++} qat_req_t; ++typedef struct _qat_dev_t { ++ bool svm_enabled; ++ bool zero_copy; ++ qat_setup_type_t type; ++ QEMUFile *f; ++ uint16_t inst_num; ++ CpaInstanceHandle *inst_handles; ++ uint32_t meta_buf_size; ++ qat_inst_t *insts; ++ QemuThread epoll_thread; ++ int efd; ++ /* Fill instances in round robin */ ++ int rr_inst_id; ++ uint64_t requests; ++ uint64_t responses; ++ uint64_t overflow; ++ QLIST_HEAD(, qat_req_t) req_post_list; ++ QemuSpin lock; ++ uint32_t req_post_num; ++ bool flush_stage; ++ int state; // -1 indicates error state ++ QemuMutex mutex; ++ QemuCond cond; ++} qat_dev_t; ++static qat_dev_t *qat_dev = NULL; ++static bool epoll_thread_running = false; ++ ++typedef void* (*qat_mem_alloc_t)(Cpa32U size, Cpa32U node, Cpa32U alignment); ++typedef void (*qat_mem_free_t)(void **p); ++typedef uint64_t (*qat_addr_translate_t)(void *virt); ++ ++static qat_mem_alloc_t qat_mem_alloc = NULL; ++static qat_mem_free_t qat_mem_free = NULL; ++static qat_addr_translate_t qat_addr_translate = NULL; ++int qat_send_req(qat_req_t *req); ++static void *qat_mem_alloc_phy(Cpa32U size, Cpa32U node, Cpa32U alignment) ++{ ++ return qaeMemAllocNUMA(size, node, alignment); ++} ++ ++static void qat_mem_free_phy(void **p) ++{ ++ if (NULL != *p) { ++ qaeMemFreeNUMA(p); ++ *p = NULL; ++ } ++} ++static void* qat_mem_alloc_virt(Cpa32U size, Cpa32U node, Cpa32U alignment) ++{ ++ return malloc(size); ++} ++ ++static void qat_mem_free_virt(void **p) ++{ ++ if (p != NULL && NULL != *p) { ++ free((void*)*p); ++ *p = NULL; ++ } ++} ++static uint64_t qat_virt_to_phy_svm0(void *vaddr) ++{ ++ uint64_t paddr = qaeVirtToPhysNUMA(vaddr); ++ if (!paddr) ++ error_report("%s: meta_buf fail to get pa for vaddr=%p", __func__, vaddr); ++ return paddr; ++} ++static uint64_t qat_virt_to_phy_svm1(void *vaddr) ++{ ++ return (uint64_t)vaddr; ++} ++static CpaBufferList *qat_buf_list_alloc(int nodeid, int buf_num, int buf_size) ++{ ++ CpaBufferList *buf_list = NULL; ++ Cpa8U *meta_buf = NULL; ++ CpaFlatBuffer *flat_buf = NULL; ++ Cpa32U buf_list_size; ++ ++ 
buf_list_size = sizeof(CpaBufferList) + sizeof(CpaFlatBuffer) * buf_num; ++ ++ buf_list = g_malloc0(buf_list_size); ++ if (unlikely(buf_list == NULL)) { ++ error_report("%s: unable to alloc buf list", __func__); ++ return NULL; ++ } ++ ++ meta_buf = qat_mem_alloc(qat_dev->meta_buf_size, nodeid, 64); ++ if (unlikely(meta_buf == NULL)) { ++ error_report("%s: unable to alloc src_meta_buf", __func__); ++ goto err_free_buf_list; ++ } ++ flat_buf = (CpaFlatBuffer*)(buf_list + 1); ++ if (buf_size) { ++ for (int i = 0; i < buf_num; i++) { ++ flat_buf[i].pData = qat_mem_alloc(buf_size, nodeid, 64); ++ if (!flat_buf[i].pData) { ++ error_report("%s: unable to alloc src buf", __func__); ++ goto err_free_meta_buf; ++ } ++ flat_buf[i].dataLenInBytes = buf_size; ++ } ++ } ++ ++ buf_list->pPrivateMetaData = meta_buf; ++ buf_list->pBuffers = flat_buf; ++ buf_list->numBuffers = buf_num; ++ ++ return buf_list; ++err_free_buf_list: ++ g_free(buf_list); ++err_free_meta_buf: ++ qat_mem_free((void **)&meta_buf); ++ return NULL; ++} ++ ++static void qat_buf_list_set_bufs_from_mpa(CpaBufferList *buf_list, ++ unsigned long addr_base, ++ MultiPageAddr *mpa) ++{ ++ uint64_t start, offset, addr, pages; ++ CpaFlatBuffer *flat_buf; ++ uint8_t *p; ++ flat_buf = (CpaFlatBuffer *)(buf_list + 1); ++ flat_buf->dataLenInBytes = 0; ++ p = flat_buf->pData; ++ for (int i = 0; i < mpa->last_idx; i++) { ++ start = multi_page_addr_get_one(mpa, i); ++ pages = start & (~TARGET_PAGE_MASK); ++ start >>= TARGET_PAGE_BITS; ++ for (int j = 0; j < pages; j++) { ++ offset = (start + j) << TARGET_PAGE_BITS; ++ addr = addr_base + offset; ++ if (qat_dev->zero_copy) { ++ int b = ((int*)(addr))[0]; b--; // avoid page fault ++ flat_buf->pData = (uint8_t*)(addr); ++ flat_buf->dataLenInBytes = TARGET_PAGE_SIZE; ++ flat_buf++; ++ } else { ++ if (qat_dev->type == QAT_SETUP_COMPRESS) { ++ // only compression needs this copy ++ memcpy(p, (uint8_t*)(addr), TARGET_PAGE_SIZE); ++ p += TARGET_PAGE_SIZE; ++ flat_buf->dataLenInBytes += TARGET_PAGE_SIZE; ++ } ++ } ++ } ++ } ++ ++ buf_list->numBuffers = qat_dev->zero_copy ? 
mpa->pages : 1; ++} ++ ++static void qat_buf_list_free(CpaBufferList *buf_list, bool buf_allocated) ++{ ++ CpaFlatBuffer *flat_buf; ++ ++ if (unlikely(buf_list == NULL)) ++ return; ++ ++ if (buf_list->pPrivateMetaData) ++ qat_mem_free((void**)&buf_list->pPrivateMetaData); ++ ++ flat_buf = buf_list->pBuffers; ++ if (unlikely(flat_buf == NULL)) ++ return; ++ ++ if (buf_allocated) { ++ for (int i = 0; i < buf_list->numBuffers; i++) { ++ if (!flat_buf[i].pData) ++ continue; ++ qat_mem_free((void**)&flat_buf[i].pData); ++ } ++ } ++ ++ g_free(buf_list); ++} ++ ++static void qat_inst_req_free(qat_req_t *req) ++{ ++ qat_inst_t *inst = req->inst; ++ if (inst->req_cache_num < g_instReqCacheNum) { ++ QLIST_INSERT_HEAD(&inst->req_cache_list, req, node); ++ inst->req_cache_num++; ++ } else { ++ qat_buf_list_free(req->src_buf_list, inst->src_buf_size); ++ qat_buf_list_free(req->dst_buf_list, inst->dst_buf_size); ++ g_free(req); ++ } ++} ++static void qat_inst_req_free_lock(qat_req_t *req) ++{ ++ qemu_spin_lock(&qat_dev->lock); ++ qat_inst_req_free(req); ++ qemu_spin_unlock(&qat_dev->lock); ++} ++static qat_req_t *qat_inst_req_alloc_cache(qat_inst_t *inst) ++{ ++ qat_req_t *req = NULL; ++ if (!inst->req_cache_num) ++ return NULL; ++ ++ req = QLIST_FIRST(&inst->req_cache_list); ++ QLIST_REMOVE(req, node); ++ inst->req_cache_num--; ++ ++ return req; ++} ++ ++static qat_req_t *qat_inst_req_alloc_slow(qat_inst_t *inst) ++{ ++ qat_req_t *req; ++ CpaBufferList *src_buf_list, *dst_buf_list; ++ ++ req = g_malloc0(sizeof(qat_req_t)); ++ src_buf_list = qat_buf_list_alloc(inst->node_affinity, ++ inst->src_buf_num, inst->src_buf_size); ++ if (unlikely(src_buf_list == NULL)) ++ goto err_src; ++ ++ dst_buf_list = qat_buf_list_alloc(inst->node_affinity, ++ inst->dst_buf_num, inst->dst_buf_size); ++ if (unlikely(dst_buf_list == NULL)) ++ goto err_dst; ++ ++ req->src_buf_list = src_buf_list; ++ req->dst_buf_list = dst_buf_list; ++ req->inst = inst; ++ ++ return req; ++err_dst: ++ qat_buf_list_free(src_buf_list, inst->src_buf_size); ++err_src: ++ g_free(req); ++ error_report("%s: fail to alloc a qat req", __func__); ++ return NULL; ++} ++ ++static qat_req_t *qat_inst_req_alloc(qat_inst_t *inst) ++{ ++ qat_req_t *req = qat_inst_req_alloc_cache(inst); ++ ++ if (unlikely(req == NULL)) { ++ req = qat_inst_req_alloc_slow(inst); ++ } ++ ++ return req; ++} ++ ++static qat_req_t *qat_inst_req_alloc_lock(qat_inst_t *inst) ++{ ++ qemu_spin_lock(&qat_dev->lock); ++ qat_req_t *req = qat_inst_req_alloc(inst); ++ qemu_spin_unlock(&qat_dev->lock); ++ return req; ++} ++ ++static void compress_callback(void *cb_ctx, CpaStatus status) ++{ ++ qat_req_t *req = (qat_req_t*)cb_ctx; ++ if (unlikely(req == NULL)) { ++ error_report("%s: Compression with NULL request ptr", __func__); ++ return; ++ } ++ req->inst->queue_depth--; ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ qat_inst_req_free(req); ++ qat_dev->responses++; ++ error_report("%s: Compress error: %x, ram addr: %lx", __func__, status, req->offset); ++ qat_dev->state = -1; ++ return; ++ } ++ ++ // put the req into the send list ++ qemu_spin_lock(&qat_dev->lock); ++ if (!qat_dev->flush_stage) { ++ QLIST_INSERT_HEAD(&qat_dev->req_post_list, req, node); ++ qat_dev->req_post_num++; ++ } else { ++ qat_req_t *prev_req; ++ while (qat_dev->req_post_num > 0) { ++ prev_req = QLIST_FIRST(&qat_dev->req_post_list); ++ QLIST_REMOVE(prev_req, node); ++ qat_dev->req_post_num--; ++ qat_send_req(prev_req); ++ } ++ qat_send_req(req); ++ if (qat_dev->requests == qat_dev->responses) { ++ 
qemu_cond_signal(&qat_dev->cond); ++ } ++ } ++ ++ qemu_spin_unlock(&qat_dev->lock); ++} ++ ++static void decompress_copy_to_guest_memory(qat_req_t *req) ++{ ++ MultiPageAddr *mpa = &req->mpa; ++ uint8_t *p = req->dst_buf_list->pBuffers[0].pData; ++ unsigned long start, pages; ++ uint8_t *dst_buf; ++ ++ for (int i = 0; i < mpa->last_idx; i++) { ++ start = multi_page_addr_get_one(&req->mpa, i); ++ pages = start & (~TARGET_PAGE_MASK); ++ start &= TARGET_PAGE_MASK; ++ for (int j = 0; j < pages; j++) { ++ dst_buf = req->block->host + start + (j << TARGET_PAGE_BITS); ++ memcpy(dst_buf, p, TARGET_PAGE_SIZE); ++ p += TARGET_PAGE_SIZE; ++ } ++ } ++} ++ ++static void decompress_callback(void *cb_ctx, CpaStatus status) ++{ ++ qat_req_t *req = (qat_req_t*)cb_ctx; ++ CpaDcRqResults *result; ++ ++ if (unlikely(req == NULL)) { ++ error_report("%s: Decompression with NULL request ptr", __func__); ++ return; ++ } ++ req->inst->queue_depth--; ++ result = &req->result; ++ ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: Decompress failed %d, ram addr=%lx, req->id=%ld", ++ __func__, status, req->offset, req->id); ++ qat_dev->state = -1; ++ goto decompress_err; ++ } else if (unlikely(result->checksum != req->checksum)) { ++ error_report("%s: error, checksum mismatch", __func__); ++ qat_dev->state = -1; ++ goto decompress_err; ++ } else if (unlikely((result->status != CPA_DC_OK) && ++ (result->status == CPA_DC_OVERFLOW))) { ++ error_report("%s: Decompress error: %d, consumed: %d, produced: %d", ++ __func__, result->status, result->consumed, result->produced); ++ qat_dev->state = -1; ++ goto decompress_err; ++ } else if (unlikely(result->produced != req->expect)) { ++ error_report("%s: unmatched, consumed=%d, produced=%d, expect=%d", ++ __func__, result->consumed, result->produced, req->expect); ++ qat_dev->state = -1; ++ goto decompress_err; ++ } ++ ++ if (!qat_dev->zero_copy) { ++ decompress_copy_to_guest_memory(req); ++ } ++ ++decompress_err: ++ qat_inst_req_free_lock(req); ++ qat_dev->responses++; ++ if (qat_dev->flush_stage && (qat_dev->requests == qat_dev->responses)) { ++ qemu_cond_signal(&qat_dev->cond); ++ } ++} ++ ++static int qat_inst_session_setup(qat_inst_t *inst, qat_setup_type_t type) ++{ ++ CpaInstanceHandle inst_handle = inst->inst_handle; ++ CpaDcInstanceCapabilities cap = {0}; ++ CpaDcSessionHandle sess_handle = NULL; ++ Cpa32U session_size = 0, ctx_size = 0; ++ CpaDcSessionSetupData sd = { 0 }; ++ CpaDcCallbackFn session_callback; ++ CpaStatus status; ++ ++ sd.compLevel = migrate_compress_level(); ++ sd.compType = CPA_DC_DEFLATE; ++ sd.huffType = CPA_DC_HT_FULL_DYNAMIC; ++ sd.autoSelectBestHuffmanTree = CPA_DC_ASB_DISABLED; ++ sd.sessState = CPA_DC_STATELESS; ++#if (CPA_DC_API_VERSION_NUM_MAJOR == 1 && CPA_DC_API_VERSION_NUM_MINOR < 6) ++ sd.deflateWindowSize = 7; ++#endif ++ sd.checksum = CPA_DC_CRC32; ++ if (type == QAT_SETUP_COMPRESS) { ++ sd.sessDirection = CPA_DC_DIR_COMPRESS; ++ session_callback = compress_callback; ++ } else { ++ sd.sessDirection = CPA_DC_DIR_DECOMPRESS; ++ session_callback = decompress_callback; ++ } ++ ++ status = cpaDcQueryCapabilities(inst_handle, &cap); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: fail to get cap", __func__); ++ return -1; ++ } ++ if (unlikely(!cap.checksumCRC32 || !cap.compressAndVerify)) { ++ error_report("%s: checksum isn't supported", __func__); ++ return -1; ++ } ++ ++ status = cpaDcGetSessionSize(inst_handle, &sd, &session_size, &ctx_size); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ 
error_report("%s: fail to get session size", __func__); ++ return -1; ++ } ++ ++ sess_handle = qat_mem_alloc(session_size + ctx_size, ++ inst->node_affinity, 64); ++ if (unlikely(sess_handle == NULL)) { ++ error_report("%s: fail to alloc session handle", __func__); ++ return -1; ++ } ++ ++ status = cpaDcInitSession(inst_handle, sess_handle, &sd, ++ NULL, session_callback); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: fail to init session", __func__); ++ goto err_free_sess_handle; ++ } ++ inst->sess_handle = sess_handle; ++ ++ return 0; ++err_free_sess_handle: ++ qat_mem_free((void **)&sess_handle); ++ return -1; ++} ++ ++static int qat_inst_add_to_epoll(qat_inst_t *inst) ++{ ++ CpaStatus status; ++ struct epoll_event event; ++ int fd, ret; ++ ++ status = icp_sal_DcGetFileDescriptor(inst->inst_handle, &fd); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: fail to get instance poll fd", __func__); ++ return -1; ++ } ++ ++ event.data.fd = fd; ++ event.events = EPOLLIN | EPOLLET; ++ ret = epoll_ctl(qat_dev->efd, EPOLL_CTL_ADD, fd, &event); ++ if (unlikely(ret < 0)) { ++ error_report("%s: fail to add to epoll list, ret=%d", __func__, ret); ++ return -1; ++ } ++ inst->fd = fd; ++ ++ return 0; ++} ++ ++static inline int qat_poll_insts(void) ++{ ++ for (int i = 0; i < qat_dev->inst_num; i++) { ++ qat_inst_t *inst = &qat_dev->insts[i]; ++ CpaStatus status = icp_sal_DcPollInstance(inst->inst_handle, 0); ++ if (unlikely((status != CPA_STATUS_SUCCESS) && ++ (status != CPA_STATUS_RETRY))) { ++ error_report("%s: fail to poll instance, i=%d, status=%d", __func__, i, status); ++ qat_dev->state = -1; ++ continue; ++ } ++ } ++ ++ return 0; ++} ++ ++void *qat_epoll_thread_run(void *arg) ++{ ++ int maxevents = (int)(qat_dev->inst_num); ++ struct epoll_event *events = ++ g_malloc0(sizeof(struct epoll_event) * maxevents); ++ while (epoll_thread_running) { ++ epoll_wait(qat_dev->efd, events, maxevents, 100); ++ qat_poll_insts(); ++ } ++ ++ g_free(events); ++ return NULL; ++} ++ ++static inline qat_inst_t *qat_select_inst_rr(void) ++{ ++ qat_dev->rr_inst_id = (qat_dev->rr_inst_id + 1) % qat_dev->inst_num; ++ return &qat_dev->insts[qat_dev->rr_inst_id]; ++} ++ ++static qat_req_t *qat_get_compress_req(qat_inst_t *inst, ++ RAMBlock *block, ++ MultiPageAddr *mpa) ++{ ++ qat_req_t *req = qat_inst_req_alloc(inst); ++ if (unlikely(req == NULL)) ++ return NULL; ++ ++ req->block = block; ++ req->offset = multi_page_addr_get_one(mpa, 0); ++ ++ qat_buf_list_set_bufs_from_mpa(req->src_buf_list, ++ (uint64_t)block->host, mpa); ++ if (qat_dev->zero_copy) { ++ // avoid page fault ++ uint8_t *p = req->dst_buf_list->pBuffers[0].pData; ++ for (int i = 0; i < RAM_SAVE_MULTI_PAGE_NUM; i++) { ++ uint8_t a = p[0]; a--; ++ p += TARGET_PAGE_SIZE; ++ } ++ } ++ memcpy(&req->mpa, mpa, sizeof(MultiPageAddr)); ++ return req; ++} ++static qat_req_t *qat_get_decompress_req(qat_inst_t *inst, ++ QEMUFile *f, ++ RAMBlock *block, ++ int src_bytes, ++ MultiPageAddr *mpa) ++{ ++ qat_req_t *req = qat_inst_req_alloc_lock(inst); ++ if (unlikely(req == NULL)) ++ return NULL; ++ ++ req->block = block; ++ req->offset = multi_page_addr_get_one(mpa, 0); ++ req->expect = mpa->pages * TARGET_PAGE_SIZE; ++ if (qat_dev->zero_copy) { ++ qat_buf_list_set_bufs_from_mpa(req->dst_buf_list, ++ (uint64_t)block->host, mpa); ++ } else { ++ memcpy(&req->mpa, mpa, sizeof(MultiPageAddr)); ++ } ++ ++ size_t size = qemu_get_buffer(f, req->src_buf_list->pBuffers[0].pData, src_bytes); ++ if (unlikely(size != src_bytes)) { ++ 
error_report("%s: not read enough data, %d, %lu", __func__, src_bytes, size); ++ return NULL; ++ } ++ req->src_buf_list->pBuffers[0].dataLenInBytes = src_bytes; ++ ++ return req; ++} ++int qat_send_req(qat_req_t *req) ++{ ++ CpaBufferList *buf_list; ++ CpaDcRqResults *result; ++ buf_list = req->dst_buf_list; ++ result = &req->result; ++ if (likely(result->status == CPA_DC_OK)) { ++ save_compressed_page_header(req->block, ++ &req->mpa, ++ (uint64_t)result->produced, ++ result->checksum); ++ save_compressed_data((void*)buf_list->pBuffers[0].pData, result->produced); ++ compression_counters.compressed_size += result->produced; ++ compression_counters.pages += req->mpa.pages; ++ } else if (result->status == CPA_DC_OVERFLOW) { ++ qat_dev->overflow++; ++ save_uncompressed_page(req->block, &req->mpa); ++ } ++ ++ qat_dev->responses++; ++ qat_inst_req_free(req); ++ return 0; ++} ++ ++void qat_flush_data_compress(void) ++{ ++ qat_req_t *req; ++ if (qat_dev->responses == qat_dev->requests) { ++ return; ++ } ++ qemu_spin_lock(&qat_dev->lock); ++ qat_dev->flush_stage = true; ++ while (qat_dev->req_post_num > 0) { ++ req = QLIST_FIRST(&qat_dev->req_post_list); ++ QLIST_REMOVE(req, node); ++ qat_dev->req_post_num--; ++ qat_send_req(req); ++ } ++ qemu_spin_unlock(&qat_dev->lock); ++ ++ while (qat_dev->responses != qat_dev->requests) { ++ qemu_cond_timedwait(&qat_dev->cond, &qat_dev->mutex, 1); ++ } ++ qemu_spin_lock(&qat_dev->lock); ++ qat_dev->flush_stage = false; ++ qemu_spin_unlock(&qat_dev->lock); ++} ++void qat_flush_data_decompress(void) ++{ ++ if (qat_dev->responses == qat_dev->requests) ++ return; ++ ++ qat_dev->flush_stage = true; ++ while (qat_dev->responses != qat_dev->requests) ++ qemu_cond_timedwait(&qat_dev->cond, &qat_dev->mutex, 1); ++ ++ qat_dev->flush_stage = false; ++} ++ ++int qat_compress_page(RAMBlock *block, MultiPageAddr *mpa) ++{ ++ CpaStatus status; ++ qat_inst_t *inst; ++ qat_req_t *req; ++ int ret = qat_save_zero_page(block, mpa); ++ if (ret < 0) { ++ error_report("%s: qat_save_zero_page failed", __func__); ++ return -1; ++ } ++ if (mpa->pages == 0) { ++ // all zero-pages ++ return 0; ++ } ++ while (1) { ++ qemu_spin_lock(&qat_dev->lock); ++ if (!qat_dev->req_post_num) { ++ qemu_spin_unlock(&qat_dev->lock); ++ break; ++ } ++ req = QLIST_FIRST(&qat_dev->req_post_list); ++ QLIST_REMOVE(req, node); ++ qat_dev->req_post_num--; ++ qemu_spin_unlock(&qat_dev->lock); ++ ++ qat_send_req(req); ++ } ++ ++ inst = qat_select_inst_rr(); ++ req = qat_get_compress_req(inst, block, mpa); ++ if (unlikely(req == NULL)) { ++ error_report("%s: qat get NULL request ptr for compression!", __func__); ++ return -1; ++ } ++ qat_dev->requests++; ++ req->id = qat_dev->requests; ++ req->result.checksum = 0; ++ while (inst->queue_depth >= g_instReqCacheNum) ++ usleep(100); ++ do { ++ status = cpaDcCompressData(inst->inst_handle, ++ inst->sess_handle, ++ req->src_buf_list, ++ req->dst_buf_list, ++ &req->result, ++ CPA_DC_FLUSH_FINAL, ++ req); ++ if (likely(status == CPA_STATUS_SUCCESS)) { ++ inst->queue_depth++; ++ break; ++ } else if (status == CPA_STATUS_RETRY) { ++ usleep(100); ++ } else { ++ error_report("%s: requests=%ld, fail to compress, status=%d", ++ __func__, qat_dev->requests, status); ++ qat_inst_req_free(req); ++ qat_dev->requests--; ++ return -1; ++ } ++ } while (status == CPA_STATUS_RETRY); ++ ++ return 0; ++} ++ ++int qat_decompress_page(QEMUFile *f, RAMBlock *block, int bytes, ++ MultiPageAddr *mpa, uint32_t checksum) ++{ ++ CpaStatus status; ++ if (qat_dev->state < 0) { ++ error_report("%s: 
error state", __func__); ++ return -1; ++ } ++ if (unlikely((block == NULL) || (bytes == 0))) { ++ error_report("%s: invalid param, block=%p, bytes=%d", __func__, block, bytes); ++ return -1; ++ } ++ qat_dev->requests++; ++ qat_inst_t *inst = qat_select_inst_rr(); ++ qat_req_t *req = qat_get_decompress_req(inst, f, block, bytes, mpa); ++ if (unlikely(req == NULL)) { ++ error_report("%s: fail to get a req", __func__); ++ return -1; ++ } ++ req->id = qat_dev->requests; ++ req->checksum = checksum; ++ req->result.checksum = 0; ++ while (inst->queue_depth >= g_instReqCacheNum) ++ usleep(100); ++ do { ++ status = cpaDcDecompressData(inst->inst_handle, ++ inst->sess_handle, ++ req->src_buf_list, ++ req->dst_buf_list, ++ &req->result, ++ CPA_DC_FLUSH_FINAL, ++ req); ++ if (likely(status == CPA_STATUS_SUCCESS)) { ++ inst->queue_depth++; ++ return 0; ++ } else if (status == CPA_STATUS_RETRY) { ++ usleep(100); ++ } else { ++ error_report("%s: requests=%ld, fail to decompress, status=%d", ++ __func__, qat_dev->requests, status); ++ qat_inst_req_free_lock(req); ++ return -1; ++ } ++ } while (status == CPA_STATUS_RETRY); ++ ++ return 0; ++} ++ ++static void qat_inst_req_cache_list_cleanup(qat_inst_t *inst) ++{ ++ qat_req_t *req = NULL, *req_next = NULL; ++ QLIST_FOREACH_SAFE(req, &inst->req_cache_list, node, req_next) { ++ QLIST_REMOVE(req, node); ++ qat_buf_list_free(req->src_buf_list, inst->src_buf_size); ++ qat_buf_list_free(req->dst_buf_list, inst->dst_buf_size); ++ g_free(req); ++ inst->req_cache_num--; ++ } ++ ++ /* Sanity check */ ++ if (inst->req_cache_num) { ++ error_report("%s: req_cache_num incorrect :%u", __func__, inst->req_cache_num); ++ } ++} ++ ++static int qat_inst_req_cache_list_setup(qat_inst_t *inst) ++{ ++ qat_req_t *req; ++ inst->req_cache_num = 0; ++ QLIST_INIT(&inst->req_cache_list); ++ ++ for (int i = 0; i < g_instReqCacheNum; i++) { ++ req = qat_inst_req_alloc_slow(inst); ++ if (unlikely(req == NULL)) { ++ error_report("%s: req pre-alloc failed", __func__); ++ return -1; ++ } ++ ++ QLIST_INSERT_HEAD(&inst->req_cache_list, req, node); ++ inst->req_cache_num++; ++ } ++ ++ return 0; ++} ++ ++static int qat_inst_setup(qat_inst_t *inst, qat_setup_type_t type) ++{ ++ CpaInstanceInfo2 inst_info; ++ CpaInstanceHandle inst_handle = inst->inst_handle; ++ CpaStatus status; ++ ++ status = cpaDcInstanceGetInfo2(inst_handle, &inst_info); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: fail to get instance info, status = %x", __func__, status); ++ return -1; ++ } ++ inst->node_affinity = inst_info.nodeAffinity; ++ if (type == QAT_SETUP_DECOMPRESS) { ++ inst->src_buf_num = 1; ++ inst->src_buf_size = QAT_DECOMP_REQ_BUF_SIZE; ++ inst->dst_buf_num = qat_dev->zero_copy ? RAM_SAVE_MAX_PAGE_NUM: 1; ++ inst->dst_buf_size = qat_dev->zero_copy ? 0: QAT_DECOMP_REQ_BUF_SIZE; ++ } else { ++ inst->src_buf_num = qat_dev->zero_copy ? RAM_SAVE_MULTI_PAGE_NUM: 1; ++ inst->src_buf_size = qat_dev->zero_copy ? 
0: QAT_COMP_REQ_BUF_SIZE; ++ inst->dst_buf_num = 1; ++ inst->dst_buf_size = QAT_COMP_REQ_BUF_SIZE; ++ } ++ status = cpaDcSetAddressTranslation(inst_handle, qat_addr_translate); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: unable to set address translation", __func__); ++ return -1; ++ } ++ ++ status = cpaDcStartInstance(inst_handle, 0, NULL); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: fail to start", __func__); ++ return -1; ++ } ++ ++ if (qat_inst_session_setup(inst, type) < 0) ++ return -1; ++ ++ if (qat_inst_add_to_epoll(inst) < 0) ++ return -1; ++ ++ if (qat_inst_req_cache_list_setup(inst) < 0) ++ return -1; ++ inst->queue_depth = 0; ++ return 0; ++} ++ ++static void qat_inst_cleanup(qat_inst_t *inst) ++{ ++ CpaDcSessionHandle sess_handle = inst->sess_handle; ++ CpaInstanceHandle inst_handle = inst->inst_handle; ++ CpaStatus status; ++ ++ qat_inst_req_cache_list_cleanup(inst); ++ /* Close the DC Session */ ++ status = cpaDcRemoveSession(inst_handle, sess_handle); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: fail to remove session, status=%d", __func__, status); ++ return; ++ } ++ ++ status = cpaDcStopInstance(inst_handle); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: fail to stop instance, status=%d", __func__, status); ++ return; ++ } ++ ++ qat_mem_free((void **)&sess_handle); ++} ++ ++static int check_qat_svm_status(CpaInstanceHandle inst_handle, ++ bool *svm_enabled) ++{ ++ CpaInstanceInfo2 inst_info; ++ CpaStatus status; ++ status = cpaDcInstanceGetInfo2(inst_handle, &inst_info); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: cpaDcInstanceGetInfo2() failed", __func__); ++ return -1; ++ } ++ *svm_enabled = inst_info.requiresPhysicallyContiguousMemory? false : true; ++ return 0; ++} ++ ++static int get_meta_buf_size(CpaInstanceHandle inst_handle, ++ uint32_t *meta_buf_size) ++{ ++ CpaStatus status; ++ status = cpaDcBufferListGetMetaSize(inst_handle, RAM_SAVE_MAX_PAGE_NUM, ++ meta_buf_size); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: fail to get memory size for meta data", __func__); ++ return -1; ++ } ++ return 0; ++} ++int qat_setup(qat_setup_type_t type) ++{ ++ uint16_t inst_num; ++ int ret, processNum, i; ++ CpaStatus status; ++ char ProcessNamePrefix[] = "SSL"; ++ char ProcessName[10] = "\0"; ++ if (!migrate_use_compression()) { ++ return 0; ++ } ++ status = qaeMemInit(); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: unable to init qaeMEM", __func__); ++ return -1; ++ } ++ ++ for (processNum = 0; processNum < MAX_PROCESS_NUM; processNum++) { ++ sprintf(ProcessName, "%s%d", ProcessNamePrefix, processNum); ++ status = icp_sal_userStart(processNum ? 
ProcessName : ProcessNamePrefix); ++ if (status == CPA_STATUS_SUCCESS) { ++ break; ++ } ++ } ++ ++ if (processNum == MAX_PROCESS_NUM && status != CPA_STATUS_SUCCESS) { ++ error_report("%s: unable to start SAL, status=%d", __func__, status); ++ return -1; ++ } ++ qat_dev = g_malloc0(sizeof(qat_dev_t)); ++ qat_dev->type = type; ++ qemu_spin_init(&qat_dev->lock); ++ QLIST_INIT(&qat_dev->req_post_list); ++ qat_dev->req_post_num = 0; ++ qat_dev->flush_stage = false; ++ qat_dev->state = 0; ++ qemu_cond_init(&qat_dev->cond); ++ qemu_mutex_init(&qat_dev->mutex); ++ ++ status = cpaDcGetNumInstances(&inst_num); ++ if (unlikely((status != CPA_STATUS_SUCCESS) || (inst_num == 0))) { ++ error_report("%s: no qat instance available", __func__); ++ goto err_free_qat_dev; ++ } ++ qat_dev->inst_num = inst_num; ++ ++ qat_dev->inst_handles = g_malloc0(sizeof(CpaInstanceHandle) * inst_num); ++ qat_dev->insts = g_malloc0(sizeof(qat_inst_t) * inst_num); ++ status = cpaDcGetInstances(inst_num, qat_dev->inst_handles); ++ if (unlikely(status != CPA_STATUS_SUCCESS)) { ++ error_report("%s: unable to get instance handles", __func__); ++ goto err_free_qat_dev; ++ } ++ ++ // Here we only check the first instance for simplicity. System administrator ++ // should make sure all instances have the same configuration. ++ ret = check_qat_svm_status(qat_dev->inst_handles[0], &qat_dev->svm_enabled); ++ if (unlikely(ret != 0)) { ++ error_report("%s: failed to check qat svm status", __func__); ++ goto err_free_qat_dev; ++ } ++ ++ if (qat_dev->svm_enabled) { ++ qat_dev->zero_copy = true; ++ qat_mem_alloc = qat_mem_alloc_virt; ++ qat_mem_free = qat_mem_free_virt; ++ qat_addr_translate = qat_virt_to_phy_svm1; ++ } else { ++ qat_dev->zero_copy = false; ++ qat_mem_alloc = qat_mem_alloc_phy; ++ qat_mem_free = qat_mem_free_phy; ++ qat_addr_translate = qat_virt_to_phy_svm0; ++ } ++ ++ ret = get_meta_buf_size(qat_dev->inst_handles[0], &qat_dev->meta_buf_size); ++ if (unlikely(ret != 0)) { ++ error_report("%s: unable to get instance handles", __func__); ++ goto err_free_qat_dev; ++ } ++ ++ qat_dev->efd = epoll_create1(0); ++ if (unlikely(qat_dev->efd < 0)) { ++ error_report("%s: fail to create epoll fd", __func__); ++ goto err_free_qat_dev; ++ } ++ epoll_thread_running = true; ++ ++ for (i = 0; i < inst_num; i++) { ++ qat_dev->insts[i].id = i; ++ qat_dev->insts[i].inst_handle = qat_dev->inst_handles[i]; ++ ret = qat_inst_setup(&qat_dev->insts[i], type); ++ if (unlikely(ret != 0)) { ++ goto err_inst_cleanup; ++ } ++ } ++ qemu_thread_create(&qat_dev->epoll_thread, "qat_epoll_thread", ++ qat_epoll_thread_run, qat_dev, QEMU_THREAD_JOINABLE); ++ if (unlikely(ret != 0)) { ++ goto err_inst_cleanup; ++ } ++ ++ info_report("%s: section=SSL%d, inst_num=%d, zero_copy=%d", ++ __func__, processNum, inst_num, qat_dev->zero_copy); ++ info_report("%s: cache_req_num=%d, MULTI_PAGE_NUM=%d, MAX_PAGE_NUM=%d", ++ __func__, g_instReqCacheNum, RAM_SAVE_MULTI_PAGE_NUM, RAM_SAVE_MAX_PAGE_NUM); ++ return 0; ++err_inst_cleanup: ++ while (i >= 0) { ++ if (qat_dev->insts[i].inst_handle) ++ qat_inst_cleanup(&qat_dev->insts[i]); ++ i--; ++ } ++ ++err_free_qat_dev: ++ if (qat_dev) { ++ if (qat_dev->inst_handles) ++ g_free(qat_dev->inst_handles); ++ if (qat_dev->insts) ++ g_free(qat_dev->insts); ++ if (qat_dev->efd) ++ close(qat_dev->efd); ++ qemu_cond_destroy(&qat_dev->cond); ++ qemu_mutex_destroy(&qat_dev->mutex); ++ g_free(qat_dev); ++ } ++ return -1; ++} ++ ++void qat_cleanup(void) ++{ ++ if (!migrate_use_compression()) ++ return; ++ if (unlikely(qat_dev == NULL)) ++ 
return; ++ ++ while (likely(qat_dev->responses != qat_dev->requests)) { ++ cpu_relax(); ++ } ++ ++ epoll_thread_running = false; ++ qemu_thread_join(&qat_dev->epoll_thread); ++ info_report("%s: requests=%ld, responses=%ld, overflow=%ld", ++ __func__, qat_dev->requests, qat_dev->responses, qat_dev->overflow); ++ close(qat_dev->efd); ++ ++ while (qat_dev->inst_num) { ++ qat_inst_cleanup(&qat_dev->insts[--qat_dev->inst_num]); ++ } ++ g_free(qat_dev->inst_handles); ++ g_free(qat_dev->insts); ++ qemu_cond_destroy(&qat_dev->cond); ++ qemu_mutex_destroy(&qat_dev->mutex); ++ g_free(qat_dev); ++ qat_dev = NULL; ++ icp_sal_userStop(); ++ qaeMemDestroy(); ++} +diff --git a/migration/qat.h b/migration/qat.h +new file mode 100644 +index 000000000..0d7525e98 +--- /dev/null ++++ b/migration/qat.h +@@ -0,0 +1,20 @@ ++#ifndef QEMU_MIGRATION_QAT_H ++#define QEMU_MIGRATION_QAT_H ++ ++#include "ram.h" ++ ++typedef enum _qat_setup_type_t { ++ QAT_SETUP_COMPRESS = 0, ++ QAT_SETUP_DECOMPRESS = 1, ++ QAT_SETUP_MAX, ++} qat_setup_type_t; ++ ++int qat_setup(qat_setup_type_t type); ++void qat_cleanup(void); ++int qat_compress_page(RAMBlock *block, MultiPageAddr *mpa); ++int qat_decompress_page(QEMUFile *f, RAMBlock *block, int bytes, ++ MultiPageAddr *mpa, uint32_t checksum); ++void qat_flush_data_compress(void); ++void qat_flush_data_decompress(void); ++void *qat_epoll_thread_run(void *arg); ++#endif +diff --git a/migration/ram.c b/migration/ram.c +index 862955f5b..1585108b8 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -55,7 +55,9 @@ + #include "qemu/iov.h" + #include "multifd.h" + #include "sysemu/runstate.h" +- ++#ifdef CONFIG_QAT_MIGRATION ++#include "qat.h" ++#endif + #include "hw/boards.h" /* for machine_dump_guest_core() */ + + #if defined(__linux__) +@@ -391,7 +393,7 @@ typedef struct RAMState RAMState; + static RAMState *ram_state; + + static NotifierWithReturnList precopy_notifier_list; +- ++static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset); + void precopy_infrastructure_init(void) + { + notifier_with_return_list_init(&precopy_notifier_list); +@@ -430,6 +432,10 @@ struct PageSearchStatus { + RAMBlock *block; + /* Current page to search from */ + unsigned long page; ++#ifdef CONFIG_QAT_MIGRATION ++ MultiPageAddr mpa; ++#endif ++ bool first_page_in_block; + /* Set once we wrap around */ + bool complete_round; + }; +@@ -899,6 +905,25 @@ exit: + return -1; + } + ++static void compress_save_cleanup(void) ++{ ++#ifdef CONFIG_QAT_MIGRATION ++ if (migrate_compress_with_qat()) { ++ qat_cleanup(); ++ return; ++ } ++#endif ++ compress_threads_save_cleanup(); ++} ++static int compress_save_setup(void) ++{ ++#ifdef CONFIG_QAT_MIGRATION ++ if (migrate_compress_with_qat()) { ++ return qat_setup(QAT_SETUP_COMPRESS); ++ } ++#endif ++ return compress_threads_save_setup(); ++} + /** + * save_page_header: write page header to wire + * +@@ -1063,8 +1088,8 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, + + /* XBZRLE encoding (if there is no overflow) */ + encoded_len = xbzrle_encode_buffer_func(prev_cached_page, XBZRLE.current_buf, +- TARGET_PAGE_SIZE, XBZRLE.encoded_buf, +- TARGET_PAGE_SIZE); ++ TARGET_PAGE_SIZE, XBZRLE.encoded_buf, ++ TARGET_PAGE_SIZE); + + /* + * Update the cache contents, so that it corresponds to the data +@@ -1107,9 +1132,86 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, + + return 1; + } ++#ifdef CONFIG_QAT_MIGRATION ++static inline void multi_page_addr_put_one(MultiPageAddr *mpa, ++ unsigned long offset, ++ unsigned long 
pages) ++{ ++ unsigned long idx = mpa->last_idx; ++ unsigned long *addr = mpa->addr; ++ ++ addr[idx] = (offset << TARGET_PAGE_BITS) | pages; ++ mpa->last_idx = idx + 1; ++ mpa->pages += pages; ++} ++ ++inline unsigned long multi_page_addr_get_one(MultiPageAddr *mpa, ++ unsigned long idx) ++{ ++ return mpa->addr[idx]; ++} ++ ++static inline unsigned long ++migration_bitmap_find_dirty_multiple(RAMState *rs, ++ RAMBlock *rb, ++ unsigned long start, ++ MultiPageAddr *mpa) ++{ ++ unsigned long *bitmap = rb->bmap; ++ unsigned long size = rb->used_length >> TARGET_PAGE_BITS; ++ unsigned long pages = 0; + ++ mpa->last_idx = 0; ++ mpa->pages = 0; ++ if (ramblock_is_ignored(rb)) { ++ return size; ++ } ++ ++ if (start >= size) { ++ return size; ++ } ++ ++ /* Second stage etc. */ ++ while ((mpa->pages < RAM_SAVE_MULTI_PAGE_NUM)) { ++ start = find_next_bit(bitmap, size, start); ++ if (start >= size) { ++ start = find_next_bit(bitmap, size, 0); ++ return size; ++ } ++ ++ uint64_t bit_offset = start & 0x3f; ++ if (unlikely(bit_offset == 63)) { ++ pages = 1; ++ } else { ++ uint64_t bitmap_offset; ++ uint64_t mask; ++ unsigned long value; ++ bitmap_offset = start >> 6; ++ value = bitmap[bitmap_offset]; ++ mask = ~(((uint64_t)1 << bit_offset) - 1); ++ if (mask == (mask&value)) { ++ pages = 64 - bit_offset; ++ if ((start + pages) > size) { ++ pages = size - start; ++ } ++ } else { ++ unsigned long end = find_next_zero_bit(bitmap, size, start); ++ pages = end - start; ++ } ++ } ++ ++ if (mpa->pages + pages > RAM_SAVE_MULTI_PAGE_NUM) { ++ pages = RAM_SAVE_MULTI_PAGE_NUM - mpa->pages; ++ } ++ multi_page_addr_put_one(mpa, start, pages); ++ start += pages; ++ } ++ ++ return start - 1; ++} ++#endif + /** +- * migration_bitmap_find_dirty: find the next dirty page from start ++ * migration_bitmap_find_dirty_single: find the next dirty page from start + * + * Returns the page offset within memory region of the start of a dirty page + * +@@ -1118,8 +1220,8 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, + * @start: page where we start the search + */ + static inline +-unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, +- unsigned long start) ++unsigned long migration_bitmap_find_dirty_single(RAMState *rs, RAMBlock *rb, ++ unsigned long start) + { + unsigned long size = rb->used_length >> TARGET_PAGE_BITS; + unsigned long *bitmap = rb->bmap; +@@ -1130,7 +1232,66 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, + + return find_next_bit(bitmap, size, start); + } ++static inline unsigned long ++migration_bitmap_find_dirty(RAMState *rs, PageSearchStatus *pss) ++{ ++#ifdef CONFIG_QAT_MIGRATION ++ if (!pss->first_page_in_block && migrate_compress_with_qat()) { ++ return migration_bitmap_find_dirty_multiple(rs, pss->block, pss->page, &pss->mpa);; ++ } ++#endif ++ return migration_bitmap_find_dirty_single(rs, pss->block, pss->page); ++} + ++#ifdef CONFIG_QAT_MIGRATION ++int qat_save_zero_page(RAMBlock *block, MultiPageAddr *mpa) ++{ ++ uint64_t start, offset, pages; ++ RAMState *rs = ram_state; ++ MultiPageAddr new_mpa = {0}; ++ int ret; ++ int pre_zero_page; ++ int non_zero_page_num; ++ int zero_page_num = 0; ++ int checked_page_num = 0; ++ int check_zero_page = 1; ++ for (int i = 0; i < mpa->last_idx; i++) { ++ start = multi_page_addr_get_one(mpa, i); ++ pages = start & (~TARGET_PAGE_MASK); ++ start >>= TARGET_PAGE_BITS; ++ pre_zero_page = -1; ++ if (check_zero_page) { ++ for (int j = 0; j < pages; j++) { ++ offset = (start + j) << TARGET_PAGE_BITS; ++ ret = 
save_zero_page(rs, block, offset); ++ if (ret > 0) { ++ non_zero_page_num = j - pre_zero_page - 1; ++ if (non_zero_page_num) { ++ multi_page_addr_put_one(&new_mpa, start + pre_zero_page + 1, non_zero_page_num); ++ } ++ pre_zero_page = j; ++ zero_page_num++; ++ } ++ checked_page_num++; ++ if (unlikely(checked_page_num == 10)) { ++ if (zero_page_num <= 1) { ++ check_zero_page = 0; ++ break; ++ } ++ } ++ } ++ } ++ non_zero_page_num = pages - pre_zero_page - 1; ++ if (non_zero_page_num) { ++ multi_page_addr_put_one(&new_mpa, start + pre_zero_page + 1, non_zero_page_num); ++ } ++ } ++ if (zero_page_num != 0) { ++ memcpy(mpa, &new_mpa, 2 * sizeof(unsigned long) + (new_mpa.last_idx + 1) * sizeof(unsigned long)); ++ } ++ return 0; ++} ++#endif + static void migration_clear_memory_region_dirty_bitmap(RAMBlock *rb, + unsigned long page) + { +@@ -1211,7 +1372,7 @@ unsigned long colo_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, + return first; + } + +-static inline bool migration_bitmap_clear_dirty(RAMState *rs, ++static inline bool migration_bitmap_clear_dirty_single(RAMState *rs, + RAMBlock *rb, + unsigned long page) + { +@@ -1234,7 +1395,40 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, + + return ret; + } ++#ifdef CONFIG_QAT_MIGRATION ++static inline void migration_bitmap_clear_dirty_multiple(RAMState *rs, ++ RAMBlock *rb, ++ MultiPageAddr *mpa) ++{ ++ unsigned long start, multi_pages, page, i, j; ++ bool ret; + ++ for (i = 0; i < mpa->last_idx; i++) { ++ start = multi_page_addr_get_one(mpa, i); ++ multi_pages = start & (~TARGET_PAGE_MASK); ++ start = start >> TARGET_PAGE_BITS; ++ for (j = 0; j < multi_pages; j++) { ++ page = start + j; ++ ++ if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) { ++ uint8_t shift = rb->clear_bmap_shift; ++ hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift); ++ hwaddr start_addr = (page << TARGET_PAGE_BITS) & (-size); ++ ++ assert(shift >= 6); ++ trace_migration_bitmap_clear_dirty(rb->idstr, start_addr, size, page); ++ memory_region_clear_dirty_bitmap(rb->mr, start_addr, size); ++ } ++ ++ ret = test_and_clear_bit(page, rb->bmap); ++ ++ if (ret) { ++ rs->migration_dirty_pages--; ++ } ++ } ++ } ++} ++#endif + static void dirty_bitmap_clear_section(MemoryRegionSection *section, + void *opaque) + { +@@ -1672,7 +1866,60 @@ static int ram_save_multifd_page(RAMState *rs, RAMBlock *block, + + return 1; + } ++#ifdef CONFIG_QAT_MIGRATION ++void save_compressed_page_header(RAMBlock *block, ++ MultiPageAddr *mpa, ++ uint64_t bytes, ++ uint32_t checksum) ++{ ++ int i, header_bytes; ++ QEMUFile *f = ram_state->f; ++ ram_addr_t offset = multi_page_addr_get_one(mpa, 0) & TARGET_PAGE_MASK; ++ ++ offset |= (RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_CONTINUE); ++ qemu_put_be64(f, offset); ++ ++ qemu_put_be64(f, bytes); ++ qemu_put_be64(f, mpa->pages); ++ qemu_put_be64(f, mpa->last_idx); ++ qemu_put_be32(f, checksum); ++ for (i = 0; i < mpa->last_idx; i++) { ++ qemu_put_be64(f, mpa->addr[i]); ++ } ++ header_bytes = sizeof(offset) + sizeof(bytes) + ++ sizeof(mpa->pages) + sizeof(mpa->last_idx) + ++ mpa->last_idx * sizeof(mpa->addr[0]) + sizeof(checksum); ++ ram_counters.transferred += header_bytes; ++} + ++void save_compressed_data(void *data, uint32_t bytes) ++{ ++ QEMUFile *f = ram_state->f; ++ ++ qemu_put_buffer(f, data, bytes); ++ ram_counters.transferred += bytes; ++} ++ ++void save_uncompressed_page(RAMBlock *block, MultiPageAddr *mpa) ++{ ++ int i, j, pages; ++ ram_addr_t start, offset; ++ QEMUFile *f = ram_state->f; ++ ++ for (i = 0; i < mpa->last_idx; 
i++) { ++ start = multi_page_addr_get_one(mpa, i); ++ pages = start & (~TARGET_PAGE_MASK); ++ start &= TARGET_PAGE_MASK; ++ for (j = 0; j < pages; j++) { ++ offset = start + (j << TARGET_PAGE_BITS); ++ qemu_put_be64(f, offset | (RAM_SAVE_FLAG_CONTINUE | RAM_SAVE_FLAG_PAGE)); ++ qemu_put_buffer(f, block->host + offset, TARGET_PAGE_SIZE); ++ ram_counters.transferred += (sizeof(offset) + TARGET_PAGE_SIZE); ++ ram_counters.normal++; ++ } ++ } ++} ++#endif + static bool do_compress_ram_page(CompressParam *param, RAMBlock *block) + { + RAMState *rs = ram_state; +@@ -1731,6 +1978,11 @@ static void flush_compressed_data(RAMState *rs) + if (!save_page_use_compression(rs)) { + return; + } ++#ifdef CONFIG_QAT_MIGRATION ++ if (migrate_compress_with_qat()) { ++ qat_flush_data_compress(); ++ } ++#endif + thread_count = migrate_compress_threads(); + + qemu_mutex_lock(&comp_done_lock); +@@ -1746,10 +1998,10 @@ static void flush_compressed_data(RAMState *rs) + if (!comp_param[idx].quit) { + len = qemu_put_qemu_file(rs->f, comp_param[idx].file); + /* +- * it's safe to fetch zero_page without holding comp_done_lock +- * as there is no further request submitted to the thread, +- * i.e, the thread should be waiting for a request at this point. +- */ ++ * it's safe to fetch zero_page without holding comp_done_lock ++ * as there is no further request submitted to the thread, ++ * i.e, the thread should be waiting for a request at this point. ++ */ + update_compress_thread_counts(&comp_param[idx], len); + } + qemu_mutex_unlock(&comp_param[idx].mutex); +@@ -1811,7 +2063,7 @@ retry: + */ + static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) + { +- pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page); ++ pss->page = migration_bitmap_find_dirty(rs, pss); + if (pss->complete_round && pss->block == rs->last_seen_block && + pss->page >= rs->last_page) { + /* +@@ -1821,21 +2073,29 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) + *again = false; + return false; + } ++#ifdef CONFIG_QAT_MIGRATION ++ if (pss->mpa.pages) { ++ /* Can go around again, but... */ ++ *again = true; ++ /* We've found something so probably don't need to */ ++ return true; ++ } ++#endif + if (!offset_in_ramblock(pss->block, +- ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) { ++ ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)) { + /* Didn't find anything in this RAM Block */ + pss->page = 0; + pss->block = QLIST_NEXT_RCU(pss->block, next); + if (!pss->block) { + /* +- * If memory migration starts over, we will meet a dirtied page +- * which may still exists in compression threads's ring, so we +- * should flush the compressed data to make sure the new page +- * is not overwritten by the old one in the destination. +- * +- * Also If xbzrle is on, stop using the data compression at this +- * point. In theory, xbzrle can do better than compression. +- */ ++ * If memory migration starts over, we will meet a dirtied page ++ * which may still exists in compression threads's ring, so we ++ * should flush the compressed data to make sure the new page ++ * is not overwritten by the old one in the destination. ++ * ++ * Also If xbzrle is on, stop using the data compression at this ++ * point. In theory, xbzrle can do better than compression. 
++             */
+             flush_compressed_data(rs);
+ 
+             /* Hit the end of the list */
+@@ -2020,7 +2280,7 @@ out:
+ static inline void populate_read_range(RAMBlock *block, ram_addr_t offset,
+                                        ram_addr_t size)
+ {
+-    const ram_addr_t end = offset + size;
++    const ram_addr_t end = offset + size;
+ 
+     /*
+      * We read one byte of each page; this will preallocate page tables if
+@@ -2150,7 +2410,7 @@ int ram_write_tracking_start(void)
+                                  block->max_length, true, false)) {
+             goto fail;
+         }
+-
++ 
+         trace_ram_write_tracking_ramblock_start(block->idstr, block->page_size,
+                                                 block->host, block->max_length);
+     }
+@@ -2433,7 +2693,7 @@ static bool save_page_use_compression(RAMState *rs)
+  * has been properly handled by compression, otherwise needs other
+  * paths to handle it
+  */
+-static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
++static bool save_compress_page(RAMState *rs, PageSearchStatus *pss)
+ {
+     if (!save_page_use_compression(rs)) {
+         return false;
+@@ -2449,14 +2709,26 @@ static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
+      * We post the fist page as normal page as compression will take
+      * much CPU resource.
+      */
+-    if (block != rs->last_sent_block) {
++    if (pss->first_page_in_block) {
+         flush_compressed_data(rs);
+         return false;
+     }
+ 
+-    if (compress_page_with_multi_thread(rs, block, offset) > 0) {
+-        return true;
++    int ret;
++#ifdef CONFIG_QAT_MIGRATION
++    if (migrate_compress_with_qat()) {
++        ret = qat_compress_page(pss->block, &pss->mpa);
++        if (ret < 0) {
++            error_report("%s: qat_compress_page failed", __func__);
++        }
++    } else
++#endif
++    {
++        ret = compress_page_with_multi_thread(rs, pss->block, pss->page << TARGET_PAGE_BITS);
+     }
++    if (ret >= 0) {
++        return true;
++    }
+ 
+     compression_counters.busy++;
+     return false;
+@@ -2482,7 +2754,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
+         return res;
+     }
+ 
+-    if (save_compress_page(rs, block, offset)) {
++    if (save_compress_page(rs, pss)) {
+         return 1;
+     }
+ 
+@@ -2513,7 +2785,19 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
+ 
+     return ram_save_page(rs, pss, last_stage);
+ }
++#ifdef CONFIG_QAT_MIGRATION
++static int ram_save_host_page_multiple(RAMState *rs, PageSearchStatus *pss,
++                                       bool last_stage)
++{
++    migration_bitmap_clear_dirty_multiple(rs, pss->block, &pss->mpa);
++    if (save_compress_page(rs, pss)) {
++        return pss->mpa.pages;
++    }
+ 
++    error_report("%s: multi-page compression failed", __func__);
++    return -1;
++}
++#endif
+ /**
+  * ram_save_host_page: save a whole host page
+  *
+@@ -2532,7 +2816,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
+  * @pss: data about the page we want to send
+  * @last_stage: if we are at the completion stage
+  */
+-static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
++static int ram_save_host_page_single(RAMState *rs, PageSearchStatus *pss,
+                               bool last_stage)
+ {
+     int tmppages, pages = 0;
+@@ -2543,14 +2827,9 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
+     unsigned long start_page = pss->page;
+     int res;
+ 
+-    if (ramblock_is_ignored(pss->block)) {
+-        error_report("block %s should not be migrated !", pss->block->idstr);
+-        return 0;
+-    }
+-
+     do {
+         /* Check the pages is dirty and if it is send it */
+-        if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
++        if (migration_bitmap_clear_dirty_single(rs, pss->block, pss->page)) {
+             tmppages = ram_save_target_page(rs, pss, last_stage);
+             if (tmppages < 0) {
+                 return tmppages;
+@@ -2565,7 +2844,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
+                 migration_rate_limit();
+             }
+         }
+-        pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
++        pss->page = migration_bitmap_find_dirty(rs, pss);
+     } while ((pss->page < hostpage_boundary) &&
+              offset_in_ramblock(pss->block,
+                                 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
+@@ -2575,7 +2854,21 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
+     res = ram_save_release_protection(rs, pss, start_page);
+     return (res < 0 ? res : pages);
+ }
++static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
++                              bool last_stage)
++{
+ 
++    if (ramblock_is_ignored(pss->block)) {
++        error_report("block %s should not be migrated !", pss->block->idstr);
++        return 0;
++    }
++#ifdef CONFIG_QAT_MIGRATION
++    if (!pss->first_page_in_block && migrate_compress_with_qat()) {
++        return ram_save_host_page_multiple(rs, pss, last_stage);
++    }
++#endif
++    return ram_save_host_page_single(rs, pss, last_stage);
++}
+ /**
+  * ram_find_and_save_block: finds a dirty page and sends it to f
+  *
+@@ -2605,7 +2898,11 @@ static int ram_find_and_save_block(RAMState *rs, bool last_stage)
+     pss.block = rs->last_seen_block;
+     pss.page = rs->last_page;
+     pss.complete_round = false;
+-
++    pss.first_page_in_block = false;
++#ifdef CONFIG_QAT_MIGRATION
++    pss.mpa.pages = 0;
++    pss.mpa.last_idx = 0;
++#endif
+     if (!pss.block) {
+         pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
+     }
+@@ -2615,6 +2912,11 @@ static int ram_find_and_save_block(RAMState *rs, bool last_stage)
+         found = get_queued_page(rs, &pss);
+ 
+         if (!found) {
++            if (pss.block != rs->last_seen_block) {
++                pss.first_page_in_block = true;
++            } else {
++                pss.first_page_in_block = false;
++            }
+             /* priority queue empty, so just search for something dirty */
+             found = find_dirty_block(rs, &pss, &again);
+         }
+@@ -2733,7 +3035,7 @@ static void ram_save_cleanup(void *opaque)
+     }
+ 
+     xbzrle_cleanup();
+-    compress_threads_save_cleanup();
++    compress_save_cleanup();
+     ram_state_cleanup(rsp);
+ }
+ 
+@@ -3328,14 +3630,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
+     RAMState **rsp = opaque;
+     RAMBlock *block;
+ 
+-    if (compress_threads_save_setup()) {
++    if (compress_save_setup()) {
+         return -1;
+     }
+ 
+     /* migration has already setup the bitmap, reuse it. */
+     if (!migration_in_colo_state()) {
+         if (ram_init_all(rsp) != 0) {
+-            compress_threads_save_cleanup();
++            compress_save_cleanup();
+             return -1;
+         }
+     }
+@@ -3471,6 +3773,7 @@ out:
+     if (ret >= 0
+         && migration_is_setup_or_active(migrate_get_current()->state)) {
+         multifd_send_sync_main(rs->f);
++        flush_compressed_data(rs);
+         qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+         qemu_fflush(f);
+         ram_counters.transferred += 8;
+@@ -3512,7 +3815,6 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
+         /* flush all remaining blocks regardless of rate limiting */
+         while (true) {
+             int pages;
+-
+             pages = ram_find_and_save_block(rs, !migration_in_colo_state());
+             /* no more blocks to sent */
+             if (pages == 0) {
+@@ -3968,6 +4270,27 @@ void colo_release_ram_cache(void)
+     ram_state_cleanup(&ram_state);
+ }
+ 
++static int decompress_load_setup(QEMUFile *f)
++{
++#ifdef CONFIG_QAT_MIGRATION
++    if (migrate_compress_with_qat()) {
++        return qat_setup(QAT_SETUP_DECOMPRESS);
++    }
++#endif
++    return compress_threads_load_setup(f);
++}
++
++static void decompress_load_cleanup(void)
++{
++#ifdef CONFIG_QAT_MIGRATION
++    if (migrate_compress_with_qat()) {
++        qat_cleanup();
++        return;
++    }
++#endif
++    compress_threads_load_cleanup();
++}
++
+ /**
+  * ram_load_setup: Setup RAM for migration incoming side
+  *
+@@ -3978,7 +4300,7 @@ void colo_release_ram_cache(void)
+  */
+ static int ram_load_setup(QEMUFile *f, void *opaque)
+ {
+-    if (compress_threads_load_setup(f)) {
++    if (decompress_load_setup(f)) {
+         return -1;
+     }
+ 
+@@ -3997,7 +4319,7 @@ static int ram_load_cleanup(void *opaque)
+     }
+ 
+     xbzrle_load_cleanup();
+-    compress_threads_load_cleanup();
++    decompress_load_cleanup();
+ 
+     RAMBLOCK_FOREACH_NOT_IGNORED(rb) {
+         g_free(rb->receivedmap);
+@@ -4253,7 +4575,7 @@ void colo_flush_ram_cache(void)
+                 unsigned long i = 0;
+ 
+                 for (i = 0; i < num; i++) {
+-                    migration_bitmap_clear_dirty(ram_state, block, offset + i);
++                    migration_bitmap_clear_dirty_single(ram_state, block, offset + i);
+                 }
+                 dst_host = block->host
+                          + (((ram_addr_t)offset) << TARGET_PAGE_BITS);
+@@ -4267,7 +4589,51 @@ void colo_flush_ram_cache(void)
+     trace_colo_flush_ram_cache_end();
+     qemu_mutex_unlock(&ram_state->bitmap_mutex);
+ }
++static int decompress_page_single(QEMUFile *f, void *host)
++{
++    int len = qemu_get_be32(f);
++    if (decompress_ops->check_len(len)) {
++        error_report("Invalid compressed data length: %d", len);
++        return -EINVAL;
++    }
++    decompress_data_with_multi_threads(f, host, len);
++    return 0;
++}
++#ifdef CONFIG_QAT_MIGRATION
++static int decompress_page_multiple(QEMUFile *f, RAMBlock *block,
++                                    ram_addr_t addr)
++{
++    unsigned long bytes;
++    unsigned long i;
++    MultiPageAddr mpa;
++    uint32_t checksum;
++    int ret;
++
++    bytes = qemu_get_be64(f);
++    mpa.pages = qemu_get_be64(f);
++    mpa.last_idx = qemu_get_be64(f);
++    mpa.addr[0] = 0;
++    checksum = qemu_get_be32(f);
++
++    for (i = 0; i < mpa.last_idx; i++) {
++        mpa.addr[i] = qemu_get_be64(f);
++    }
++
++    /* Sanity check */
++    if ((mpa.addr[0] & TARGET_PAGE_MASK) != addr) {
++        error_report("%s: unmatched addr received", __func__);
++        return -EINVAL;
++    }
++    ret = 0;
++    if (migrate_compress_with_qat()) {
++        ret = qat_decompress_page(f, block, bytes, &mpa, checksum);
++    } else {
++        error_report("%s: CPU doesn't support multipage compress", __func__);
++    }
+ 
++    return ret;
++}
++#endif
+ /**
+  * ram_load_precopy: load pages in precopy case
+  *
+@@ -4280,6 +4646,7 @@ void colo_flush_ram_cache(void)
+  */
+ static int ram_load_precopy(QEMUFile *f)
+ {
++    RAMBlock *block = NULL;
+     int flags = 0, ret = 0, invalid_flags = 0, len = 0, i = 0;
+     /* ADVISE is earlier, it shows the source has the postcopy capability on */
+     bool postcopy_advised = postcopy_is_advised();
+@@ -4318,7 +4685,7 @@ static int ram_load_precopy(QEMUFile *f)
+ 
+         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
+                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
+-            RAMBlock *block = ram_block_from_stream(f, flags);
++            block = ram_block_from_stream(f, flags);
+ 
+             host = host_from_ram_block_offset(block, addr);
+             /*
+@@ -4361,7 +4728,6 @@ static int ram_load_precopy(QEMUFile *f)
+             /* Synchronize RAM block list */
+             total_ram_bytes = addr;
+             while (!ret && total_ram_bytes) {
+-                RAMBlock *block;
+                 char id[256];
+                 ram_addr_t length;
+ 
+@@ -4429,13 +4795,13 @@ static int ram_load_precopy(QEMUFile *f)
+             break;
+ 
+         case RAM_SAVE_FLAG_COMPRESS_PAGE:
+-            len = qemu_get_be32(f);
+-            if (decompress_ops->check_len(len)) {
+-                error_report("Invalid compressed data length: %d", len);
+-                ret = -EINVAL;
+-                break;
++#ifdef CONFIG_QAT_MIGRATION
++            if (migrate_compress_with_qat()) {
++                ret = decompress_page_multiple(f, block, addr);
++                break;
+             }
+-            decompress_data_with_multi_threads(f, host, len);
++#endif
++            ret = decompress_page_single(f, host);
+             break;
+ 
+         case RAM_SAVE_FLAG_XBZRLE:
+@@ -4466,11 +4831,24 @@ static int ram_load_precopy(QEMUFile *f)
+                 memcpy(host_bak, host, TARGET_PAGE_SIZE);
+             }
+         }
+-
+-    ret |= wait_for_decompress_done();
++#ifdef CONFIG_QAT_MIGRATION
++    if (!migrate_compress_with_qat()) {
++        ret |= wait_for_decompress_done();
++    }
++#else
++    ret |= wait_for_decompress_done();
++#endif
+     return ret;
+ }
+-
++#ifdef CONFIG_QAT_MIGRATION
++static void flush_decompressed_data(void)
++{
++    if (!migrate_compress_with_qat()) {
++        return;
++    }
++    qat_flush_data_decompress();
++}
++#endif
+ static int ram_load(QEMUFile *f, void *opaque, int version_id)
+ {
+     int ret = 0;
+@@ -4499,6 +4875,11 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
+         } else {
+             ret = ram_load_precopy(f);
+         }
++#ifdef CONFIG_QAT_MIGRATION
++        if (migrate_use_compression()) {
++            flush_decompressed_data();
++        }
++#endif
+     }
+     trace_ram_load_complete(ret, seq_iter);
+ 
+diff --git a/migration/ram.h b/migration/ram.h
+index c515396a9..7b891ba2c 100644
+--- a/migration/ram.h
++++ b/migration/ram.h
+@@ -36,7 +36,16 @@
+ extern MigrationStats ram_counters;
+ extern XBZRLECacheStats xbzrle_counters;
+ extern CompressionStats compression_counters;
++#ifdef CONFIG_QAT_MIGRATION
++#define RAM_SAVE_MAX_PAGE_NUM 256
++#define RAM_SAVE_MULTI_PAGE_NUM 63
+ 
++typedef struct MultiPageAddr {
++    unsigned long pages;
++    unsigned long last_idx;
++    unsigned long addr[RAM_SAVE_MAX_PAGE_NUM];
++} MultiPageAddr;
++#endif
+ bool ramblock_is_ignored(RAMBlock *block);
+ /* Should be holding either ram_list.mutex, or the RCU lock. */
+ #define RAMBLOCK_FOREACH_NOT_IGNORED(block)            \
+@@ -87,5 +96,15 @@ bool ram_write_tracking_compatible(void);
+ void ram_write_tracking_prepare(void);
+ int ram_write_tracking_start(void);
+ void ram_write_tracking_stop(void);
+-
++#ifdef CONFIG_QAT_MIGRATION
++void qat_zero_copy_cleanup(void);
++void save_compressed_page_header(RAMBlock *block,
++                                 MultiPageAddr *mpa,
++                                 uint64_t bytes,
++                                 uint32_t checksum);
++void save_compressed_data(void *data, uint32_t bytes);
++void save_uncompressed_page(RAMBlock *block, MultiPageAddr *mpa);
++unsigned long multi_page_addr_get_one(MultiPageAddr *mpa, unsigned long idx);
++int qat_save_zero_page(RAMBlock *block, MultiPageAddr *mpa);
++#endif
+ #endif
+diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
+index 957001123..7167a37a2 100644
+--- a/monitor/hmp-cmds.c
++++ b/monitor/hmp-cmds.c
+@@ -417,6 +417,12 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
+         monitor_printf(mon, "%s: %u\n",
+             MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_LEVEL),
+             params->compress_level);
++#ifdef CONFIG_QAT_MIGRATION
++        assert(params->has_compress_with_qat);
++        monitor_printf(mon, "%s: %u\n",
++            MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_WITH_QAT),
++            params->compress_with_qat);
++#endif
+         assert(params->has_compress_threads);
+         monitor_printf(mon, "%s: %u\n",
+             MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_THREADS),
+@@ -1217,6 +1223,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
+         p->has_compress_wait_thread = true;
+         visit_type_bool(v, param, &p->compress_wait_thread, &err);
+         break;
++#ifdef CONFIG_QAT_MIGRATION
++    case MIGRATION_PARAMETER_COMPRESS_WITH_QAT:
++        p->has_compress_with_qat = true;
++        visit_type_bool(v, param, &p->compress_with_qat, &err);
++        break;
++#endif
+     case MIGRATION_PARAMETER_DECOMPRESS_THREADS:
+         p->has_decompress_threads = true;
+         visit_type_uint8(v, param, &p->decompress_threads, &err);
+diff --git a/qapi/migration.json b/qapi/migration.json
+index e965f4329..b4f1229ed 100644
+--- a/qapi/migration.json
++++ b/qapi/migration.json
+@@ -766,6 +766,9 @@
+ #                        block device name if there is one, and to their node name
+ #                        otherwise. (Since 5.2)
+ #
++# @compress-with-qat: Select QAT to compress/decompress data for live
++#                     migration
++#
+ # Features:
+ # @unstable: Member @x-checkpoint-delay is experimental.
+ #
+@@ -786,7 +789,7 @@
+            'xbzrle-cache-size', 'max-postcopy-bandwidth',
+            'max-cpu-throttle', 'multifd-compression',
+            'multifd-zlib-level' ,'multifd-zstd-level',
+-           'block-bitmap-mapping' ] }
++           'block-bitmap-mapping', 'compress-with-qat' ] }
+ 
+ ##
+ # @MigrateSetParameters:
+@@ -970,7 +973,8 @@
+             '*multifd-compression': 'MultiFDCompression',
+             '*multifd-zlib-level': 'uint8',
+             '*multifd-zstd-level': 'uint8',
+-            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
++            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
++            '*compress-with-qat': 'bool' } }
+ 
+ ##
+ # @migrate-set-parameters:
+@@ -1172,7 +1176,8 @@
+             '*multifd-compression': 'MultiFDCompression',
+             '*multifd-zlib-level': 'uint8',
+             '*multifd-zstd-level': 'uint8',
+-            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
++            '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
++            '*compress-with-qat': 'bool' } }
+ 
+ ##
+ # @query-migrate-parameters:
+-- 
+2.33.0
-- 
Gitee
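
A minimal usage sketch (illustration only, not part of the patch): assuming the package is rebuilt with the qat_migration bcond enabled and a QAT device with its driver stack is present on both hosts, the compress-with-qat parameter introduced in the qapi/migration.json and monitor/hmp-cmds.c hunks above would be driven from the HMP monitor roughly as follows; the destination address is a placeholder.

    (qemu) migrate_set_capability compress on
    (qemu) migrate_set_parameter compress-with-qat on
    (qemu) migrate -d tcp:DEST_HOST:PORT

Enabling the compress capability on both sides is an assumption drawn from save_page_use_compression() gating the QAT path in ram.c; qat.c, added earlier in this patch, is the authoritative reference for any further setup.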