From 52d7713c7494e5703fec3c808581575a340bdeb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=87=A4?= Date: Wed, 16 Jul 2025 07:31:08 +0000 Subject: [PATCH 1/6] refactor: change func position MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 白凤 --- KAELz4/src/v1/kaelz4_comp.c | 11 ------ build.sh | 68 ++++++++++++++++++------------------- 2 files changed, 34 insertions(+), 45 deletions(-) diff --git a/KAELz4/src/v1/kaelz4_comp.c b/KAELz4/src/v1/kaelz4_comp.c index 19be62a..9f36768 100644 --- a/KAELz4/src/v1/kaelz4_comp.c +++ b/KAELz4/src/v1/kaelz4_comp.c @@ -633,17 +633,6 @@ static void kaelz4_async_compress_cb(int status, void *param) kaelz4_ctx_t* kaelz4_ctx = (kaelz4_ctx_t*)zc->kaeConfig; struct wcrypto_comp_op_data *op_data = &kaelz4_ctx->op_data; - if (kaelz4_ctx->q_node->is_sgl) { - if (kaelz4_ctx->src_sgl != NULL) { - wd_destory_sgl(kaelz4_ctx->q_node->kae_wd_queue, kaelz4_ctx->q_node->kae_queue_mem_pool, kaelz4_ctx->src_sgl); - kaelz4_ctx->src_sgl = NULL; - } - if (kaelz4_ctx->dst_sgl_usr != NULL) { - wd_destory_sgl(kaelz4_ctx->q_node->kae_wd_queue, kaelz4_ctx->q_node->kae_queue_mem_pool, kaelz4_ctx->dst_sgl_usr); - kaelz4_ctx->dst_sgl_usr = NULL; - } - } - if (status != 0) { US_ERR("kaelz4_async_compress_cb status %d !\n", status); req->compress_ctx->status = KAE_LZ4_COMP_FAIL; diff --git a/build.sh b/build.sh index 2c5d2e1..0cd96e4 100644 --- a/build.sh +++ b/build.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash set -e SRC_PATH=$(pwd) KAE_KERNEL_DIR="" @@ -38,7 +38,7 @@ function build_check_OS_version() KAE_KERNEL_DIR=${SRC_PATH}/KAEKernelDriver/KAEKernelDriver-OLK-5.4 KAE_SPEC_FILE=${SRC_PATH}/scripts/specFile/kae.spec OPENSSL_CONFIGURE_FLAG="--libdir=/usr/local/lib/engines-1.1/ --enable-kae" - else + else echo "[KAE error]:unsupport kernel version $KERNEL_VERSION" fi } @@ -65,8 +65,8 @@ function build_all_comp_sva() # 编译uadk cd $KAE_UADK_DIR - sh autogen.sh - sh conf.sh + bash autogen.sh + bash conf.sh make -j cp ${KAE_UADK_DIR}/.libs/lib* $KAE_BUILD_LIB @@ -87,14 +87,14 @@ function build_all_comp_sva() # 编译zlib cd $KAE_ZLIB_DIR - sh setup.sh devbuild KAE2 + bash setup.sh devbuild KAE2 cp $KAE_ZLIB_DIR/lib* $KAE_BUILD_LIB cp $KAE_ZLIB_DIR/open_source/zlib-1.2.11/lib* $KAE_BUILD_LIB # 编译zstd cd $KAE_ZSTD_DIR - sh build.sh devbuild + bash build.sh devbuild cp $KAE_ZSTD_DIR/lib* $KAE_BUILD_LIB cp $KAE_ZSTD_DIR/open_source/zstd/programs/zstd $KAE_BUILD_LIB @@ -147,8 +147,8 @@ function build_rpm() cd $KAE_UADK_DIR - sh autogen.sh - # sh conf.sh + bash autogen.sh + # bash conf.sh # 在 conf.sh中的内容后添加 --prefix 参数,为了使uadk编译生成的pkgconfig/*.pc文件中动态库的路径为RPM包编译时的临时目录,这样Opensslengine编译时才能够找到uadk动态库。 ac_cv_func_malloc_0_nonnull=yes ac_cv_func_realloc_0_nonnull=yes ./configure \ --enable-perf=yes \ @@ -198,7 +198,7 @@ function build_rpm() # 编译 zlib cd $KAE_ZLIB_DIR - sh setup.sh devbuild KAE2 + bash setup.sh devbuild KAE2 mkdir -p $KAE_BUILD/kaezip mkdir -p $KAE_BUILD/kaezip/include @@ -217,7 +217,7 @@ function build_rpm() # 编译 zstd cd $KAE_ZSTD_DIR - sh build.sh devbuild + bash build.sh devbuild mkdir -p $KAE_BUILD/kaezstd/lib/pkgconfig mkdir -p $KAE_BUILD/kaezstd/bin @@ -242,7 +242,7 @@ function build_rpm() # 编译 lz4 cd ${SRC_PATH}/KAELz4 - sh build.sh devbuild + bash build.sh devbuild mkdir -p $KAE_BUILD/kaelz4/lib mkdir -p $KAE_BUILD/kaelz4/bin @@ -559,40 +559,40 @@ function gzip_clean() function help() { echo "build KAE" - echo "sh build.sh all -- install all component(not include gmssl)" - echo "sh build.sh rpmpack -- build rpm pack(not include gmssl)" + echo "bash build.sh all -- install all component(not include gmssl)" + echo "bash build.sh rpmpack -- build rpm pack(not include gmssl)" - echo "sh build.sh driver -- install KAE driver" - echo "sh build.sh driver clean -- uninstall KAE driver" + echo "bash build.sh driver -- install KAE driver" + echo "bash build.sh driver clean -- uninstall KAE driver" - echo "sh build.sh uadk -- install uadk" - echo "sh build.sh uadk clean -- uninstall uadk" + echo "bash build.sh uadk -- install uadk" + echo "bash build.sh uadk clean -- uninstall uadk" - echo "sh build.sh engine -- install KAE openssl engine" - echo "sh build.sh engine clean -- uninstall KAE openssl engine" + echo "bash build.sh engine -- install KAE openssl engine" + echo "bash build.sh engine clean -- uninstall KAE openssl engine" - echo "sh build.sh engine3 -- install KAE openssl3.0 engine" - echo "sh build.sh engine3 clean -- uninstall KAE openssl3.0 engine" + echo "bash build.sh engine3 -- install KAE openssl3.0 engine" + echo "bash build.sh engine3 clean -- uninstall KAE openssl3.0 engine" - echo "sh build.sh engine_gmssl -- install KAE gmssl engine" - echo "sh build.sh engine_gmssl clean -- uninstall KAE gmssl engine" + echo "bash build.sh engine_gmssl -- install KAE gmssl engine" + echo "bash build.sh engine_gmssl clean -- uninstall KAE gmssl engine" - echo "sh build.sh engine3_tongsuo -- install KAE tongsuo engine" - echo "sh build.sh engine3_tongsuo clean -- uninstall KAE tongsuo engine" + echo "bash build.sh engine3_tongsuo -- install KAE tongsuo engine" + echo "bash build.sh engine3_tongsuo clean -- uninstall KAE tongsuo engine" - echo "sh build.sh engine_boringssl -- install KAE boringssl engine" - echo "sh build.sh engine_boringssl clean -- uninstall KAE boringssl engine" + echo "bash build.sh engine_boringssl -- install KAE boringssl engine" + echo "bash build.sh engine_boringssl clean -- uninstall KAE boringssl engine" - echo "sh build.sh zlib -- install zlib using KAE" - echo "sh build.sh zlib clean -- uninstall zlib using KAE" + echo "bash build.sh zlib -- install zlib using KAE" + echo "bash build.sh zlib clean -- uninstall zlib using KAE" - echo "sh build.sh zstd -- install zstd using KAE" - echo "sh build.sh zstd clean -- uninstall zstd using KAE" + echo "bash build.sh zstd -- install zstd using KAE" + echo "bash build.sh zstd clean -- uninstall zstd using KAE" - echo "sh build.sh gzip -- install gzip using KAE" - echo "sh build.sh gzip clean -- uninstall gzip using KAE" + echo "bash build.sh gzip -- install gzip using KAE" + echo "bash build.sh gzip clean -- uninstall gzip using KAE" - echo "sh build.sh cleanup -- clean up all component" + echo "bash build.sh cleanup -- clean up all component" } function check_environment() -- Gitee From ed52c565fd91f78c64535ef29de954a4b3378a16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=87=A4?= Date: Wed, 16 Jul 2025 07:31:14 +0000 Subject: [PATCH 2/6] refactor: change func position MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 白凤 --- KAELz4/src/v1/kaelz4_ctx.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/KAELz4/src/v1/kaelz4_ctx.c b/KAELz4/src/v1/kaelz4_ctx.c index b5a3b67..5ed392d 100644 --- a/KAELz4/src/v1/kaelz4_ctx.c +++ b/KAELz4/src/v1/kaelz4_ctx.c @@ -8,6 +8,7 @@ #include "kaelz4_ctx.h" #include "kaelz4_utils.h" #include "kaelz4_log.h" +#include "uadk/v1/wd_sgl.h" static KAE_QUEUE_POOL_HEAD_S* g_kaelz4_deflate_qp = NULL; static KAE_QUEUE_POOL_HEAD_S* g_kaelz4_inflate_qp = NULL; @@ -376,6 +377,16 @@ void kaelz4_free_ctx(kaelz4_ctx_t* kz_ctx) struct kaelz4_instance *instance = (struct kaelz4_instance *)kz_ctx->q_node->priv_ctx; KAE_QUEUE_DATA_NODE_S *q_node = kz_ctx->q_node; + if (kz_ctx->q_node->is_sgl) { + if (kz_ctx->src_sgl) { + wd_destory_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, kz_ctx->src_sgl); + kz_ctx->src_sgl = NULL; + } + if (kz_ctx->dst_sgl_usr) { + wd_destory_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, kz_ctx->dst_sgl_usr); + kz_ctx->dst_sgl_usr = NULL; + } + } instance->kz_ctx[kz_ctx->index] = NULL; kaelz4_free_kz_ctx(kz_ctx); -- Gitee From 27db33f802119eb3de328533858cb6beb3245250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=87=A4?= Date: Wed, 16 Jul 2025 07:54:36 +0000 Subject: [PATCH 3/6] feat: kaezip async support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 白凤 --- KAEZlib/Makefile | 5 +- KAEZlib/include/kaezip.h | 99 +++- KAEZlib/setup.sh | 2 +- KAEZlib/src/kaezip_adapter.c | 7 +- KAEZlib/src/kaezip_adapter.h | 1 + KAEZlib/src/utils/kaezip_utils.h | 42 ++ KAEZlib/src/utils/kaezlib_common.h | 23 + KAEZlib/src/v1/kaezip_ctx.c | 428 ++++++++++++------ KAEZlib/src/v1/kaezip_ctx.h | 29 +- KAEZlib/src/v1/kaezip_deflate.c | 4 +- KAEZlib/src/v1/kaezip_inflate.c | 4 +- KAEZlib/src/v1/kaezip_init.c | 22 + KAEZlib/src/v1/kaezip_init.h | 16 + ...adk-support-sgl-zero-copy-for-kaelz4.patch | 329 +++++++++++++- 14 files changed, 843 insertions(+), 168 deletions(-) create mode 100644 KAEZlib/src/utils/kaezlib_common.h create mode 100644 KAEZlib/src/v1/kaezip_init.c create mode 100644 KAEZlib/src/v1/kaezip_init.h diff --git a/KAEZlib/Makefile b/KAEZlib/Makefile index ae6955c..d0306e6 100644 --- a/KAEZlib/Makefile +++ b/KAEZlib/Makefile @@ -38,9 +38,6 @@ INCDIR += -I $(WORK_PATH)/ INCDIR += -I $(WORK_PATH)/include INCDIR += -I $(WORK_PATH)/open_source/zlib-1.2.11 INCDIR += -I ${WORK_PATH}/../uadk/include -INCDIR += -I /usr/local/include/uadk/ -INCDIR += -I ${WORK_PATH}/../uadk -INCDIR += -I ${WORK_PATH}/../ INCDIR += -I ${WORK_PATH}/src/utils INCDIR += -I ${WORK_PATH}/src/v1 INCDIR += -I ${WORK_PATH}/src/v2 @@ -56,7 +53,7 @@ LIBS := -lwd -lwd_comp -pthread LIBS += -lc_nonshared # The flags -CFLAGS := -Wall -Werror -fstack-protector-all -fPIC -D_GNU_SOURCE -shared +CFLAGS := -Wall -Werror -fstack-protector-all -fPIC -D_GNU_SOURCE -shared -O2 -march=armv8.2-a+crypto LDFLAGS := $(LIBDIR) LDFLAGS += $(LIBS) LDFLAGS += -Wl,-z,relro,-z,now,-z,noexecstack #safe link option diff --git a/KAEZlib/include/kaezip.h b/KAEZlib/include/kaezip.h index fe4b812..55260bc 100644 --- a/KAEZlib/include/kaezip.h +++ b/KAEZlib/include/kaezip.h @@ -1,12 +1,12 @@ /* * Copyright (C) 2019. Huawei Technologies Co., Ltd. All rights reserved. - * + * * This program is free software; you can redistribute it and/or modify - * it under the terms of the zlib License. + * it under the terms of the zlib License. * You may obtain a copy of the License at - * + * * https://www.zlib.net/zlib_license.html - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -22,6 +22,8 @@ #ifndef KAEZIP_H #define KAEZIP_H +#include +#include #include "zlib.h" #define Z_CALL_SOFT 10 @@ -33,6 +35,40 @@ typedef struct { char componentName[VERSION_STRUCT_MAXLEN]; char componentVersion[VERSION_STRUCT_MAXLEN]; } KAEZlibVersion; + +#define KAE_ZLIB_SUCC 0 +#define KAE_ZLIB_INVAL_PARA 1 +#define KAE_ZLIB_INIT_FAIL 2 +#define KAE_ZLIB_COMP_FAIL 3 +#define KAE_ZLIB_RELEASE_FAIL 4 +#define KAE_ZLIB_ALLOC_FAIL 5 +#define KAE_ZLIB_SET_FAIL 6 +#define KAE_ZLIB_HW_TIMEOUT_FAIL 7 + +struct kaezip_result { + int status; + unsigned int rsvd; + void *user_data; + size_t src_size; + size_t dst_len; + uint32_t *ibuf_crc; + uint32_t *obuf_crc; +}; + +struct kaezip_buffer { + size_t buf_len; + void *data; +}; + +struct kaezip_buffer_list { + unsigned int buf_num; + unsigned int rsvd; + struct kaezip_buffer *buf; + void *usr_data; +}; + +typedef void (*kaezip_async_callback)(struct kaezip_result *result); +typedef void *(*iova_map_fn)(void *usr, void *vaddr, size_t sz); extern int kaezlib_get_version(KAEZlibVersion* ver); extern int kz_get_devices(void); @@ -64,5 +100,60 @@ extern unsigned long getInflateKaezipCtx(z_streamp strm); extern void setInflateKaezipCtx(z_streamp strm, unsigned long kaezip_ctx); extern unsigned long getDeflateKaezipCtx(z_streamp strm); extern void setDeflateKaezipCtx(z_streamp strm, unsigned long kaezip_ctx); +/** + * @brief: block compress async api + * @param: sess : session + * @param: src [IN] : input data + * @param: dst [OUT] : output data, only support buf_num == 1 now. + * @param: callback [IN] : async callback function,it can not be NULL, must be typedef void (*kaezip_async_callback)(struct kaezip_result *result); + * @param: result [IN OUT] : async callback result,it can not be NULL. must be pointer of struct kaezip_result. + * @return: 0 success, other fail + */ +int KAEZIP_compress_async_in_session(void *sess, const struct kaezip_buffer_list *src, struct kaezip_buffer_list *dst, + kaezip_async_callback callback, struct kaezip_result *result); +/** + * @brief: Polling hardware result in session. + * @param: sess : session + * @param: budget : process packet num per call. + */ + void KAEZIP_compress_async_polling_in_session(void *sess, int budget); + +/** + * @brief: Initialize Task Queues and Threads on the KAE Side. + * @param: usr_map : function to translate src/dst buf's VA to PA/IOVA + * @return: session, NULL if fail + */ +void *KAEZIP_create_async_compress_session(iova_map_fn usr_map); + +/** + * @brief: Destroy session and hardware ctx. + * @param: sess : session + */ +void KAEZIP_destroy_async_compress_session(void *sess); + +/** + * @brief: Initialize Task Queues and Threads on the KAE Side for decompress. + * @param: usr_map : function to translate src/dst buf's VA to PA/IOVA + * @return: session, NULL if fail + */ +void *KAEZIP_create_async_decompress_session(iova_map_fn usr_map); + +/** + * @brief: Destroy decompress session and hardware ctx. + * @param: sess : session + */ +void KAEZIP_destroy_async_decompress_session(void *sess); + +/** + * @brief: block decompress async api + * @param: sess : session + * @param: src [IN] : input data + * @param: dst [OUT] : output data, only support buf_num == 1 now. + * @param: callback [IN] : async callback function,it can not be NULL, must be typedef void (*kaezip_async_callback)(struct kaezip_result *result); + * @param: result [IN OUT] : async callback result,it can not be NULL. must be pointer of struct kaezip_result. + * @return: 0 success, other fail + */ +int KAEZIP_decompress_async_in_session(void *sess, const struct kaezip_buffer_list *src, struct kaezip_buffer_list *dst, + kaezip_async_callback callback, struct kaezip_result *result); #endif diff --git a/KAEZlib/setup.sh b/KAEZlib/setup.sh index b34fa55..e93217e 100644 --- a/KAEZlib/setup.sh +++ b/KAEZlib/setup.sh @@ -17,7 +17,7 @@ function Install_warpdrive() tar -zxvf libwd-"${BUILDVERSION}".tar.gz cd warpdrive/ - sh autogen.sh + bash autogen.sh ./configure make clean && make make install diff --git a/KAEZlib/src/kaezip_adapter.c b/KAEZlib/src/kaezip_adapter.c index 4c7314d..c828a94 100644 --- a/KAEZlib/src/kaezip_adapter.c +++ b/KAEZlib/src/kaezip_adapter.c @@ -8,7 +8,7 @@ #include "kaezip.h" #include "wd_comp.h" #include "kaezip_adapter.h" -#include "kaezip_init.h" +#include "v2/kaezip_init.h" #include "kaezip_comp.h" #include "kaezip_deflate.h" #include "kaezip_inflate.h" @@ -22,10 +22,10 @@ enum { }; static int g_platform = -1; -static void uadk_get_accel_platform(void) +int uadk_get_accel_platform(void) { if (g_platform >= 0) { - return; + return g_platform; } // init log kaezip_debug_init_log(); @@ -49,6 +49,7 @@ static void uadk_get_accel_platform(void) g_platform = HW_NONE; end: US_INFO("g_platform is %d, inited!\n", g_platform); + return g_platform; } /* -----------------------------------------------DEFLATE----------------------------------------------- */ diff --git a/KAEZlib/src/kaezip_adapter.h b/KAEZlib/src/kaezip_adapter.h index 6e1a678..c8841c0 100644 --- a/KAEZlib/src/kaezip_adapter.h +++ b/KAEZlib/src/kaezip_adapter.h @@ -26,5 +26,6 @@ int kz_inflateInit2_(z_streamp strm, int windowBits, const char *version, int st int kz_inflate(z_streamp strm, int flush); int kz_inflateEnd(z_streamp strm); int kz_inflateReset(z_streamp strm); +int uadk_get_accel_platform(void); #endif diff --git a/KAEZlib/src/utils/kaezip_utils.h b/KAEZlib/src/utils/kaezip_utils.h index ba18cb3..6978881 100644 --- a/KAEZlib/src/utils/kaezip_utils.h +++ b/KAEZlib/src/utils/kaezip_utils.h @@ -32,6 +32,7 @@ #include #include #include "wd_comp.h" +#include #define gettid() syscall(SYS_gettid) #define PRINTPID \ @@ -40,6 +41,14 @@ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef TRUE +#define TRUE 1 +#endif + #ifndef true #define true (0 == 0) #endif @@ -53,6 +62,11 @@ #define KAEZIP_DRIVER_DO_TASK_NOW (1) #define KAEZIP_SAVE_DATA_TO_BUFFER (2) +#define COMP_BLOCK_NUM (4) +#define COMP_BLOCK_SIZE (2 * 1024 * 1024) +#define KAEZIP_STREAM_CHUNK_IN ((COMP_BLOCK_SIZE) >> 3) // change the input size would change the performace +#define KAEZIP_STREAM_CHUNK_OUT (COMP_BLOCK_SIZE) + #define KAEZIP_RETURN_FAIL_IF(cond, mesg, ret) \ if (unlikely(cond)) {\ US_ERR(mesg); \ @@ -76,6 +90,12 @@ } \ } while (0) +typedef uint8_t BYTE; +typedef uint8_t U8; +typedef uint16_t U16; +typedef uint32_t U32; +typedef uint64_t U64; + static inline void *kae_malloc(unsigned int size) { return malloc(size); @@ -140,4 +160,26 @@ static inline int kz_zlib_analy_alg(int windowbits, int *alg, int *windowsize, i return 0; } +#define NSEC_TO_SEC 1000000000L +static inline void get_time_out_spec(struct timespec *start, struct timespec *polling_timeout) +{ + clock_gettime(CLOCK_REALTIME, start); /* Get current real time. */ + start->tv_sec += polling_timeout->tv_sec; + start->tv_nsec += polling_timeout->tv_nsec; + start->tv_sec += start->tv_nsec / NSEC_TO_SEC; + start->tv_nsec = start->tv_nsec % NSEC_TO_SEC; +} + +static inline int check_time_out(struct timespec *time) +{ + struct timespec now; + clock_gettime(CLOCK_REALTIME, &now); /* Get current real time. */ + + if ((now.tv_sec < time->tv_sec) || (now.tv_sec == time->tv_sec && now.tv_nsec <= time->tv_nsec)) { + return 0; + } + + return 1; +} + #endif diff --git a/KAEZlib/src/utils/kaezlib_common.h b/KAEZlib/src/utils/kaezlib_common.h new file mode 100644 index 0000000..b56b695 --- /dev/null +++ b/KAEZlib/src/utils/kaezlib_common.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2021-2021. All rights reserved. + * Description: contain kae common defines + * Author: DSA + * Create: 2024-7-6 + */ + +#ifndef KAEZLIB_COMMON_H +#define KAEZLIB_COMMON_H + +#define CONF_KAEZLIB + +#define KAE_ZLIB_PROCESS_IDLE 0 +#define KAE_ZLIB_PROCESS_HW_BUSY -1 + +enum kaezip_async_data_format { + KAEZIP_ASYNC_BLOCK = 0, + KAEZIP_ASYNC_BUTT, +}; + +#include "kaezip.h" + +#endif \ No newline at end of file diff --git a/KAEZlib/src/v1/kaezip_ctx.c b/KAEZlib/src/v1/kaezip_ctx.c index 74838db..23a91a1 100644 --- a/KAEZlib/src/v1/kaezip_ctx.c +++ b/KAEZlib/src/v1/kaezip_ctx.c @@ -24,6 +24,7 @@ #include "kaezip_common.h" #include "kaezip_utils.h" #include "kaezip_log.h" +#include "uadk/v1/wd_sgl.h" static KAE_QUEUE_POOL_HEAD_S* g_kaezip_deflate_qp = NULL; static KAE_QUEUE_POOL_HEAD_S* g_kaezip_inflate_qp = NULL; @@ -32,14 +33,50 @@ static pthread_mutex_t g_kaezip_inflate_pool_init_mutex = PTHREAD_MUTEX_INITIALI static enum kaezip_mode g_kaezip_mode = KAEZIP_SYNC; static KAE_QUEUE_POOL_HEAD_S* kaezip_get_qp(int algtype); -static kaezip_ctx_t* kaezip_new_ctx(KAE_QUEUE_DATA_NODE_S* q_node, int alg_comp_type, int comp_optype, int level); -static int kaezip_create_wd_ctx(kaezip_ctx_t *kz_ctx, int alg_comp_type, int comp_optype); +static kaezip_ctx_t *kaezip_new_ctx(struct kaezip_instance *instance, int alg_comp_type, int comp_optype, int is_sgl); +static int kaezip_create_wd_ctx(struct kaezip_instance *instance, int alg_comp_type, int comp_optype); static int kaezip_driver_do_comp_impl(kaezip_ctx_t *kz_ctx); static int kaezip_set_comp_input_data(kaezip_ctx_t *kz_ctx); static void kaezip_get_buffer_remain_data(kaezip_ctx_t *kz_ctx); static void kaezip_get_comp_output_data(kaezip_ctx_t *kz_ctx); static void kaezip_get_decomp_output_data(kaezip_ctx_t *kz_ctx); +int kaezip_get_win_size(void) +{ + char *env_str = getenv("KAE_ZLIB_WINTYPE"); + if (env_str == NULL) { + US_DEBUG("KAE_ZLIB_WINTYPE is NULL, use default winsize 8\n"); + return WCRYPTO_COMP_WS_8K; + } + int winsize = atoi(env_str); + + int wintype = 0; + + switch (winsize) { + case 4: + wintype = WCRYPTO_COMP_WS_4K; + break; + case 8: + wintype = WCRYPTO_COMP_WS_8K; + break; + case 16: + wintype = WCRYPTO_COMP_WS_16K; + break; + case 24: + wintype = WCRYPTO_COMP_WS_24K; + break; + case 32: + wintype = WCRYPTO_COMP_WS_32K; + break; + default: + wintype = WCRYPTO_COMP_WS_32K; + US_DEBUG("KAE_ZLIB_WINTYPE value out of range :%d ,use default winsize 32", winsize); + break; + } + + US_DEBUG("KAE_ZLIB_WINTYPE wintype is :%d ", wintype); + return wintype; +} static void __attribute((constructor)) kaezip_getmode_from_env(void) { @@ -56,26 +93,22 @@ static void __attribute((constructor)) kaezip_getmode_from_env(void) } } -void kaezip_free_ctx(void* kz_ctx) +static void kaezip_free_kz_ctx(void* kz_ctx) { kaezip_ctx_t* kaezip_ctx = (kaezip_ctx_t *)kz_ctx; if (kaezip_ctx == NULL) { return; } - if (kaezip_ctx->op_data.in && kaezip_ctx->setup.br.usr) { - kaezip_ctx->setup.br.free(kaezip_ctx->setup.br.usr, (void *)kaezip_ctx->op_data.in); - kaezip_ctx->op_data.in = NULL; - } - - if (kaezip_ctx->op_data.out && kaezip_ctx->setup.br.usr) { - kaezip_ctx->setup.br.free(kaezip_ctx->setup.br.usr, (void *)kaezip_ctx->op_data.out); - kaezip_ctx->op_data.out = NULL; - } - - if (kaezip_ctx->wd_ctx != NULL) { - wcrypto_del_comp_ctx(kaezip_ctx->wd_ctx); - kaezip_ctx->wd_ctx = NULL; + if (!kaezip_ctx->q_node->is_sgl) { + if (kaezip_ctx->op_data.in && kaezip_ctx->setup.br.usr) { + kaezip_ctx->setup.br.free(kaezip_ctx->setup.br.usr, (void *)kaezip_ctx->op_data.in); + kaezip_ctx->op_data.in = NULL; + } + if (kaezip_ctx->op_data.out && kaezip_ctx->setup.br.usr) { + kaezip_ctx->setup.br.free(kaezip_ctx->setup.br.usr, (void *)kaezip_ctx->op_data.out); + kaezip_ctx->op_data.out = NULL; + } } kae_free(kaezip_ctx); @@ -83,7 +116,8 @@ void kaezip_free_ctx(void* kz_ctx) return; } -static kaezip_ctx_t* kaezip_new_ctx(KAE_QUEUE_DATA_NODE_S* q_node, int alg_comp_type, int comp_optype, int level) +static kaezip_ctx_t *kaezip_new_ctx(struct kaezip_instance *instance, + int alg_comp_type, int comp_optype, int is_sgl) { kaezip_ctx_t *kz_ctx = NULL; kz_ctx = (kaezip_ctx_t *)kae_malloc(sizeof(kaezip_ctx_t)); @@ -93,49 +127,45 @@ static kaezip_ctx_t* kaezip_new_ctx(KAE_QUEUE_DATA_NODE_S* q_node, int alg_comp_ } memset(kz_ctx, 0, sizeof(kaezip_ctx_t)); - kz_ctx->setup.br.alloc = kaezip_wd_alloc_blk; - kz_ctx->setup.br.free = kaezip_wd_free_blk; - kz_ctx->setup.br.iova_map = kaezip_dma_map; - kz_ctx->setup.br.iova_unmap = kaezip_dma_unmap; - kz_ctx->setup.br.usr = q_node->kae_queue_mem_pool; - - int windowsize, alg; - (void)kz_zlib_analy_alg(-15, &alg, &windowsize, level); - kz_ctx->setup.win_size = windowsize; - - kz_ctx->op_data.in = kz_ctx->setup.br.alloc(kz_ctx->setup.br.usr, COMP_BLOCK_SIZE); - if (kz_ctx->op_data.in == NULL) { - US_ERR("alloc opdata in buf failed"); - goto err; - } - - kz_ctx->op_data.out = kz_ctx->setup.br.alloc(kz_ctx->setup.br.usr, COMP_BLOCK_SIZE); - if (kz_ctx->op_data.out == NULL) { - US_ERR("alloc opdata out buf failed"); - goto err; - } - kz_ctx->op_data.avail_out = KAEZIP_STREAM_CHUNK_OUT; - - kz_ctx->q_node = q_node; - q_node->priv_ctx = kz_ctx; + kz_ctx->setup = instance->setup; + kz_ctx->comp_alg_type = alg_comp_type; + kz_ctx->comp_type = comp_optype; + kz_ctx->q_node = instance->q_node; + kz_ctx->wd_ctx = instance->wd_ctx; + + if (is_sgl) { + // in 以及 out 均为 SGL + kz_ctx->op_data.in = (void *)kz_ctx->src_sgl_buf; + kz_ctx->op_data.out = (void *)kz_ctx->dst_sgl_buf; + } else { + kz_ctx->op_data.in = kz_ctx->setup.br.alloc(kz_ctx->setup.br.usr, COMP_BLOCK_SIZE); + if (kz_ctx->op_data.in == NULL) { + US_ERR("alloc opdata in buf failed"); + goto err; + } - if (kaezip_create_wd_ctx(kz_ctx, alg_comp_type, comp_optype) == KAEZIP_FAILED) { - US_ERR("create wd ctx fail!"); - goto err; + kz_ctx->op_data.out = kz_ctx->setup.br.alloc(kz_ctx->setup.br.usr, COMP_BLOCK_SIZE); + if (kz_ctx->op_data.out == NULL) { + US_ERR("alloc opdata out buf failed"); + goto err; + } + kz_ctx->op_data.avail_out = KAEZIP_STREAM_CHUNK_OUT; } return kz_ctx; err: - kaezip_free_ctx(kz_ctx); + kaezip_free_kz_ctx(kz_ctx); return NULL; } -void kaezip_callback(const void *msg, void *tag) +static void kaezip_callback(const void *msg, void *tag) { const struct wcrypto_comp_msg *respmsg = msg; - struct wcrypto_comp_op_data *op_data = (struct wcrypto_comp_op_data *)tag; + kaezip_ctx_t *kz_ctx = (kaezip_ctx_t *)tag; + struct wcrypto_comp_op_data *op_data = &kz_ctx->op_data; + op_data->consumed = respmsg->in_cons; op_data->produced = respmsg->produced; op_data->status = respmsg->status; @@ -143,67 +173,11 @@ void kaezip_callback(const void *msg, void *tag) op_data->flush = respmsg->flush_type; op_data->isize = respmsg->isize; op_data->checksum = respmsg->checksum; -} -static int kaezip_create_wd_ctx(kaezip_ctx_t *kz_ctx, int alg_comp_type, int comp_optype) -{ - if (kz_ctx->wd_ctx != NULL) { - US_WARN("wd ctx is in used by other comp"); - return KAEZIP_FAILED; - } - - struct wd_queue *q = kz_ctx->q_node->kae_wd_queue; - - kz_ctx->setup.alg_type = (enum wcrypto_comp_alg_type)alg_comp_type; - kz_ctx->setup.op_type = (enum wcrypto_comp_optype)comp_optype; - kz_ctx->setup.stream_mode = (enum wcrypto_comp_state)WCRYPTO_COMP_STATEFUL; - kz_ctx->setup.cb = (g_kaezip_mode == KAEZIP_ASYNC ? kaezip_callback : NULL); - - kz_ctx->wd_ctx = wcrypto_create_comp_ctx(q, &kz_ctx->setup); - if (kz_ctx->wd_ctx == NULL) { - US_ERR("wd create kae comp ctx fail!"); - return KAEZIP_FAILED; - } - - kz_ctx->comp_alg_type = alg_comp_type; - kz_ctx->comp_type = comp_optype; - - return KAEZIP_SUCCESS; -} - -kaezip_ctx_t* kaezip_get_ctx(int alg_comp_type, int comp_optype, int level) -{ - KAE_QUEUE_DATA_NODE_S *q_node = NULL; - kaezip_ctx_t *kz_ctx = NULL; - - KAE_QUEUE_POOL_HEAD_S* qp = kaezip_get_qp(comp_optype); - if(unlikely(!qp)) { - US_ERR("failed to get hardware queue pool"); - return NULL; - } + if (kz_ctx->callback) + kz_ctx->callback(respmsg->status, kz_ctx->param); - kaezip_queue_pool_check_and_release(qp, kaezip_free_ctx); - - q_node = kaezip_get_node_from_pool(qp, alg_comp_type, comp_optype); - if (q_node == NULL) { - US_ERR("failed to get hardware queue"); - return NULL; - } - - kz_ctx = (kaezip_ctx_t *)q_node->priv_ctx; - if (kz_ctx == NULL) { - kz_ctx = kaezip_new_ctx(q_node, alg_comp_type, comp_optype, level); - if (kz_ctx == NULL) { - US_ERR("kaezip new engine ctx fail!"); - (void)kaezip_put_node_to_pool(qp, q_node); - return NULL; - } - } - - kz_ctx->q_node = q_node; - kaezip_init_ctx(kz_ctx); - - return kz_ctx; + return; } void kaezip_init_ctx(kaezip_ctx_t* kz_ctx) @@ -226,29 +200,12 @@ void kaezip_init_ctx(kaezip_ctx_t* kz_ctx) kz_ctx->header_pos = 0; kz_ctx->flush = 0; kz_ctx->status = 0; + kz_ctx->callback = NULL; + kz_ctx->param = NULL; memset(&kz_ctx->end_block, 0, sizeof(struct wcrypto_end_block)); } -void kaezip_put_ctx(kaezip_ctx_t* kz_ctx) -{ - KAE_QUEUE_DATA_NODE_S* temp = NULL; - if (unlikely(kz_ctx == NULL)) { - US_ERR("kae zip ctx NULL!"); - return; - } - - if (kz_ctx->q_node != NULL) { - temp = kz_ctx->q_node; - kz_ctx->q_node = NULL; - (void)kaezip_put_node_to_pool(kaezip_get_qp(kz_ctx->comp_type), temp); - } - - kz_ctx = NULL; - - return; -} - static int kaezip_should_add_rate(struct kaezip_async_sleep_info *sleep_info) { if (!sleep_info) { @@ -273,7 +230,7 @@ static int kaezip_driver_do_comp_impl(kaezip_ctx_t* kz_ctx) struct wcrypto_comp_op_data *op_data = &kz_ctx->op_data; - int ret = wcrypto_do_comp(kz_ctx->wd_ctx, op_data, g_kaezip_mode == KAEZIP_ASYNC ? op_data : NULL); + int ret = wcrypto_do_comp(kz_ctx->wd_ctx, op_data, g_kaezip_mode == KAEZIP_ASYNC ? kz_ctx : NULL); if (unlikely(ret < 0)) { US_ERR("wd_do_comp fail! ret = %d", ret); return KAEZIP_FAILED; @@ -423,7 +380,7 @@ int kaezip_driver_do_decomp(kaezip_ctx_t *kaezip_ctx) return KAEZIP_SUCCESS; } -static void kaezip_set_comp_status(kaezip_ctx_t *kz_ctx) +void kaezip_set_comp_status(kaezip_ctx_t *kz_ctx) { if (kz_ctx->comp_type == WCRYPTO_INFLATE) { switch (kz_ctx->op_data.status) { @@ -573,6 +530,217 @@ int kaezip_get_remain_data(kaezip_ctx_t *kz_ctx) return KAEZIP_SUCCESS; } +static int kaezip_create_wd_ctx(struct kaezip_instance *instance, int alg_comp_type, int comp_optype) +{ + if (instance->wd_ctx != NULL) { + US_WARN("wd ctx is in used by other comp"); + return KAEZIP_FAILED; + } + + struct wd_queue *q = instance->q_node->kae_wd_queue; + + instance->setup.alg_type = (enum wcrypto_comp_alg_type)alg_comp_type; + instance->setup.op_type = (enum wcrypto_comp_optype)comp_optype; + instance->setup.stream_mode = (enum wcrypto_comp_state)WCRYPTO_COMP_STATEFUL; + if (instance->q_node->is_sgl) { + instance->setup.stream_mode = (enum wcrypto_comp_state)WCRYPTO_COMP_STATELESS; + instance->setup.data_fmt = WD_SGL_BUF; + } + + instance->wd_ctx = wcrypto_create_comp_ctx(q, &instance->setup); + if (instance->wd_ctx == NULL) { + US_ERR("wd create kae comp ctx fail!"); + return KAEZIP_FAILED; + } + + return KAEZIP_SUCCESS; +} + +static struct kaezip_instance *kaezip_new_instance(KAE_QUEUE_DATA_NODE_S* q_node, int alg_comp_type, int comp_optype, int win_size, int is_sgl) +{ + struct kaezip_instance *instance = (struct kaezip_instance *)kae_malloc(sizeof(struct kaezip_instance)); + + if (instance == NULL) { + US_ERR("failed to alloc kaelz4 instance"); + return NULL; + } + + memset(instance, 0, sizeof(struct kaezip_instance)); + + instance->q_node = q_node; + instance->total_num = MAX_KAE_CTX_DEPTH; + instance->setup.win_size = win_size; + instance->setup.br.usr = q_node->kae_queue_mem_pool; + instance->setup.cb = kaezip_callback; + + if (is_sgl) { + instance->setup.br.alloc = kaezip_wd_alloc_sgl; + instance->setup.br.free = kaezip_wd_free_sgl; + instance->setup.br.iova_map = kaezip_dma_map_sgl; + instance->setup.br.iova_unmap = kaezip_dma_unmap_sgl; + } else { + instance->setup.br.alloc = kaezip_wd_alloc_blk; + instance->setup.br.free = kaezip_wd_free_blk; + instance->setup.br.iova_map = kaezip_dma_map; + instance->setup.br.iova_unmap = kaezip_dma_unmap; + } + + if (kaezip_create_wd_ctx(instance, alg_comp_type, comp_optype) == KAEZIP_FAILED) { + US_ERR("create wd ctx fail!"); + kae_free(instance); + return NULL; + } + return instance; +} + +void kaezip_free_instance(void *arg) +{ + struct kaezip_instance *instance = arg; + + for (int i = 0; i < instance->total_num; i++) { + if (instance->kz_ctx[i]) { + kaezip_free_kz_ctx(instance->kz_ctx[i]); + instance->kz_ctx[i] = NULL; + } + } + + if (instance->wd_ctx != NULL) { + wcrypto_del_comp_ctx(instance->wd_ctx); // scy: TBM + instance->wd_ctx = NULL; + } + + kae_free(instance); +} + +#define COMP_OPTYPE_NUM (2) +__thread struct kaezip_instance *g_cur_instance[COMP_OPTYPE_NUM]; +kaezip_ctx_t* kaezip_get_ctx(int alg_comp_type, int comp_optype, int win_size, int is_sgl) +{ + KAE_QUEUE_DATA_NODE_S *q_node = NULL; + kaezip_ctx_t *kz_ctx = NULL; + KAE_QUEUE_POOL_HEAD_S* qp = kaezip_get_qp(comp_optype); + struct kaezip_instance *cur_instance = g_cur_instance[comp_optype % COMP_OPTYPE_NUM]; + + if(unlikely(!qp)) { + US_ERR("failed to get hardware queue pool"); + return NULL; + } + + // check cur_instance + if (cur_instance == NULL || cur_instance->q_node->comp_alg_type != alg_comp_type \ + || cur_instance->q_node->win_size != win_size || cur_instance->q_node->is_sgl != is_sgl) { + q_node = kaezip_get_node_from_pool(qp, alg_comp_type, comp_optype, win_size, is_sgl); + if (q_node == NULL) { + kaezip_queue_pool_check_and_release(qp, kaezip_free_instance); + q_node = kaezip_get_node_from_pool(qp, alg_comp_type, comp_optype, win_size, is_sgl); + + if (q_node == NULL) { + kae_free(cur_instance); + US_ERR("failed to get hardware queue"); + return NULL; + } + } + + if (q_node->priv_ctx == NULL) { + cur_instance = kaezip_new_instance(q_node, alg_comp_type, comp_optype, win_size, is_sgl); + if (cur_instance == NULL) { + US_ERR("create instance fail!"); + (void)kaezip_put_node_to_pool(qp, q_node, kaezip_free_instance); + return NULL; + } + q_node->priv_ctx = cur_instance; + } else { + cur_instance = q_node->priv_ctx; + } + g_cur_instance[comp_optype % COMP_OPTYPE_NUM] = cur_instance; + } else { + q_node = cur_instance->q_node; + } + + kz_ctx = cur_instance->kz_ctx[cur_instance->cur_idx]; + if (kz_ctx == NULL) { + kz_ctx = kaezip_new_ctx(cur_instance, alg_comp_type, comp_optype, is_sgl); + if (kz_ctx == NULL) { + if (cur_instance->cur_idx == 0) { + (void)kaezip_put_node_to_pool(qp, q_node, kaezip_free_instance); + } + g_cur_instance[comp_optype % COMP_OPTYPE_NUM] = NULL; + return NULL; + } + cur_instance->kz_ctx[cur_instance->cur_idx] = kz_ctx; + } + + kaezip_init_ctx(kz_ctx); + kz_ctx->index = cur_instance->cur_idx; + cur_instance->cur_idx++; + if (cur_instance->cur_idx == cur_instance->total_num) { + g_cur_instance[comp_optype % COMP_OPTYPE_NUM] = NULL; + } + + return kz_ctx; +} + +void kaezip_put_ctx(kaezip_ctx_t* kz_ctx) +{ + KAE_QUEUE_DATA_NODE_S* temp = NULL; + if (unlikely(kz_ctx == NULL)) { + US_ERR("kae zip ctx NULL!"); + return; + } + + if (kz_ctx->q_node != NULL) { + struct kaezip_instance *instance = (struct kaezip_instance *)kz_ctx->q_node->priv_ctx; + + temp = kz_ctx->q_node; + instance->free_num++; + if (instance->free_num == instance->cur_idx) { + (void)kaezip_put_node_to_pool(kaezip_get_qp(kz_ctx->comp_type), temp, kaezip_free_instance); + instance->cur_idx = 0; + instance->free_num = 0; + if (instance == g_cur_instance[kz_ctx->comp_type % COMP_OPTYPE_NUM]) { + g_cur_instance[kz_ctx->comp_type % COMP_OPTYPE_NUM] = NULL; + } + } + } + + kz_ctx = NULL; + + return; +} + +void kaezip_free_ctx(kaezip_ctx_t* kz_ctx) +{ + if (unlikely(kz_ctx == NULL)) { + US_ERR("kae zip ctx NULL!"); + return; + } + + struct kaezip_instance *instance = (struct kaezip_instance *)kz_ctx->q_node->priv_ctx; + KAE_QUEUE_DATA_NODE_S *q_node = kz_ctx->q_node; + int comp_optype = kz_ctx->comp_type % COMP_OPTYPE_NUM; + + if (kz_ctx->q_node->is_sgl) { + if (kz_ctx->src_sgl) { + wd_destory_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, kz_ctx->src_sgl); + kz_ctx->src_sgl = NULL; + } + if (kz_ctx->dst_sgl_usr) { + wd_destory_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, kz_ctx->dst_sgl_usr); + kz_ctx->dst_sgl_usr = NULL; + } + } + instance->kz_ctx[kz_ctx->index] = NULL; + kaezip_free_kz_ctx(kz_ctx); + + instance->free_num++; + if (instance->free_num == instance->cur_idx) { + kaezip_free_wd_queue_memory(q_node, kaezip_free_instance); + if (instance == g_cur_instance[comp_optype]) { + g_cur_instance[comp_optype] = NULL; + } + } +} + static KAE_QUEUE_POOL_HEAD_S* kaezip_get_qp(int algtype) { if ((algtype != WCRYPTO_DEFLATE) && (algtype != WCRYPTO_INFLATE) ) { @@ -589,7 +757,7 @@ static KAE_QUEUE_POOL_HEAD_S* kaezip_get_qp(int algtype) pthread_mutex_unlock(&g_kaezip_deflate_pool_init_mutex); return g_kaezip_deflate_qp; } - kaezip_queue_pool_destroy(g_kaezip_deflate_qp, kaezip_free_ctx); + kaezip_queue_pool_destroy(g_kaezip_deflate_qp, kaezip_free_instance); g_kaezip_deflate_qp = kaezip_init_queue_pool(algtype); pthread_mutex_unlock(&g_kaezip_deflate_pool_init_mutex); @@ -603,7 +771,7 @@ static KAE_QUEUE_POOL_HEAD_S* kaezip_get_qp(int algtype) pthread_mutex_unlock(&g_kaezip_inflate_pool_init_mutex); return g_kaezip_inflate_qp; } - kaezip_queue_pool_destroy(g_kaezip_inflate_qp, kaezip_free_ctx); + kaezip_queue_pool_destroy(g_kaezip_inflate_qp, kaezip_free_instance); g_kaezip_inflate_qp = kaezip_init_queue_pool(algtype); pthread_mutex_unlock(&g_kaezip_inflate_pool_init_mutex); diff --git a/KAEZlib/src/v1/kaezip_ctx.h b/KAEZlib/src/v1/kaezip_ctx.h index e8d0d4f..fb1eeac 100644 --- a/KAEZlib/src/v1/kaezip_ctx.h +++ b/KAEZlib/src/v1/kaezip_ctx.h @@ -55,6 +55,9 @@ struct wcrypto_end_block { unsigned int b_set; }; +#define MAX_KAE_CTX_DEPTH 64 +#define REQ_BUFFER_MAX 255 // uadk支持最大的sgl buf数量 +#define REQ_BUFFER_SIZE (8*1024*1024) // uadk支持最大sge的大小 #define KAE_ASYNC_MAX_RECV_TIMES (2000000) #define FLAG_NUM (10) struct kaezip_async_sleep_info { @@ -82,23 +85,45 @@ struct kaezip_ctx { unsigned int buffer_len; // input data length in buffer unsigned int buffer_remain; // remain data in buffer which not send to driver for 4Byte alignment int status; // enum kaezip_comp_status + unsigned int index; struct wcrypto_end_block end_block; KAE_QUEUE_DATA_NODE_S* q_node; struct wcrypto_comp_ctx_setup setup; struct wcrypto_comp_op_data op_data; void* wd_ctx; + wd_map usr_map; + unsigned char src_sgl_buf[32 + (32 * (REQ_BUFFER_MAX + 1))]; // 32: sizeof(struct wd_sgl) + sizeof(struct wd_sge) * 60 + 1 * sizeof(struct wd_sge) for hisi_sge + unsigned char dst_sgl_buf[32 + (32 * (REQ_BUFFER_MAX + 1))]; // 32: sizeof(struct wd_sgl) + sizeof(struct wd_sge) * 60 + 1 * sizeof(struct wd_sge) for hisi_sge + void *src_sgl; + void *dst_sgl_usr; + void *dst_sgl_kernel; + void (*callback)(int status, void *param); + void* param; }; + +struct kaezip_instance { + KAE_QUEUE_DATA_NODE_S *q_node; + void *wd_ctx; + struct kaezip_ctx *kz_ctx[MAX_KAE_CTX_DEPTH]; + struct wcrypto_comp_ctx_setup setup; + unsigned int total_num; + unsigned int cur_idx; + unsigned int free_num; +}; + typedef struct kaezip_ctx kaezip_ctx_t; -kaezip_ctx_t* kaezip_get_ctx(int alg_comp_type, int comp_optype, int level); +kaezip_ctx_t* kaezip_get_ctx(int alg_comp_type, int comp_optype, int win_size, int is_sgl); void kaezip_put_ctx(kaezip_ctx_t* kz_ctx); void kaezip_init_ctx(kaezip_ctx_t* kz_ctx); -void kaezip_free_ctx(void* kz_ctx); +void kaezip_free_ctx(kaezip_ctx_t* kz_ctx); int kaezip_get_remain_data(kaezip_ctx_t *kz_ctx); int kaezip_driver_do_comp(kaezip_ctx_t *kaezip_ctx); int kaezip_driver_do_decomp(kaezip_ctx_t *kaezip_ctx); +void kaezip_set_comp_status(kaezip_ctx_t *kz_ctx); +int kaezip_get_win_size(void); #endif diff --git a/KAEZlib/src/v1/kaezip_deflate.c b/KAEZlib/src/v1/kaezip_deflate.c index 85a87f4..6696934 100644 --- a/KAEZlib/src/v1/kaezip_deflate.c +++ b/KAEZlib/src/v1/kaezip_deflate.c @@ -68,7 +68,9 @@ int kz_deflateInit2_v1(z_streamp strm, int level, return Z_OK; } - kaezip_ctx_t* kaezip_ctx = kaezip_get_ctx(alg_comp_type, WCRYPTO_DEFLATE, level); + int win_size, alg; + (void)kz_zlib_analy_alg(-15, &alg, &win_size, level); + kaezip_ctx_t* kaezip_ctx = kaezip_get_ctx(alg_comp_type, WCRYPTO_DEFLATE, win_size, 0); if (kaezip_ctx == NULL) { US_ERR("failed to get kaezip ctx, windowbits %d!", windowBits); setDeflateKaezipCtx(strm, 0); diff --git a/KAEZlib/src/v1/kaezip_inflate.c b/KAEZlib/src/v1/kaezip_inflate.c index c32e2a8..941f5ef 100644 --- a/KAEZlib/src/v1/kaezip_inflate.c +++ b/KAEZlib/src/v1/kaezip_inflate.c @@ -208,7 +208,9 @@ int kz_do_inflateInit(z_streamp strm, int alg_comp_type) return Z_OK; } - kaezip_ctx_t* kaezip_ctx = kaezip_get_ctx(alg_comp_type, WCRYPTO_INFLATE, -1); + int win_size, alg; + (void)kz_zlib_analy_alg(-15, &alg, &win_size, -1); + kaezip_ctx_t* kaezip_ctx = kaezip_get_ctx(alg_comp_type, WCRYPTO_INFLATE, win_size, 0); if (kaezip_ctx == NULL) { US_ERR("failed to get kaezip ctx, alg_comp_type %d!", alg_comp_type); setInflateKaezipCtx(strm, 0); diff --git a/KAEZlib/src/v1/kaezip_init.c b/KAEZlib/src/v1/kaezip_init.c new file mode 100644 index 0000000..7c4f6e8 --- /dev/null +++ b/KAEZlib/src/v1/kaezip_init.c @@ -0,0 +1,22 @@ +/* + * @Copyright: Copyright (c) Huawei Technologies Co., Ltd. 2025-2026. All rights reserved. + * @Description: kaezip nosva init head file + * @Author: MaXiaoFeng + * @Date: 2025-07-09 + * @LastEditTime: 2025-07-09 + */ + +#include "kaezip.h" +#include "kaezip_init.h" +#include "kaezip_log.h" + +void *kaezip_init_v1(int win_size, int is_sgl, int comp_type) +{ + kaezip_ctx_t *kaezip_ctx = kaezip_get_ctx(WCRYPTO_RAW_DEFLATE, comp_type, win_size, is_sgl); + if (!kaezip_ctx) { + US_ERR("kaezlib failed to get kaezip ctx!"); + return NULL; + } + US_INFO("kaezlib deflate init success, kaezip_ctx %p!", kaezip_ctx); + return kaezip_ctx; +} \ No newline at end of file diff --git a/KAEZlib/src/v1/kaezip_init.h b/KAEZlib/src/v1/kaezip_init.h new file mode 100644 index 0000000..95fbc6f --- /dev/null +++ b/KAEZlib/src/v1/kaezip_init.h @@ -0,0 +1,16 @@ +/* + * @Copyright: Copyright (c) Huawei Technologies Co., Ltd. 2025-2026. All rights reserved. + * @Description: kaezip nosva init head file + * @Author: MaXiaoFeng + * @Date: 2025-07-09 + * @LastEditTime: 2025-07-09 + */ + +#ifndef KAEZIP_INIT_H +#define KAEZIP_INIT_H + +#include "kaezip_common.h" + +void *kaezip_init_v1(int win_size, int is_sgl, int comp_type); + +#endif \ No newline at end of file diff --git a/scripts/patches/0008-uadk-support-sgl-zero-copy-for-kaelz4.patch b/scripts/patches/0008-uadk-support-sgl-zero-copy-for-kaelz4.patch index f7f1fda..f956c7b 100644 --- a/scripts/patches/0008-uadk-support-sgl-zero-copy-for-kaelz4.patch +++ b/scripts/patches/0008-uadk-support-sgl-zero-copy-for-kaelz4.patch @@ -1,5 +1,5 @@ diff --git a/uadk/Makefile.am b/uadk/Makefile.am -index 9b36ae9..2f5cbd5 100644 +index 9b36ae9..0df1dfd 100644 --- a/uadk/Makefile.am +++ b/uadk/Makefile.am @@ -41,6 +41,7 @@ pkginclude_HEADERS = include/wd.h include/wd_cipher.h include/wd_aead.h \ @@ -11,19 +11,178 @@ index 9b36ae9..2f5cbd5 100644 uadk_driversdir=$(libdir)/uadk diff --git a/uadk/v1/drv/hisi_qm_udrv.c b/uadk/v1/drv/hisi_qm_udrv.c -index 7b0183b..deb9ff0 100644 +index 7b0183b..4959aa3 100644 --- a/uadk/v1/drv/hisi_qm_udrv.c +++ b/uadk/v1/drv/hisi_qm_udrv.c -@@ -61,6 +61,9 @@ static int qm_hw_sgl_sge_init(struct wd_sgl *sgl, struct hisi_sgl *hisi_sgl, +@@ -53,11 +53,11 @@ static int qm_hw_sgl_info(struct hw_sgl_info *sgl_info) + + /* 'num' starts from 1 */ + static int qm_hw_sgl_sge_init(struct wd_sgl *sgl, struct hisi_sgl *hisi_sgl, +- struct wd_mm_br *br, int num, __u32 buf_sz) ++ struct wd_mm_br *br, int num) + { + void *buf; +- +- buf = wd_get_sge_buf(sgl, num); ++ __u32 buf_sz = sgl->sge[num - 1].data_len; ++ buf = sgl->sge[num - 1].buf; if (!buf) return -WD_EINVAL; -+ if (wd_get_sge_datalen(sgl, num, &buf_sz) != WD_SUCCESS) -+ return -WD_EINVAL; +@@ -69,16 +69,17 @@ static int qm_hw_sgl_sge_init(struct wd_sgl *sgl, struct hisi_sgl *hisi_sgl, + } + + hisi_sgl->sge_entries[num - 1].len = buf_sz; +- drv_set_sgl_sge_pri(sgl, num - 1, &hisi_sgl->sge_entries[num - 1]); ++ sgl->sge[num - 1].priv = &hisi_sgl->sge_entries[num - 1]; + + return WD_SUCCESS; + } + + /* 'num' starts from 1 */ + static void qm_hw_sgl_sge_uninit(struct wd_sgl *sgl, struct hisi_sgl *hisi_sgl, +- int num, struct wd_mm_br *br, __u32 buf_sz) ++ int num, struct wd_mm_br *br) + { + void *buf; ++ __u32 buf_sz = sgl->sge[num - 1].data_len; + + buf = wd_get_sge_buf(sgl, num); + if (!buf) +@@ -90,24 +91,17 @@ static void qm_hw_sgl_sge_uninit(struct wd_sgl *sgl, struct hisi_sgl *hisi_sgl, + + static int qm_hw_sgl_init(void *pool, struct wd_sgl *sgl) + { +- int buf_num = wd_get_sgl_buf_num(sgl); +- int sge_num = wd_get_sgl_sge_num(sgl); ++ int buf_num = sgl->buf_num; ++ int sge_num = sgl->sge_num; + struct hisi_sgl *hisi_sgl; + struct wd_mm_br *br; + int i, j, ret; +- __u32 buf_sz; + + if (!pool || buf_num < 0 || sge_num < 0) { + WD_ERR("hw_sgl_init init param err!\n"); + return -WD_EINVAL; + } + +- ret = wd_get_sgl_bufsize(sgl, &buf_sz); +- if (ret) { +- WD_ERR("failed to get sgl bufsize!\n"); +- return ret; +- } +- + buf_num = MIN(buf_num, HISI_SGL_SGE_NUM_MAX); + sge_num = MIN(sge_num, HISI_SGL_SGE_NUM_MAX); + br = drv_get_br(pool); +@@ -123,7 +117,7 @@ static int qm_hw_sgl_init(void *pool, struct wd_sgl *sgl) + hisi_sgl->next_dma = 0; + + for (i = 0; i < buf_num; i++) { +- ret = qm_hw_sgl_sge_init(sgl, hisi_sgl, br, i + 1, buf_sz); ++ ret = qm_hw_sgl_sge_init(sgl, hisi_sgl, br, i + 1); + if (ret) { + WD_ERR("failed to map buf, ret = %d.\n", ret); + goto sgl_sge_init_err; +@@ -136,12 +130,12 @@ static int qm_hw_sgl_init(void *pool, struct wd_sgl *sgl) + drv_set_sgl_sge_pri(sgl, i, &hisi_sgl->sge_entries[i]); + } + +- drv_set_sgl_pri(sgl, hisi_sgl); ++ sgl->priv = hisi_sgl; + return WD_SUCCESS; + + sgl_sge_init_err: + for (j = i - 1; j >= 0; j--) +- qm_hw_sgl_sge_uninit(sgl, hisi_sgl, j + 1, br, buf_sz); ++ qm_hw_sgl_sge_uninit(sgl, hisi_sgl, j + 1, br); + + br->free(br->usr, hisi_sgl); + +@@ -609,10 +603,15 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) + int ret; + __u32 i; + +- wd_fair_lock(&info->sd_lock); ++ if (!q->capa.priv.is_single_thread) { ++ wd_fair_lock(&info->sd_lock); ++ } ++ + if (unlikely((__u32)__atomic_load_n(&info->used, __ATOMIC_RELAXED) > +- info->sq_depth - num - 1)) { +- wd_fair_unlock(&info->sd_lock); ++ info->sq_depth - num - 1)) { ++ if (!q->capa.priv.is_single_thread) ++ wd_fair_unlock(&info->sd_lock); ++ + WD_ERR("queue is full!\n"); + return -WD_EBUSY; + } +@@ -621,7 +620,9 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) + ret = info->sqe_fill[qinfo->atype](req[i], qinfo->priv, + info->sq_tail_index); + if (unlikely(ret != WD_SUCCESS)) { +- wd_fair_unlock(&info->sd_lock); ++ if (!q->capa.priv.is_single_thread) ++ wd_fair_unlock(&info->sd_lock); ++ + WD_ERR("sqe fill error, ret %d!\n", ret); + return -WD_EINVAL; + } +@@ -633,7 +634,8 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) + } + + ret = qm_tx_update(info, num); +- wd_fair_unlock(&info->sd_lock); ++ if (!q->capa.priv.is_single_thread) ++ wd_fair_unlock(&info->sd_lock); + + return ret; + } +@@ -709,7 +711,9 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) + if (unlikely(ret)) + return ret; + +- wd_fair_lock(&info->rc_lock); ++ if (!q->capa.priv.is_single_thread) ++ wd_fair_lock(&info->rc_lock); ++ + for (i = 0; i < num; i++) { + cqe = info->cq_base + info->cq_head_index * sizeof(struct cqe); + if (info->cqc_phase != CQE_PHASE(cqe)) +@@ -718,7 +722,9 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) + mb(); /* make sure the data is all in memory before read */ + sq_head = CQE_SQ_HEAD_INDEX(cqe); + if (unlikely(sq_head >= info->sq_depth)) { +- wd_fair_unlock(&info->rc_lock); ++ if (!q->capa.priv.is_single_thread) ++ wd_fair_unlock(&info->rc_lock); ++ + WD_ERR("CQE_SQ_HEAD_INDEX(%u) error\n", sq_head); + return -WD_EIO; + } +@@ -730,7 +736,9 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) + if (!ret) { + break; + } else if (ret < 0) { +- wd_fair_unlock(&info->rc_lock); ++ if (!q->capa.priv.is_single_thread) ++ wd_fair_unlock(&info->rc_lock); + - hisi_sgl->sge_entries[num - 1].buf = (uintptr_t)br->iova_map(br->usr, - buf, buf_sz); - if (!hisi_sgl->sge_entries[num - 1].buf) { + WD_ERR("recv sqe error %u\n", sq_head); + return ret; + } +@@ -751,7 +759,8 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) + ret = i; + } + +- wd_fair_unlock(&info->rc_lock); ++ if (!q->capa.priv.is_single_thread) ++ wd_fair_unlock(&info->rc_lock); + + return ret; + } diff --git a/uadk/v1/libwd.map b/uadk/v1/libwd.map index d53201b..a43a884 100644 --- a/uadk/v1/libwd.map @@ -37,11 +196,116 @@ index d53201b..a43a884 100644 wd_sglpool_destroy; wd_alloc_sgl; wd_free_sgl; +diff --git a/uadk/v1/wd.h b/uadk/v1/wd.h +index 4618a8c..51a2061 100644 +--- a/uadk/v1/wd.h ++++ b/uadk/v1/wd.h +@@ -80,6 +80,7 @@ struct wcrypto_paras { + */ + __u8 direction; + __u8 is_poll; ++ __u8 is_single_thread; + + /* to be extended */ + }; +diff --git a/uadk/v1/wd_comp.c b/uadk/v1/wd_comp.c +index 169f1b4..87393ae 100644 +--- a/uadk/v1/wd_comp.c ++++ b/uadk/v1/wd_comp.c +@@ -253,16 +253,22 @@ int wcrypto_do_comp(void *ctx, struct wcrypto_comp_op_data *opdata, void *tag) + struct wcrypto_comp_ctx *cctx = ctx; + struct wcrypto_comp_msg *msg, *resp; + __u64 recv_count = 0; +- int ret; ++ int ret = 0; + + if (unlikely(!ctx || !opdata || !opdata->in || !opdata->out)) { + WD_ERR("invalid: comp input parameter err!\n"); + return -EINVAL; + } + +- ret = wd_get_cookies(&cctx->pool, (void **)&cookie, 1); +- if (ret) +- return ret; ++ if (!opdata->cookie || !cctx->q->capa.priv.is_single_thread) { ++ ret = wd_get_cookies(&cctx->pool, (void **)&cookie, 1); ++ if (ret) ++ return ret; ++ ++ opdata->cookie = cookie; ++ } else { ++ cookie = opdata->cookie; ++ } + + msg = &cookie->msg; + if (tag) { +@@ -314,6 +320,7 @@ int wcrypto_do_comp(void *ctx, struct wcrypto_comp_op_data *opdata, void *tag) + + err_put_cookie: + wd_put_cookies(&cctx->pool, (void **)&cookie, 1); ++ opdata->cookie = NULL; + return ret; + } + +@@ -355,7 +362,9 @@ int wcrypto_comp_poll(struct wd_queue *q, unsigned int num) + tag = (void *)(uintptr_t)resp->udata; + ctx = tag->wcrypto_tag.ctx; + ctx->cb(resp, tag->wcrypto_tag.tag); +- wd_put_cookies(&ctx->pool, (void **)&tag, 1); ++ if (!q->capa.priv.is_single_thread) ++ wd_put_cookies(&ctx->pool, (void **)&tag, 1); ++ + resp = NULL; + } while (--tmp); + +diff --git a/uadk/v1/wd_comp.h b/uadk/v1/wd_comp.h +index 4c84ea3..8c0d9e5 100644 +--- a/uadk/v1/wd_comp.h ++++ b/uadk/v1/wd_comp.h +@@ -168,6 +168,7 @@ struct wcrypto_comp_op_data { + __u32 isize; + __u32 checksum; + void *priv; ++ void *cookie; + }; + + struct wcrypto_comp_msg { diff --git a/uadk/v1/wd_sgl.c b/uadk/v1/wd_sgl.c -index cb3b8ee..51dbe9c 100644 +index cb3b8ee..fc1f329 100644 --- a/uadk/v1/wd_sgl.c +++ b/uadk/v1/wd_sgl.c -@@ -167,6 +167,7 @@ static int sgl_chain_build(struct wd_queue *q, struct wd_sglpool *pool) +@@ -44,30 +44,6 @@ + #define ALIGN_SIZE_MAX 0x800 + #define ALIGN_SIZE 64 + +-struct wd_sge { +- /* 'priv' is used by driver, which may be a hardware sgl address */ +- void *priv; +- void *buf; +- __u32 data_len; +- __u32 flag; +- void *sgl; +-}; +- +-struct wd_sgl { +- /* 'priv' is hardware sgl address */ +- void *priv; +- __u8 sge_num; +- __u8 buf_num; +- __u16 buf_sum; +- __u32 sum_data_bytes; +- +- struct wd_sglpool *pool; +- struct wd_sgl *next; +- +- /* user configuration, 60 sges max */ +- struct wd_sge sge[]; +-}; +- + struct wd_sglpool { + struct wd_queue *q; + struct wd_lock sgl_lock; +@@ -167,6 +143,7 @@ static int sgl_chain_build(struct wd_queue *q, struct wd_sglpool *pool) goto alloc_buf_err; } sgl_sge_init(sgl_blk[i], j, buf); @@ -49,7 +313,7 @@ index cb3b8ee..51dbe9c 100644 } ret = drv_init_sgl(q, pool, sgl_blk[i]); -@@ -1029,3 +1030,61 @@ void wd_sgl_memset(struct wd_sgl *sgl, int ch) +@@ -1029,3 +1006,67 @@ void wd_sgl_memset(struct wd_sgl *sgl, int ch) for (i = 0; i < sgl->buf_num; i++) memset(sgl->sge[i].buf, ch, sgl->pool->setup.buf_size); } @@ -57,12 +321,17 @@ index cb3b8ee..51dbe9c 100644 +struct mix_pool { + void *buf_pool; + void *usr_pool; ++ struct wd_sgl *sgl; + wd_map iova_map; +}; + +static void *wd_alloc_blk_in_build_sgl(void *pool, size_t size) +{ + struct mix_pool *mix_pool = pool; ++ ++ if (mix_pool->sgl->priv) ++ return mix_pool->sgl->priv; ++ + return wd_alloc_blk_sgl(mix_pool->buf_pool, size); +} + @@ -93,6 +362,7 @@ index cb3b8ee..51dbe9c 100644 + mix_pool.buf_pool = sgl_pool->buf_br.usr; + mix_pool.usr_pool = buf_list->usr_data; + mix_pool.iova_map = usr_map; ++ mix_pool.sgl = sgl; + sgl_pool->buf_br.iova_map = wd_iova_map_in_build_sgl; + sgl_pool->buf_br.alloc = wd_alloc_blk_in_build_sgl; + sgl_pool->buf_br.usr = &mix_pool; @@ -112,19 +382,10 @@ index cb3b8ee..51dbe9c 100644 + return drv_uninit_sgl(q, pool, sgl); +} diff --git a/uadk/v1/wd_sgl.h b/uadk/v1/wd_sgl.h -index e2e82f2..a23b665 100644 +index e2e82f2..44cbfb3 100644 --- a/uadk/v1/wd_sgl.h +++ b/uadk/v1/wd_sgl.h -@@ -25,6 +25,8 @@ - extern "C" { - #endif - -+#define SGL_MAX_BUF_NUM 60U -+ - struct wd_sgl; - struct wd_sglpool_setup { - /* Total number of SGEs with buffer slices */ -@@ -43,7 +45,21 @@ struct wd_sglpool_setup { +@@ -43,7 +45,45 @@ struct wd_sglpool_setup { struct wd_mm_br br; }; @@ -139,6 +400,30 @@ index e2e82f2..a23b665 100644 + struct wd_buf *buf; + void *usr_data; +}; ++ ++struct wd_sge { ++ /* 'priv' is used by driver, which may be a hardware sgl address */ ++ void *priv; ++ void *buf; ++ __u32 data_len; ++ __u32 flag; ++ void *sgl; ++}; ++struct wd_sglpool; ++struct wd_sgl { ++ /* 'priv' is hardware sgl address */ ++ void *priv; ++ __u8 sge_num; ++ __u8 buf_num; ++ __u16 buf_sum; ++ __u32 sum_data_bytes; ++ ++ struct wd_sglpool *pool; ++ struct wd_sgl *next; ++ ++ /* user configuration, 60 sges max */ ++ struct wd_sge sge[]; ++}; + void *wd_sglpool_create(struct wd_queue *q, struct wd_sglpool_setup *setup); +int wd_build_sgl(struct wd_queue *q, void *pool, struct wd_sgl *sgl, const struct wd_buf_list *buf_list, wd_map usr_map); -- Gitee From 50ea447ac534e45a82de5e701a5f576333cb1c31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=87=A4?= Date: Wed, 16 Jul 2025 07:55:02 +0000 Subject: [PATCH 4/6] feat: kaezip async support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 白凤 --- KAEZlib/src/kaezip_async_adapter.c | 287 +++++++++++++ KAEZlib/src/kaezip_async_adapter.h | 86 ++++ KAEZlib/src/v1/kaezip_async_comp.c | 662 +++++++++++++++++++++++++++++ KAEZlib/src/v1/kaezip_async_comp.h | 111 +++++ KAEZlib/src/v1/wd_queue_memory.c | 139 ++++-- KAEZlib/src/v1/wd_queue_memory.h | 29 +- 6 files changed, 1266 insertions(+), 48 deletions(-) create mode 100644 KAEZlib/src/kaezip_async_adapter.c create mode 100644 KAEZlib/src/kaezip_async_adapter.h create mode 100644 KAEZlib/src/v1/kaezip_async_comp.c create mode 100644 KAEZlib/src/v1/kaezip_async_comp.h diff --git a/KAEZlib/src/kaezip_async_adapter.c b/KAEZlib/src/kaezip_async_adapter.c new file mode 100644 index 0000000..78b9f16 --- /dev/null +++ b/KAEZlib/src/kaezip_async_adapter.c @@ -0,0 +1,287 @@ +/* + * @Copyright: Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * @Description: kaezlib adapter for sva(v2) and nosva(v1) + * @Author: LiuYongYang + * @Date: 2024-02-22 + * @LastEditTime: 2024-02-26 + */ + +#include +#include +#include +#include "kaezlib_common.h" +#include "kaezip_ctx.h" +#include "kaezip.h" +#include "kaezip_utils.h" +#include "kaezip_async_adapter.h" +#include "kaezip_log.h" +#include "uadk/wd.h" + + +static void kaezip_dequeue_process(struct kaezip_async_ctrl *ctrl, kaezip_task_queue *task_queue, int budget, int comp_optype) +{ + int cnt = 0; + // 等待任务 + while (task_queue->pi != task_queue->ci && cnt < budget) { + + // 如果要停止线程 + if (task_queue->stop) { + break; + } + if (kaezip_async_is_thread_do_comp_full(ctrl) == 1) { + break; + } + + // 获取任务 + unsigned int ci = task_queue->ci % KAEZLIB_TASK_QUEUE_DEPTH; + kaezip_async_task_t task; + + if (!atomic_load_explicit(&task_queue->tasks[ci].ready, memory_order_acquire)) { + continue; + } + task = task_queue->tasks[ci]; + atomic_store_explicit(&task_queue->tasks[ci].ready, false, memory_order_release); + // 更新 ci,复用空闲位置 + task_queue->ci++; + // 执行压缩操作 + kaezip_compress_async(ctrl, task.src, task.dst, task.callback, task.result, + task.data_format, comp_optype); + cnt++; + } + return; +} + +static int kaezip_task_queue_init(kaezip_task_queue *task_queue, int index, task_queue_process_fn func) +{ + task_queue->tasks = malloc(KAEZLIB_TASK_QUEUE_DEPTH * sizeof(kaezip_async_task_t)); + if (task_queue->tasks == NULL) { + return KAE_ZLIB_ALLOC_FAIL; + } + task_queue->pi = 0; + task_queue->ci = 0; + task_queue->stop = 0; + task_queue->index = index; + for (int i = 0; i < KAEZLIB_TASK_QUEUE_DEPTH; i++) { + atomic_store_explicit(&task_queue->tasks[i].ready, false, memory_order_release); + } + pthread_mutex_init(&task_queue->mutex, NULL); + pthread_cond_init(&task_queue->cond, NULL); + + if (!func) { + task_queue->is_polling = TRUE; + return KAE_ZLIB_SUCC; + } + + task_queue->is_polling = FALSE; + if (pthread_create(&task_queue->worker_thread, NULL, func, task_queue) != 0) { + US_ERR("Error: Failed to create compression worker thread"); + pthread_mutex_destroy(&task_queue->mutex); + free(task_queue->tasks); + task_queue->tasks = NULL; + return KAE_ZLIB_INIT_FAIL; + } + return KAE_ZLIB_SUCC; +} + +static void kaezip_task_queue_free(kaezip_task_queue *task_queue) +{ + + if (!task_queue->is_polling) { + pthread_mutex_lock(&task_queue->mutex); + task_queue->stop = 1; + pthread_cond_signal(&task_queue->cond); + pthread_mutex_unlock(&task_queue->mutex); + while (task_queue->stop) { + pthread_cond_signal(&task_queue->cond); + } + + pthread_join(task_queue->worker_thread, NULL); + } + + pthread_mutex_destroy(&task_queue->mutex); + + free(task_queue->tasks); + task_queue->tasks = NULL; +} + +static inline int kaezip_enqueue(kaezip_task_queue *task_queue, kaezip_async_task_t *task) +{ + uint32_t pos = atomic_fetch_add(&task_queue->pi, 1); + kaezip_async_task_t *cell = &task_queue->tasks[pos % KAEZLIB_TASK_QUEUE_DEPTH]; + while (atomic_load_explicit(&cell->ready, memory_order_acquire)); // 等待槽空 + *cell = *task; + atomic_store_explicit(&cell->ready, true, memory_order_release); + pthread_cond_signal(&task_queue->cond); + return 0; +} + +static int kaezip_check_param_valid(const struct kaezip_buffer_list *src, struct kaezip_buffer_list *dst, + kaezip_async_callback callback, struct kaezip_result *result) +{ + if (unlikely(src == NULL || dst == NULL || callback == NULL || result == NULL)) { + return KAE_ZLIB_INVAL_PARA; + } + result->src_size = 0; + for (unsigned int i = 0; i < src->buf_num; i++) { + if (unlikely(src->buf[i].data == NULL || src->buf[i].buf_len == 0 || src->buf[i].buf_len > REQ_BUFFER_SIZE)) { + return KAE_ZLIB_INVAL_PARA; + } + result->src_size += src->buf[i].buf_len; + } + + for (unsigned int i = 0; i < dst->buf_num; i++) { + if (unlikely(dst->buf[i].data == NULL || dst->buf[i].buf_len == 0 || dst->buf[i].buf_len > REQ_BUFFER_SIZE)) { + return KAE_ZLIB_INVAL_PARA; + } + result->dst_len += dst->buf[i].buf_len; + } + + if (unlikely(src->buf_num > REQ_BUFFER_MAX || dst->buf_num > REQ_BUFFER_MAX)) { + return KAE_ZLIB_INVAL_PARA; + } + + return KAE_ZLIB_SUCC; +} + +static int kaezip_check_session_valid(kaezip_session *sess, int comp_optype) +{ + if (sess == NULL) { + return KAE_ZLIB_INVAL_PARA; + } + if (sess->comp_optype != comp_optype) { + return KAE_ZLIB_INVAL_PARA; + } + return KAE_ZLIB_SUCC; +} + +static int kaezip_async_do_comp_in_session(kaezip_session *sess, const struct kaezip_buffer_list *src, struct kaezip_buffer_list *dst, + kaezip_async_callback callback, struct kaezip_result *result, + enum kaezip_async_data_format data_format, int comp_optype) +{ + kaezip_task_queue *task_queue = &sess->task_queue; + kaezip_async_task_t task = {0}; + task.src = src; + task.dst = dst; + task.callback = callback; + task.result = result; + task.data_format = data_format; + + if (task_queue->pi != task_queue->ci && !kaezip_async_is_thread_do_comp_full(sess->ctrl)) { + kaezip_dequeue_process(sess->ctrl, task_queue, ASYNC_DEQUEUE_PROCESS_DEFAULT_BUDGET, comp_optype); + } + + if (task_queue->pi != task_queue->ci || kaezip_async_is_thread_do_comp_full(sess->ctrl)) { + return kaezip_enqueue(task_queue, &task); + } else { + return kaezip_compress_async(sess->ctrl, task.src, task.dst, task.callback, task.result, + task.data_format, comp_optype); + } +} + + +int KAEZIP_compress_async_in_session(void *sess, const struct kaezip_buffer_list *src, struct kaezip_buffer_list *dst, + kaezip_async_callback callback, struct kaezip_result *result) +{ + if (unlikely(kaezip_check_session_valid(sess, WCRYPTO_DEFLATE) != KAE_ZLIB_SUCC || kaezip_check_param_valid(src, dst, callback, result) != KAE_ZLIB_SUCC)) { + return KAE_ZLIB_INVAL_PARA; + } + + return kaezip_async_do_comp_in_session(sess, src, dst, callback, result, KAEZIP_ASYNC_BLOCK, WCRYPTO_DEFLATE); +} + +void KAEZIP_compress_async_polling_in_session(void *sess, int budget) +{ + struct kaezip_async_ctrl *ctrl = NULL; + kaezip_task_queue *task_queue = NULL; + int ret = 1; + int cnt = 0; + + if (kaezip_check_session_valid(sess, WCRYPTO_DEFLATE) != KAE_ZLIB_SUCC && + kaezip_check_session_valid(sess, WCRYPTO_INFLATE) != KAE_ZLIB_SUCC) + return; + + ctrl = ((kaezip_session *)sess)->ctrl; + task_queue = &((kaezip_session *)sess)->task_queue; + + while (ret > 0 && cnt < budget) { + ret = kaezip_async_compress_polling(ctrl, ASYNC_POLLING_DEFAULT_BUDGET); + if (!kaezip_async_is_thread_do_comp_full(ctrl)) { + kaezip_dequeue_process(ctrl, task_queue, ASYNC_DEQUEUE_PROCESS_DEFAULT_BUDGET, ((kaezip_session *)sess)->comp_optype); + } + cnt += ret; + } +} + +void *KAEZIP_create_async_compress_session(iova_map_fn usr_map) +{ + kaezip_session *sess = (kaezip_session *)kae_malloc(sizeof(kaezip_session)); + int ret = 0; + + if (!sess) + return NULL; + + sess->usr_map = usr_map; + sess->comp_optype = WCRYPTO_DEFLATE; + ret = kaezip_task_queue_init(&sess->task_queue, 0, NULL); + if (ret != 0) { + free(sess); + return NULL; + } + ret = kaezip_async_instances_init(&sess->ctrl, usr_map, sess->comp_optype); + if (ret != 0) { + kaezip_task_queue_free(&sess->task_queue); + free(sess); + return NULL; + } + + return sess; +} + +void KAEZIP_destroy_async_compress_session(void *sess) +{ + if (sess) { + kaezip_async_instances_deinit(((kaezip_session *)sess)->ctrl); + kaezip_task_queue_free(&((kaezip_session *)sess)->task_queue); + free(sess); + } +} + +void *KAEZIP_create_async_decompress_session(iova_map_fn usr_map) +{ + kaezip_session *sess = (kaezip_session *)kae_malloc(sizeof(kaezip_session)); + int ret = 0; + + if (!sess) + return NULL; + + sess->usr_map = usr_map; + sess->comp_optype = WCRYPTO_INFLATE; + ret = kaezip_task_queue_init(&sess->task_queue, 0, NULL); + if (ret != 0) { + free(sess); + return NULL; + } + ret = kaezip_async_instances_init(&sess->ctrl, usr_map, sess->comp_optype); + if (ret != 0) { + kaezip_task_queue_free(&sess->task_queue); + free(sess); + return NULL; + } + + return sess; +} + +void KAEZIP_destroy_async_decompress_session(void *sess) +{ + KAEZIP_destroy_async_compress_session(sess); +} + +int KAEZIP_decompress_async_in_session(void *sess, const struct kaezip_buffer_list *src, struct kaezip_buffer_list *dst, + kaezip_async_callback callback, struct kaezip_result *result) +{ + if (unlikely(kaezip_check_session_valid(sess, WCRYPTO_INFLATE) != KAE_ZLIB_SUCC || kaezip_check_param_valid(src, dst, callback, result) != KAE_ZLIB_SUCC)) { + return KAE_ZLIB_INVAL_PARA; + } + + return kaezip_async_do_comp_in_session(sess, src, dst, callback, result, KAEZIP_ASYNC_BLOCK, WCRYPTO_INFLATE); +} diff --git a/KAEZlib/src/kaezip_async_adapter.h b/KAEZlib/src/kaezip_async_adapter.h new file mode 100644 index 0000000..f0e4f1e --- /dev/null +++ b/KAEZlib/src/kaezip_async_adapter.h @@ -0,0 +1,86 @@ +/* + * @Copyright: Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * @Description: kaezlib adapter for sva(v2) and nosva(v1) header file + * @Author: LiuYongYang + * @Date: 2024-02-22 + * @LastEditTime: 2024-02-22 + */ + +#ifndef KAEZIP_ASYNC_ADAPTER_H +#define KAEZIP_ASYNC_ADAPTER_H +#include +#include +#include "kaezlib_common.h" +#include "kaezip_adapter.h" +#include "kaezip.h" + +enum { + HW_NONE, + HW_V1, + HW_V2, // unused now + HW_V3 +}; + +#define MAX_TASK_NUM 32 +#define KAEZLIB_TASK_THREAD_NUM 12 +#define KAEZLIB_TASK_QUEUE_DEPTH 1024 +#define ENQUEUE_TIME_OUT_US 1000000 +#define ASYNC_DEQUEUE_PROCESS_DEFAULT_BUDGET 1 +#define ASYNC_POLLING_DEFAULT_BUDGET 1 + +typedef struct { + const struct kaezip_buffer_list *src; + struct kaezip_buffer_list *dst; + kaezip_async_callback callback; + struct kaezip_result *result; + enum kaezip_async_data_format data_format; + atomic_bool ready; +} kaezip_async_task_t; + +typedef struct { + kaezip_async_task_t *tasks; + atomic_uint pi; // pi + volatile unsigned int ci; // ci + pthread_mutex_t mutex; // 保护tasks资源的多线程互斥锁 + pthread_cond_t cond; + pthread_t worker_thread; + volatile int stop; // 用于停止线程的标志 + int index; + int is_polling; +} kaezip_task_queue; + +typedef struct { + kaezip_task_queue task_queue[MAX_TASK_NUM]; + kaezip_task_queue decompress_queue[MAX_TASK_NUM]; + iova_map_fn usr_map; + unsigned int num; + unsigned int decompress_queue_num; + volatile int init; +} kaezip_task_queues; + +struct kaezip_async_ctrl; +typedef struct { + kaezip_task_queue task_queue; + iova_map_fn usr_map; + struct kaezip_async_ctrl *ctrl; + int comp_optype; +} kaezip_session; + +typedef void *(*task_queue_process_fn)(void *); + + +void *kaezip_init_v1(int win_size, int is_sgl, int comp_optype); + +int kaezip_get_win_size(void); + +int kaezip_compress_async(struct kaezip_async_ctrl *ctrl, const struct kaezip_buffer_list *src, struct kaezip_buffer_list *dst, + kaezip_async_callback callback, struct kaezip_result *result, + enum kaezip_async_data_format data_format, int comp_optype); +int kaezip_async_compress_polling(struct kaezip_async_ctrl *ctrl, int budget); + +int kaezip_async_is_thread_do_comp_full(struct kaezip_async_ctrl *ctrl); + +int kaezip_async_instances_init(struct kaezip_async_ctrl **ctrl, iova_map_fn usr_map, int comp_optype); +void kaezip_async_instances_deinit(struct kaezip_async_ctrl *ctrl); + +#endif \ No newline at end of file diff --git a/KAEZlib/src/v1/kaezip_async_comp.c b/KAEZlib/src/v1/kaezip_async_comp.c new file mode 100644 index 0000000..7ec878a --- /dev/null +++ b/KAEZlib/src/v1/kaezip_async_comp.c @@ -0,0 +1,662 @@ +/* + * @Copyright: Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * @Description: kaezlib nosva compress + * @Author: LiuYongYang + * @Date: 2024-02-26 + * @LastEditTime: 2024-03-28 + */ +#include "kaezip_ctx.h" +#include "kaezip_async_comp.h" +#include "kaezip_log.h" +#include "kaezip_init.h" + + +#define PREFL1_64B(ptr) __builtin_prefetch((ptr), 0, 0) +#define PREFL2_64B(ptr) __builtin_prefetch((ptr), 0, 2) +#define PREFL1L2_256B(l1ptr, l2ptr) do { \ + PREFL1_64B((l1ptr) + 0 * 64); \ + PREFL2_64B((l2ptr) + 0 * 64); \ + PREFL1_64B((l1ptr) + 1 * 64); \ + PREFL2_64B((l2ptr) + 1 * 64); \ + PREFL1_64B((l1ptr) + 2 * 64); \ + PREFL2_64B((l2ptr) + 2 * 64); \ + PREFL1_64B((l1ptr) + 3 * 64); \ + PREFL2_64B((l2ptr) + 3 * 64); \ +} while (0) + +#define CRC32D_64B(crc, ptr) do { \ + (crc) = __crc32d((crc), *(const uint64_t *)((ptr) + 8 * 0)); \ + (crc) = __crc32d((crc), *(const uint64_t *)((ptr) + 8 * 1)); \ + (crc) = __crc32d((crc), *(const uint64_t *)((ptr) + 8 * 2)); \ + (crc) = __crc32d((crc), *(const uint64_t *)((ptr) + 8 * 3)); \ + (crc) = __crc32d((crc), *(const uint64_t *)((ptr) + 8 * 4)); \ + (crc) = __crc32d((crc), *(const uint64_t *)((ptr) + 8 * 5)); \ + (crc) = __crc32d((crc), *(const uint64_t *)((ptr) + 8 * 6)); \ + (crc) = __crc32d((crc), *(const uint64_t *)((ptr) + 8 * 7)); \ +} while (0) +#define CRC32D_64B_X4(crc, ptr) do { \ + CRC32D_64B((crc), (ptr) + 0 * 64); \ + CRC32D_64B((crc), (ptr) + 1 * 64); \ + CRC32D_64B((crc), (ptr) + 2 * 64); \ + CRC32D_64B((crc), (ptr) + 3 * 64); \ +} while (0) + +static const uint32_t table0_[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +#define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN) + +static inline uint32_t DecodeFixed32(const char* ptr) +{ + if (PLATFORM_IS_LITTLE_ENDIAN) { + uint32_t result; + memcpy(&result, ptr, sizeof(result)); + return result; + } else { + return ((uint32_t)(ptr[0]) | ((uint32_t)(ptr[1]) << 8) | ((uint32_t)(ptr[2]) << 16) | ((uint32_t)(ptr[3]) << 24)); + } +} + +static inline uint64_t DecodeFixed64(char* ptr) +{ + if (PLATFORM_IS_LITTLE_ENDIAN) { + uint64_t result; + memcpy(&result, ptr, sizeof(result)); + return result; + } else { + uint64_t lo = DecodeFixed32(ptr); + uint64_t hi = DecodeFixed32(ptr + 4); + return (hi << 32) | lo; + } +} + +static inline uint64_t LE_LOAD64(uint8_t* p) +{ + return DecodeFixed64((char*)(p)); +} + +static inline void Slow_CRC32(uint64_t* l, uint8_t** p) +{ + *l = __crc32d(*l, LE_LOAD64(*p)); + *p += 8; +} + +// CRC32 API接口函数 +static uint32_t KAEZIPCRC32(uint32_t crc, const char *data, uint64_t len) +{ + if (data == NULL) { + return crc; + } + + uint64_t crcResult = crc ^ 0xffffffffu; + uint8_t *targetPtr = (uint8_t *)data; + + #define STEP1 \ + do { \ + int c = (crcResult & 0xff) ^ *targetPtr++; \ + crcResult = table0_[c] ^ (crcResult >> 8); \ + } while (0) + #define ALIGN(n, m) ((n + ((1 << m) - 1)) & ~((1 << m) - 1)) + + uint8_t *targetPtrAlign = (uint8_t *)ALIGN((uintptr_t)targetPtr, 4); + + while (targetPtr != targetPtrAlign && len > 0) { + STEP1; + len -= 1; + } + + while (len >= 256) { // 每次计算256B + PREFL1L2_256B(targetPtr + 704, targetPtr + 1984); + CRC32D_64B_X4(crcResult, targetPtr); + targetPtr += 256; + len -= 256; + } + + while (len >= 16) { // 每次计算16B + Slow_CRC32(&crcResult, &targetPtr); + Slow_CRC32(&crcResult, &targetPtr); + len -= 16; + } + + while (len >= 8) { // 每次计算8B + Slow_CRC32(&crcResult, &targetPtr); + len -= 8; + } + + while (len >= 1) { // 每次计算1B + STEP1; + len -= 1; + } + return crcResult ^ 0xffffffffu; +} + +static void kaezip_compress_async_callback(struct kaezip_compress_ctx *compress_ctx, int status) +{ + struct kaezip_result *result = compress_ctx->result; + result->status = status; + result->dst_len = compress_ctx->dst_len; + if (result->ibuf_crc != NULL && status == KAE_ZLIB_SUCC) { + for (int i = 0; i < compress_ctx->src->buf_num; i++) { + *result->ibuf_crc = KAEZIPCRC32(*result->ibuf_crc, compress_ctx->src->buf[i].data, + compress_ctx->src->buf[i].buf_len); + } + } + + if (result->obuf_crc != NULL && status == KAE_ZLIB_SUCC) { + *result->obuf_crc = KAEZIPCRC32(*result->obuf_crc, compress_ctx->dst->buf[0].data, compress_ctx->dst_len); + } + + if (unlikely(status != KAE_ZLIB_SUCC)) { + US_ERR("kae async compress fail! ret = %d\n", status); + } + + compress_ctx->callback(compress_ctx->result); +} + +static void kaezip_async_compress_cb(int status, void *param) +{ + struct kaezip_async_req* req = param; + kaezip_ctx_t* kz_ctx = req->kz_ctx; + struct wcrypto_comp_op_data *op_data = &kz_ctx->op_data; + + kaezip_set_comp_status(kz_ctx); + if (kz_ctx->status == KAEZIP_COMP_VERIFY_ERR) { + US_ERR("kaezip_async_compress_cb status %d !\n", status); + req->compress_ctx->status = KAE_ZLIB_COMP_FAIL; + req->done = 1; + return; + } + + if (op_data->stream_pos == WCRYPTO_COMP_STREAM_NEW) { + op_data->stream_pos = WCRYPTO_COMP_STREAM_OLD; + } + req->done = 1; +} + +static void kaezip_fill_sgl_buffer(kaezip_ctx_t *kz_ctx, const struct wd_buf_list *src, struct wd_buf_list *dst) +{ + struct wcrypto_comp_op_data *op_data = &kz_ctx->op_data; + + op_data->in_len = 0; + kz_ctx->src_sgl = kz_ctx->src_sgl_buf; + wd_build_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, kz_ctx->src_sgl, src, + (wd_map)kz_ctx->usr_map); + + if (dst->buf_num) { + kz_ctx->dst_sgl_usr = kz_ctx->dst_sgl_buf; + wd_build_sgl(kz_ctx->q_node->kae_wd_queue, kz_ctx->q_node->kae_queue_mem_pool, kz_ctx->dst_sgl_usr, dst, + (wd_map)kz_ctx->usr_map); + } + op_data->in_len += kz_ctx->do_comp_len; + op_data->avail_out = kz_ctx->avail_out; + op_data->flush = kz_ctx->flush; + op_data->alg_type = kz_ctx->comp_alg_type; + op_data->stream_pos = WCRYPTO_COMP_STREAM_NEW; +} + +static void kaezip_fill_flat_buffer(kaezip_ctx_t *kz_ctx, const struct wd_buf_list *src) +{ + struct wcrypto_comp_op_data *op_data = &kz_ctx->op_data; + + op_data->in_len = 0; + size_t offset = 0; + for (int i = 0; i < src->buf_num; i++) { + ZIP_wildCopy16((uint8_t *)op_data->in + offset, src->buf[i].data, (uint8_t *)op_data->in + offset + src->buf[i].buf_len); + offset += src->buf[i].buf_len; + } + op_data->in_len += kz_ctx->do_comp_len; + op_data->avail_out = KAEZIP_STREAM_CHUNK_OUT; + op_data->flush = kz_ctx->flush; + op_data->alg_type = kz_ctx->comp_alg_type; + op_data->stream_pos = WCRYPTO_COMP_STREAM_NEW; +} + +static int kaezip_compress_async_impl(kaezip_ctx_t* kz_ctx, const struct wd_buf_list *src, struct wd_buf_list *dst, size_t srcSize, size_t dst_len, void *usr_data) +{ + if (kz_ctx == NULL || src == NULL || srcSize == 0) { + US_ERR("compress parameter invalid\n"); + return KAE_ZLIB_INVAL_PARA; + } + + US_INFO("kaezlib compress srcSize : %lu", srcSize); + kz_ctx->in = (void*)src; + kz_ctx->in_len = srcSize; + kz_ctx->out = NULL; + kz_ctx->consumed = 0; + kz_ctx->produced = 0; + kz_ctx->avail_out = dst_len; + kz_ctx->flush = WCRYPTO_FINISH; + kz_ctx->do_comp_len = kz_ctx->in_len; + kz_ctx->callback = kaezip_async_compress_cb; + kz_ctx->param = usr_data; + + if (kz_ctx->q_node->is_sgl) + kaezip_fill_sgl_buffer(kz_ctx, src, dst); + else + kaezip_fill_flat_buffer(kz_ctx, src); + + return wcrypto_do_comp(kz_ctx->wd_ctx, &kz_ctx->op_data, kz_ctx); // async +} + +static void kaezip_find_and_free_kz_ctx(struct kaezip_async_ctrl *ctrl, kaezip_ctx_t *kz_ctx) +{ + for (int i = 0; i < MAX_NUM_IN_COMP; i++) { + if (ctrl->kz_ctx[i] == kz_ctx) { + kaezip_free_ctx(ctrl->kz_ctx[i]); + ctrl->kz_ctx[i] = NULL; + } + } +} + + +static void kaezip_do_compress_polling(struct kaezip_async_ctrl *ctrl, struct kaezip_async_req *req) +{ + if (req->special_flag != 0) { + return; + } + + kaezip_ctx_t *kz_ctx = req->kz_ctx; + struct wd_queue *q = kz_ctx->q_node->kae_wd_queue; + + int ret = wcrypto_comp_poll(q, 1); + if (unlikely(ret < 0)) { + US_ERR("poll fail! ret = %d\n", ret); + kaezip_find_and_free_kz_ctx(ctrl, kz_ctx); + req->compress_ctx->status = KAE_ZLIB_COMP_FAIL; + req->done = 1; + } + return; +} + +int kaezip_async_is_thread_do_comp_full(struct kaezip_async_ctrl *ctrl) +{ + return ctrl->cur_num_in_comp < MAX_NUM_IN_COMP ? FALSE : TRUE; +} + +void kaezip_ctx_clear(struct kaezip_async_ctrl *ctrl) +{ + for (int i = 0; i < ctrl->ctx_num; i++) { + if (ctrl->kz_ctx[i] != NULL) { + kaezip_free_ctx(ctrl->kz_ctx[i]); + ctrl->kz_ctx[i] = NULL; + } + } +} + +int kaezip_async_instances_init(struct kaezip_async_ctrl **ctrl, iova_map_fn usr_map, int comp_optype) +{ + struct kaezip_async_ctrl *new_ctrl = (struct kaezip_async_ctrl *)kae_malloc(sizeof(struct kaezip_async_ctrl)); + if (!new_ctrl) + return KAE_ZLIB_INIT_FAIL; + + memset(new_ctrl, 0, sizeof(struct kaezip_async_ctrl)); + + int is_sgl = (usr_map != NULL) ? 1 : 0; + + new_ctrl->usr_map = usr_map; + new_ctrl->is_polling = TRUE; + for (int i = 0; i < MAX_NUM_IN_COMP; i++) { + new_ctrl->kz_ctx[i] = kaezip_init_v1(kaezip_get_win_size(), is_sgl, comp_optype); + if (new_ctrl->kz_ctx[i] == NULL) { + goto free_kz_ctx; + } + new_ctrl->kz_ctx[i]->usr_map = new_ctrl->usr_map; + new_ctrl->ctx_num++; + } + + *ctrl = new_ctrl; + return KAE_ZLIB_SUCC; + +free_kz_ctx: + kaezip_ctx_clear(new_ctrl); + free(new_ctrl); + return KAE_ZLIB_INIT_FAIL; +} + +void kaezip_async_instances_deinit(struct kaezip_async_ctrl *ctrl) +{ + kaezip_ctx_clear(ctrl); + free(ctrl); +} + +int kaezip_async_compress_polling(struct kaezip_async_ctrl *ctrl, int budget) +{ + int cnt = 0; + struct kaezip_compress_ctx *compress_ctx = ctrl->ctx_head; + + if (compress_ctx == NULL) { + return 0; + } + struct kaezip_async_req *req = compress_ctx->req_list; + US_DEBUG("do polling. budget = %d", budget); + while (req && cnt < budget) { + kaezip_do_compress_polling(ctrl, req); + if (!req->done) { + return KAE_ZLIB_PROCESS_HW_BUSY; + } + + int ret = -1; + + if (likely(compress_ctx->status == KAE_ZLIB_SUCC)) { + ret = compress_ctx->kaezip_post_process_handle(req, &req->src, + compress_ctx->dst->buf[0].data + compress_ctx->dst_len, + &compress_ctx->save_info); + if (ret < 0) { + US_ERR("kaezip_post_process_handle err. ret=%d\n", ret); + } + } + + if (ret >= 0 && compress_ctx->status == KAE_ZLIB_SUCC) { + compress_ctx->dst_len += ret; + compress_ctx->status = KAE_ZLIB_SUCC; + } else { + compress_ctx->dst_len = 0; + if (compress_ctx->status == KAE_ZLIB_SUCC) { + compress_ctx->status = KAE_ZLIB_COMP_FAIL; + } + + US_ERR("kae post process fail! req index %d src size 0x%lx dst size 0x%lx last %d ret = %d status %d\n", + req->idx, req->src_size, compress_ctx->dstCapacity, req->last, ret, compress_ctx->status); + } + + if (!req->special_flag) { + ctrl->cur_num_in_comp--; + } + + ctrl->ctx_head = compress_ctx->next; + kaezip_compress_async_callback(compress_ctx, compress_ctx->status); + compress_ctx = ctrl->ctx_head; + + if (ctrl->ctx_head == NULL) { + ctrl->tail = NULL; + break; + } + req = compress_ctx->req_list; + cnt++; + } + + return cnt; +} + +static struct timespec polling_timeout_10us = { 0, 10000 }; // 10us超时 + +static kaezip_ctx_t *kaezip_async_init_ctx(struct kaezip_async_ctrl *ctrl, int comp_optype) +{ + int enter_polling = 0; + kaezip_ctx_t *kz_ctx = NULL; + + if (unlikely(ctrl->kz_ctx[ctrl->ctx_index] == NULL)) { + int is_sgl = (ctrl->usr_map != NULL) ? 1 : 0; + kz_ctx = kaezip_init_v1(kaezip_get_win_size(), is_sgl, comp_optype); + while (kz_ctx == NULL) { // 本质来说,这个初始化函数就初始化了其中的kaeConfig,其他是没有的,所以在外面要赋值 + struct timespec timeout; + if (enter_polling == 0) { + get_time_out_spec(&timeout, &polling_timeout_10us); + enter_polling = 1; + } + + // 如果发生超时则提前退出,到polling阶段再处理切软算 + if (unlikely((ctrl->stop_flag && *ctrl->stop_flag != 0) || check_time_out(&timeout))) { + return NULL; + } + + (void)kaezip_async_compress_polling(ctrl, 1); + // 如果本线程已经idle,则使用之前已经申请到的kz_ctx + if (ctrl->cur_num_in_comp == 0 && ctrl->kz_ctx[0] != NULL) { + ctrl->ctx_index = 0; + kz_ctx = ctrl->kz_ctx[ctrl->ctx_index]; + } else { + kz_ctx = kaezip_init_v1(kaezip_get_win_size(), is_sgl, comp_optype); + } + } + ctrl->kz_ctx[ctrl->ctx_index] = kz_ctx; + ctrl->kz_ctx[ctrl->ctx_index]->usr_map = ctrl->usr_map; + } else { + while (kaezip_async_is_thread_do_comp_full(ctrl)) { + (void)kaezip_async_compress_polling(ctrl, 1); + // 此分支不需要超时判断,kaezlib_async_compress_polling本身具有超时机制,如果硬件超时,会主动释放资源 + if (unlikely(ctrl->stop_flag && *ctrl->stop_flag != 0)) { + return NULL; + } + + if (ctrl->kz_ctx[ctrl->ctx_index] == NULL) { + // polling 过程可能发生超时,kz资源可能已经释放 + return NULL; + } + } + kaezip_init_ctx(ctrl->kz_ctx[ctrl->ctx_index]); + kz_ctx = ctrl->kz_ctx[ctrl->ctx_index]; + } + + ctrl->ctx_index = (ctrl->ctx_index + 1) % MAX_NUM_IN_COMP; + ctrl->cur_num_in_comp++; + return kz_ctx; +} + +static int kaezip_send_async_compress(struct kaezip_async_ctrl *ctrl, struct kaezip_async_req *req, int comp_optype) +{ + // 1.kae上下文初始化函数调用 + req->kz_ctx = kaezip_async_init_ctx(ctrl, comp_optype); + if (unlikely(req->kz_ctx == NULL)) { + US_ERR("Get kae hw ctx failed!\n"); + return KAE_ZLIB_INIT_FAIL; + } + size_t compress_size = req->src_size; + size_t dst_len = req->dst_len; + int ret = kaezip_compress_async_impl(req->kz_ctx, &req->src, &req->dst, compress_size, dst_len, (void *)req); + if (unlikely(ret != KAE_ZLIB_SUCC)) { + kaezip_find_and_free_kz_ctx(ctrl, req->kz_ctx); + ctrl->ctx_index = (ctrl->ctx_index + MAX_NUM_IN_COMP - 1) % MAX_NUM_IN_COMP; + ctrl->cur_num_in_comp--; + req->kz_ctx = NULL; + US_ERR("Send compress cmd to kae hw failed! status %d\n", ret); + return ret; + } + return ret; +} + +static void kaezip_fill_hw_req_dst_buf_list(struct kaezip_async_req *req, const struct kaezip_buffer_list *dst) +{ + unsigned int index = 0; + + req->dst.buf = req->dst_buffers; + req->dst.buf_num = 0; + req->dst.usr_data = dst->usr_data; + req->dst_len = 0; + + while (index < dst->buf_num) { + req->dst.buf[req->dst.buf_num].data = dst->buf[index].data; + req->dst.buf[req->dst.buf_num].buf_len = dst->buf[index].buf_len; + req->dst_len += req->dst.buf[req->dst.buf_num].buf_len; + index += 1; + + req->dst.buf_num++; + } +} + +static void kaezip_fill_hw_req_src_buf_list(struct kaezip_async_req *req, const struct kaezip_buffer_list *src) +{ + unsigned int index = 0; + + req->src.buf = req->buffers; + req->src.buf_num = 0; + req->src.usr_data = src->usr_data; + req->src_size = 0; + + while (index < src->buf_num) { + req->src.buf[req->src.buf_num].data = src->buf[index].data; + req->src.buf[req->src.buf_num].buf_len = src->buf[index].buf_len; + req->src_size += req->src.buf[req->src.buf_num].buf_len; + index += 1; + + req->src.buf_num++; + } +} + +static int kaezip_async_compress_process(struct kaezip_async_ctrl *ctrl, void *arg, int comp_optype) +{ + struct kaezip_compress_ctx *compress_ctx = arg; + + // 转换衔接 + size_t srcSize = compress_ctx->srcSize; + size_t remainingLength = srcSize; // 该值用于保存剩余的待压缩数据长度 + + // 针对zlib的matchlength转换定义的数据结构 + int idx = 0; + while (remainingLength) { + struct kaezip_async_req *req = &compress_ctx->req; + req->idx = idx; + req->special_flag = 0; + req->last = 0; + req->done = 0; + req->compress_ctx = compress_ctx; + req->next = NULL; + kaezip_fill_hw_req_src_buf_list(req, compress_ctx->src); + kaezip_fill_hw_req_dst_buf_list(req, compress_ctx->dst); + remainingLength -= req->src_size; + // 最后一块实际下发给芯片的长度是 src_size - MFLIMIT + if (remainingLength == 0) { + req->last = 1; + } + + int ret = KAE_ZLIB_SUCC; + ret = kaezip_send_async_compress(ctrl, req, comp_optype); + compress_ctx->req_list = req; + idx++; + if (ret != KAE_ZLIB_SUCC) { + req->compress_ctx->status = KAE_ZLIB_COMP_FAIL; + req->special_flag = 1; + req->done = 1; + } + } + + return KAE_ZLIB_SUCC; +} + +static int kaezip_async_block_padding(struct kaezip_async_req *req, const struct wd_buf_list *source, + void *dst_tmp, struct kaezip_priv_save_info *save_info) +{ + kaezip_ctx_t* kz_ctx = req->kz_ctx; + + struct wcrypto_comp_op_data *op_data = &kz_ctx->op_data; + unsigned int output_len = op_data->produced; + return output_len; +} + +const kaezip_post_process_handle_t g_post_process_handle[KAEZIP_ASYNC_BUTT] = { + [KAEZIP_ASYNC_BLOCK] = kaezip_async_block_padding, +}; + +int kaezip_compress_async(struct kaezip_async_ctrl *ctrl, const struct kaezip_buffer_list *src, struct kaezip_buffer_list *dst, + kaezip_async_callback callback, struct kaezip_result *result, + enum kaezip_async_data_format data_format, int comp_optype) +{ + struct kaezip_compress_ctx *compress_ctx = &ctrl->ctx[ctrl->ctx_index]; + + compress_ctx->dst = dst; + compress_ctx->dstCapacity = result->dst_len; + compress_ctx->src = src; + compress_ctx->srcSize = result->src_size; + compress_ctx->callback = callback; + compress_ctx->result = result; + compress_ctx->data_format = data_format; + compress_ctx->kaezip_post_process_handle = g_post_process_handle[data_format]; + compress_ctx->dst_len = 0; + compress_ctx->next = NULL; + compress_ctx->status = KAE_ZLIB_SUCC; + compress_ctx->req_list = NULL; + compress_ctx->save_info.prev_last_lit_ptr = NULL; + compress_ctx->save_info.prev_last_lit_len = 0; + compress_ctx->save_info.src = src; + + if (ctrl->ctx_head) { + ctrl->tail->next = compress_ctx; + } else { + ctrl->ctx_head = compress_ctx; + } + ctrl->tail = compress_ctx; + + if (unlikely(kaezip_async_compress_process(ctrl, compress_ctx, comp_optype) != KAE_ZLIB_SUCC)) { + goto free_compress_ctx; + } + + return KAE_ZLIB_SUCC; + +free_compress_ctx: + ctrl->ctx_head = compress_ctx->next; + if (ctrl->ctx_head == NULL) { + ctrl->tail = NULL; + } + + if (ctrl->is_polling) { + return KAE_ZLIB_ALLOC_FAIL; + } + result->status = KAE_ZLIB_ALLOC_FAIL; + result->dst_len = 0; + callback(result); + return KAE_ZLIB_ALLOC_FAIL; +} diff --git a/KAEZlib/src/v1/kaezip_async_comp.h b/KAEZlib/src/v1/kaezip_async_comp.h new file mode 100644 index 0000000..577ab7f --- /dev/null +++ b/KAEZlib/src/v1/kaezip_async_comp.h @@ -0,0 +1,111 @@ +/* + * @Copyright: Copyright (c) Huawei Technologies Co., Ltd. 2024-2024. All rights reserved. + * @Description: kaezlib nosva compress header file + * @Author: LiuYongYang + * @Date: 2024-02-26 + * @LastEditTime: 2024-03-28 + */ + +#ifndef KAEZIP_ASYNC_COMP_H +#define KAEZIP_ASYNC_COMP_H + +#include +#include "kaezlib_common.h" +#include +#include +#include +#include "uadk/v1/wd_sgl.h" + +#define KAE_ZLIB_REBUILD_FAIL -257 +#define KAE_ZLIB_SW_RETURN_0_FAIL -256 + +#define HARDWARE_BLOCK_SIZE (64 * 1024) // 硬件支持的最大压缩块大小 + +#define MAX_NUM_IN_COMP MAX_KAE_CTX_DEPTH // 每个线程最多允许同时进行的压缩任务数 + +#if defined(__x86_64__) + typedef U64 reg_t; /* 64-bits in x32 mode */ +#else + typedef size_t reg_t; /* 32-bits in x32 mode */ +#endif + +typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) ZLIB_unalign; + +struct kaezip_compress_ctx; +struct kaezip_async_req; + +struct kaezip_priv_save_info { + void *prev_last_lit_ptr; // 用户输入数据>64K需要分块、返回BLOCK格式、现有保序返回切块压缩结果的约束下,记录前一个分块的last literal信息 + size_t prev_last_lit_len; + unsigned int prev_last_lit_buf_index; // 用户输入数据>64K需要分块、返回BLOCK格式、现有保序返回切块压缩结果的约束下,记录前一个分块的last literal信息 + const struct kaezip_buffer_list *src; +}; + +typedef int (*kaezip_post_process_handle_t)(struct kaezip_async_req *req, const struct wd_buf_list *source, + void *dest, struct kaezip_priv_save_info *save_info); + +struct kaezip_async_req { + kaezip_ctx_t *kz_ctx; + struct wd_buf_list src; + struct wd_buf_list dst; + struct wd_buf buffers[REQ_BUFFER_MAX]; + struct wd_buf dst_buffers[REQ_BUFFER_MAX]; + size_t src_size; + size_t dst_len; + U32 idx; + U32 special_flag; + U16 last; + U16 buf_start_index; + U32 done; + struct kaezip_compress_ctx *compress_ctx; + struct kaezip_async_req *next; +}; + +struct kaezip_compress_ctx { + size_t srcSize; + size_t dstCapacity; + size_t dst_len; + const struct kaezip_buffer_list *src; + struct kaezip_buffer_list *dst; + struct kaezip_priv_save_info save_info; + kaezip_async_callback callback; + struct kaezip_result *result; + enum kaezip_async_data_format data_format; + kaezip_post_process_handle_t kaezip_post_process_handle; + struct kaezip_async_req *req_list; + struct kaezip_async_req req; + struct kaezip_compress_ctx *next; + int status; +}; + +struct kaezip_seq_result { + unsigned int seq_num; + unsigned char seq_start[]; +}; + +struct kaezip_async_ctrl { + struct kaezip_compress_ctx *ctx_head; + struct kaezip_compress_ctx *tail; + struct kaezip_compress_ctx ctx[MAX_NUM_IN_COMP]; + int cur_num_in_comp; // 当前正在压缩的任务数量 + kaezip_ctx_t *kz_ctx[MAX_NUM_IN_COMP]; + int ctx_num; + int ctx_index; + volatile int *stop_flag; + iova_map_fn usr_map; + int is_polling; +}; + +#define KZL_MEMCPY_16(dst, src, size) vst1q_u8((dst), vld1q_u8(src)) + +static inline void ZIP_wildCopy16(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { KZL_MEMCPY_16(d,s,16); d+=16; s+=16; } while (d #include "wd_queue_memory.h" #include "kaezip_log.h" +#include "uadk/v1/wd_sgl.h" #include "uadk/v1/wd_bmm.h" #include "uadk/v1/wd_comp.h" #include "kaezip_ctx.h" void kaezip_wd_free_queue(struct wd_queue* queue); -struct wd_queue* kaezip_wd_new_queue(int comp_alg_type, int comp_optype); -struct wd_queue* kaezip_wd_new_queue(int comp_alg_type, int comp_optype) +struct wd_queue* kaezip_wd_new_queue(int comp_alg_type, int comp_optype, int is_sgl) { struct wd_queue* queue = (struct wd_queue *)kae_malloc(sizeof(struct wd_queue)); if (queue == NULL) { @@ -56,6 +56,9 @@ struct wd_queue* kaezip_wd_new_queue(int comp_alg_type, int comp_optype) queue->capa.latency = 0; queue->capa.throughput = 0; + if (is_sgl) + queue->capa.priv.is_single_thread = 1; + struct wcrypto_paras *priv = (struct wcrypto_paras *)&(queue->capa.priv); priv->direction = comp_optype; int ret = wd_request_queue(queue); @@ -69,6 +72,23 @@ struct wd_queue* kaezip_wd_new_queue(int comp_alg_type, int comp_optype) return queue; } +void* kaezip_create_sgl_mempool(struct wd_queue *q) +{ + struct wd_sglpool_setup setup; + + memset(&setup, 0, sizeof(setup)); + + setup.buf_size = COMP_BLOCK_SIZE; + setup.align_size = 64; + setup.sge_num_in_sgl = 1; + setup.buf_num_in_sgl = setup.sge_num_in_sgl; + setup.sgl_num = MAX_KAE_CTX_DEPTH; // Zlib模式下,每个SGL output仅需要 1 段buf + setup.buf_num = setup.buf_num_in_sgl * setup.sgl_num + setup.sgl_num * 2 + 2; + void *mempool = wd_sglpool_create(q, &setup); + + return mempool; +} + void kaezip_wd_free_queue(struct wd_queue* queue) { if (queue != NULL) { @@ -78,22 +98,6 @@ void kaezip_wd_free_queue(struct wd_queue* queue) } } -void* kaezip_create_alg_wd_queue_mempool(struct wd_queue *q) -{ - unsigned int block_size = COMP_BLOCK_SIZE; - unsigned int block_num = COMP_BLOCK_NUM; - struct wd_blkpool_setup setup; - - memset(&setup, 0, sizeof(setup)); - setup.block_size = block_size; - setup.block_num = block_num; - setup.align_size = 64; // align with 64 - - void *mempool = wd_blkpool_create(q, &setup); - - return mempool; -} - void kaezip_wd_queue_mempool_destroy(void *pool) { return wd_blkpool_destroy(pool); @@ -124,6 +128,11 @@ void kaezip_wd_free_blk(void *pool, void *blk) return wd_free_blk(pool, blk); } +static void kaezip_sgl_pool_destroy(void *pool) +{ + return wd_sglpool_destroy(pool); +} + KAE_QUEUE_POOL_HEAD_S* kaezip_init_queue_pool(int algtype) { KAE_QUEUE_POOL_HEAD_S *kae_pool = NULL; @@ -155,14 +164,14 @@ KAE_QUEUE_POOL_HEAD_S* kaezip_init_queue_pool(int algtype) return kae_pool; } -static KAE_QUEUE_DATA_NODE_S* kaezip_get_queue_data_from_list(KAE_QUEUE_POOL_HEAD_S* pool_head, int type) +static KAE_QUEUE_DATA_NODE_S* kaezip_get_queue_data_from_list(KAE_QUEUE_POOL_HEAD_S* pool_head, int type, int win_size, int is_sgl) { int i = 0; KAE_QUEUE_DATA_NODE_S *queue_data_node = NULL; KAE_QUEUE_POOL_HEAD_S *temp_pool = pool_head; if ((pool_head->pool_use_num == 0) && (pool_head->next == NULL)) { - return queue_data_node; + return NULL; } while (temp_pool != NULL) { @@ -172,17 +181,18 @@ static KAE_QUEUE_DATA_NODE_S* kaezip_get_queue_data_from_list(KAE_QUEUE_POOL_HEA } if (KAE_SPIN_TRYLOCK(temp_pool->kae_queue_pool[i].spinlock)) { - if (temp_pool->kae_queue_pool[i].node_data == NULL) { + queue_data_node = temp_pool->kae_queue_pool[i].node_data; + if (queue_data_node == NULL) { KAE_SPIN_UNLOCK(temp_pool->kae_queue_pool[i].spinlock); continue; } - if (temp_pool->kae_queue_pool[i].node_data->comp_alg_type != type) { + if (queue_data_node->comp_alg_type != type || queue_data_node->is_sgl != is_sgl || + queue_data_node->win_size != win_size) { KAE_SPIN_UNLOCK(temp_pool->kae_queue_pool[i].spinlock); continue; } - queue_data_node = temp_pool->kae_queue_pool[i].node_data; temp_pool->kae_queue_pool[i].node_data = NULL; KAE_SPIN_UNLOCK(temp_pool->kae_queue_pool[i].spinlock); @@ -194,10 +204,26 @@ static KAE_QUEUE_DATA_NODE_S* kaezip_get_queue_data_from_list(KAE_QUEUE_POOL_HEA temp_pool = temp_pool->next; } - return queue_data_node; + return NULL; } -static void kaezip_free_wd_queue_memory(KAE_QUEUE_DATA_NODE_S *queue_node, kae_release_priv_ctx_cb release_fn) +void* kaezip_create_alg_wd_queue_mempool(struct wd_queue *q) +{ + unsigned int block_size = COMP_BLOCK_SIZE; + unsigned int block_num = COMP_BLOCK_NUM * MAX_KAE_CTX_DEPTH; + struct wd_blkpool_setup setup; + + memset(&setup, 0, sizeof(setup)); + setup.block_size = block_size; + setup.block_num = block_num; + setup.align_size = 64; // align with 64 + + void *mempool = wd_blkpool_create(q, &setup); + + return mempool; +} + +void kaezip_free_wd_queue_memory(KAE_QUEUE_DATA_NODE_S *queue_node, kae_release_priv_ctx_cb release_fn) { if (queue_node != NULL) { if (release_fn != NULL && queue_node->priv_ctx != NULL) { @@ -206,7 +232,11 @@ static void kaezip_free_wd_queue_memory(KAE_QUEUE_DATA_NODE_S *queue_node, kae_r } if (queue_node->kae_queue_mem_pool != NULL) { - kaezip_wd_queue_mempool_destroy(queue_node->kae_queue_mem_pool); + if (queue_node->is_sgl) + kaezip_sgl_pool_destroy(queue_node->kae_queue_mem_pool); + else + kaezip_wd_queue_mempool_destroy(queue_node->kae_queue_mem_pool); + queue_node->kae_queue_mem_pool = NULL; } if (queue_node->kae_wd_queue != NULL) { @@ -221,7 +251,7 @@ static void kaezip_free_wd_queue_memory(KAE_QUEUE_DATA_NODE_S *queue_node, kae_r US_DEBUG("free wd queue success"); } -static KAE_QUEUE_DATA_NODE_S* kaezip_new_wd_queue_memory(int comp_alg_type, int comp_type) +static KAE_QUEUE_DATA_NODE_S* kaezip_new_wd_queue_memory(int comp_alg_type, int comp_type, int win_size, int is_sgl) { KAE_QUEUE_DATA_NODE_S *queue_node = NULL; @@ -232,13 +262,19 @@ static KAE_QUEUE_DATA_NODE_S* kaezip_new_wd_queue_memory(int comp_alg_type, int } memset(queue_node, 0, sizeof(KAE_QUEUE_DATA_NODE_S)); - queue_node->kae_wd_queue = kaezip_wd_new_queue(comp_alg_type, comp_type); + queue_node->kae_wd_queue = kaezip_wd_new_queue(comp_alg_type, comp_type, is_sgl); if (queue_node->kae_wd_queue == NULL) { US_ERR("new wd queue fail"); goto err; } - queue_node->kae_queue_mem_pool = kaezip_create_alg_wd_queue_mempool(queue_node->kae_wd_queue); + if (is_sgl) { + queue_node->kae_queue_mem_pool = kaezip_create_sgl_mempool(queue_node->kae_wd_queue); + } else { + queue_node->kae_queue_mem_pool = kaezip_create_alg_wd_queue_mempool(queue_node->kae_wd_queue); + } + queue_node->is_sgl = is_sgl; + queue_node->win_size = win_size; if (queue_node->kae_queue_mem_pool == NULL) { US_ERR("request mempool fail!"); goto err; @@ -252,7 +288,7 @@ err: return NULL; } -KAE_QUEUE_DATA_NODE_S* kaezip_get_node_from_pool(KAE_QUEUE_POOL_HEAD_S* pool_head, int comp_alg_type, int comp_type) +KAE_QUEUE_DATA_NODE_S* kaezip_get_node_from_pool(KAE_QUEUE_POOL_HEAD_S* pool_head, int comp_alg_type, int comp_type, int win_size, int is_sgl) { KAE_QUEUE_DATA_NODE_S *queue_data_node = NULL; @@ -261,9 +297,9 @@ KAE_QUEUE_DATA_NODE_S* kaezip_get_node_from_pool(KAE_QUEUE_POOL_HEAD_S* pool_hea return NULL; } - queue_data_node = kaezip_get_queue_data_from_list(pool_head, comp_alg_type); + queue_data_node = kaezip_get_queue_data_from_list(pool_head, comp_alg_type, win_size, is_sgl); if (queue_data_node == NULL) { - queue_data_node = kaezip_new_wd_queue_memory(comp_alg_type, comp_type); + queue_data_node = kaezip_new_wd_queue_memory(comp_alg_type, comp_type, win_size, is_sgl); } return queue_data_node; @@ -278,7 +314,7 @@ static void kaezip_set_pool_use_num(KAE_QUEUE_POOL_HEAD_S *pool, int set_num) (void)pthread_mutex_unlock(&pool->kae_queue_mutex); } -int kaezip_put_node_to_pool(KAE_QUEUE_POOL_HEAD_S* pool_head, KAE_QUEUE_DATA_NODE_S* node_data) +int kaezip_put_node_to_pool(KAE_QUEUE_POOL_HEAD_S* pool_head, KAE_QUEUE_DATA_NODE_S* node_data, kae_release_priv_ctx_cb release_fn) { int i = 0; KAE_QUEUE_POOL_HEAD_S *temp_pool = pool_head; @@ -328,16 +364,10 @@ int kaezip_put_node_to_pool(KAE_QUEUE_POOL_HEAD_S* pool_head, KAE_QUEUE_DATA_NO } } /* if not added,free it */ - kaezip_free_wd_queue_memory(node_data, kaezip_free_ctx); + kaezip_free_wd_queue_memory(node_data, release_fn); return 0; } -void kaezip_queue_pool_reset(KAE_QUEUE_POOL_HEAD_S* pool_head) -{ - (void)pool_head; - return; -} - void kaezip_queue_pool_destroy(KAE_QUEUE_POOL_HEAD_S* pool_head, kae_release_priv_ctx_cb release_fn) { int error = 0; @@ -442,3 +472,32 @@ void kaezip_queue_pool_check_and_release(KAE_QUEUE_POOL_HEAD_S* pool_head, kae_r return; } +void *kaezip_dma_map_sgl(void *usr, void *va, size_t sz) +{ + return wd_sgl_iova_map(usr, va, sz); +} + +void kaezip_dma_unmap_sgl(void *usr, void *va, void *dma, size_t sz) +{ + return wd_sgl_iova_unmap(usr, dma, va); +} + +void *kaezip_wd_alloc_sgl(void *pool, size_t size) +{ + if (pool == NULL) { + US_ERR("mem pool empty!"); + return NULL; + } + + return wd_alloc_sgl(pool, size); +} + +void kaezip_wd_free_sgl(void *pool, void *sgl) +{ + if (pool == NULL) { + US_ERR("mem pool empty!"); + return; + } + + return wd_free_sgl(pool, sgl); +} diff --git a/KAEZlib/src/v1/wd_queue_memory.h b/KAEZlib/src/v1/wd_queue_memory.h index cbc27a9..d00bbbb 100644 --- a/KAEZlib/src/v1/wd_queue_memory.h +++ b/KAEZlib/src/v1/wd_queue_memory.h @@ -1,12 +1,12 @@ /* * Copyright (C) 2019. Huawei Technologies Co., Ltd. All rights reserved. - * + * * This program is free software; you can redistribute it and/or modify - * it under the terms of the zlib License. + * it under the terms of the zlib License. * You may obtain a copy of the License at - * + * * https://www.zlib.net/zlib_license.html - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the @@ -40,6 +40,8 @@ typedef struct KAE_QUEUE_DATA_NODE { void *kae_queue_mem_pool; int comp_alg_type; void *priv_ctx; + int win_size; + int is_sgl; } KAE_QUEUE_DATA_NODE_S; typedef struct KAE_QUEUE_POOL_NODE { @@ -47,7 +49,7 @@ typedef struct KAE_QUEUE_POOL_NODE { struct kae_spinlock spinlock; time_t add_time; // int index; /* index of node,init:-1 */ - KAE_QUEUE_DATA_NODE_S *node_data; + KAE_QUEUE_DATA_NODE_S *node_data; // KAE_QUEUE_POOL_NODE_S *next; } KAE_QUEUE_POOL_NODE_S; @@ -67,11 +69,22 @@ void *kaezip_dma_map(void *usr, void *va, size_t sz); void kaezip_dma_unmap(void *usr, void *va, void *dma, size_t sz); KAE_QUEUE_POOL_HEAD_S* kaezip_init_queue_pool (int algtype); -KAE_QUEUE_DATA_NODE_S* kaezip_get_node_from_pool(KAE_QUEUE_POOL_HEAD_S* pool_head, int alg_comp_type, int comp_optype); -int kaezip_put_node_to_pool (KAE_QUEUE_POOL_HEAD_S* pool_head, KAE_QUEUE_DATA_NODE_S* node_data); -void kaezip_queue_pool_reset(KAE_QUEUE_POOL_HEAD_S* pool_head); +KAE_QUEUE_DATA_NODE_S* kaezip_get_node_from_pool(KAE_QUEUE_POOL_HEAD_S* pool_head, int alg_comp_type, int comp_optype, int win_size, int is_sgl); +int kaezip_put_node_to_pool (KAE_QUEUE_POOL_HEAD_S* pool_head, KAE_QUEUE_DATA_NODE_S* node_data, kae_release_priv_ctx_cb release_fn); void kaezip_queue_pool_destroy(KAE_QUEUE_POOL_HEAD_S* pool_head, kae_release_priv_ctx_cb release_fn); void kaezip_queue_pool_check_and_release(KAE_QUEUE_POOL_HEAD_S* pool_head, kae_release_priv_ctx_cb release_ectx_fn); +void kaezip_wd_free_blk(void *pool, void *blk); +void *kaezip_wd_alloc_blk(void *pool, size_t size); +void *kaezip_wd_alloc_sgl(void *pool, size_t size); +void kaezip_wd_free_sgl(void *pool, void *sgl); +void *kaezip_dma_map(void *usr, void *va, size_t sz); +void kaezip_dma_unmap(void *usr, void *va, void *dma, size_t sz); +void *kaezip_dma_map_sgl(void *usr, void *va, size_t sz); +void kaezip_dma_unmap_sgl(void *usr, void *va, void *dma, size_t sz); + +KAE_QUEUE_POOL_HEAD_S* kaezip_init_queue_pool (int algtype); +void kaezip_free_wd_queue_memory(KAE_QUEUE_DATA_NODE_S *queue_node, kae_release_priv_ctx_cb release_fn); + #endif -- Gitee From 2bebec749e4bd953b30d30355efa87c2fe29e803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=87=A4?= Date: Wed, 16 Jul 2025 08:00:55 +0000 Subject: [PATCH 5/6] test: add zlib async support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 白凤 --- KAELz4/test/kzip/alg/KAELz4/lz4.c | 3 +- KAELz4/test/kzip/alg/KAELz4/lz4Frame.c | 5 +- .../test/kzip/alg/KAELz4Async/lz4AsyncBlock.c | 26 +++++-- .../test/kzip/alg/KAELz4Async/lz4AsyncFrame.c | 23 +++++- .../test/kzip/alg/KAELz4Async/lz4AsyncLz77.c | 22 +++++- .../kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c | 22 +++++- KAELz4/test/kzip/alg/KAEZlib/deflate.c | 77 +++++++++++++++++++ KAELz4/test/kzip/alg/KAEZlib/zlib.c | 48 ++++++++++++ .../test/kzip/alg/KAEZlibAsync/deflateAsync.c | 77 +++++++++++++++++++ KAELz4/test/kzip/alg/manage.c | 3 + KAELz4/test/kzip/alg/manage.h | 12 ++- KAELz4/test/kzip/build.sh | 4 +- KAELz4/test/kzip/compress_ctx.h | 5 ++ KAELz4/test/kzip/runFunc.sh | 9 ++- KAELz4/test/kzip/runPerf.sh | 3 +- 15 files changed, 306 insertions(+), 33 deletions(-) create mode 100644 KAELz4/test/kzip/alg/KAEZlib/deflate.c create mode 100644 KAELz4/test/kzip/alg/KAEZlib/zlib.c create mode 100644 KAELz4/test/kzip/alg/KAEZlibAsync/deflateAsync.c diff --git a/KAELz4/test/kzip/alg/KAELz4/lz4.c b/KAELz4/test/kzip/alg/KAELz4/lz4.c index 9cb8022..115024f 100644 --- a/KAELz4/test/kzip/alg/KAELz4/lz4.c +++ b/KAELz4/test/kzip/alg/KAELz4/lz4.c @@ -23,8 +23,7 @@ static int lz4_bound(int src_len) { } // LZ4 初始化 -static int lz4_init() { - printf("Initializing LZ4...\n"); +static int lz4_init(struct compress_ctx *ctx) { return 0; } diff --git a/KAELz4/test/kzip/alg/KAELz4/lz4Frame.c b/KAELz4/test/kzip/alg/KAELz4/lz4Frame.c index 314af86..d2a6bba 100644 --- a/KAELz4/test/kzip/alg/KAELz4/lz4Frame.c +++ b/KAELz4/test/kzip/alg/KAELz4/lz4Frame.c @@ -21,7 +21,7 @@ static int lz4_frame_compress(const unsigned char *src, unsigned int *src_len, u } *dst_len = ret; // lz4 LZ4_compress_default的返回值才是压缩后的空间大小。 - return ret > 0 ? 0 : ret; + return ret > 0 ? 0 : ret; } // 单个 LZ4 frame 格式文件的解压实现 @@ -45,8 +45,7 @@ static int lz4_frame_bound(int src_len) { return needlen; } // LZ4 frame 初始化 -static int lz4_frame_init() { - printf("Initializing LZ4...\n"); +static int lz4_frame_init(struct compress_ctx *ctx) { return 0; } diff --git a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncBlock.c b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncBlock.c index 2162c22..cf2f1b3 100644 --- a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncBlock.c +++ b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncBlock.c @@ -1,4 +1,5 @@ #include "../manage.h" +#include "../../compress_ctx.h" #include #include @@ -12,21 +13,35 @@ static int lz4async_block_compress(void *sess, const struct kaelz4_buffer_list * } // LZ4 解压实现 -static int lz4async_block_decompress(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) +static int lz4async_block_decompress(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) { int ret = LZ4_decompress_async(src, dst, cb, result); return ret; } -static int lz4_bound(int src_len) { +static int lz4_bound(int src_len) +{ return LZ4_compressBound(src_len); } // LZ4 初始化 -static int lz4_async_block_init() { - printf("Initializing LZ4...\n"); +static int lz4_async_block_init(struct compress_ctx *ctx) +{ + if (ctx->is_polling && ctx->compress_or_decompress) { + ctx->sess = KAELZ4_create_async_compress_session(ctx->usr_map); + } else { + LZ4_async_compress_init(ctx->usr_map); + } return 0; } +static void lz4_async_block_cleanup(struct compress_ctx *ctx) +{ + if (ctx->sess) + KAELZ4_destroy_async_compress_session(ctx->sess); + else + LZ4_teardown_async_compress(); +} + // LZ4 算法实例 compression_algorithm_t lz4async_block_algorithm = { .name = "kaelz4async_block", @@ -34,7 +49,8 @@ compression_algorithm_t lz4async_block_algorithm = { .poll = KAELZ4_compress_async_polling_in_session, .bound = lz4_bound, .async_decompress = lz4async_block_decompress, - .init = lz4_async_block_init + .init = lz4_async_block_init, + .cleanup = lz4_async_block_cleanup }; // 注册 LZ4 算法 diff --git a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncFrame.c b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncFrame.c index 1cc4f14..4851153 100644 --- a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncFrame.c +++ b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncFrame.c @@ -1,4 +1,5 @@ #include "../manage.h" +#include "../../compress_ctx.h" #include #include #include @@ -29,7 +30,7 @@ static int lz4_async_frame_compress(void *sess, const struct kaelz4_buffer_list } // 单个 LZ4 frame 格式文件的解压实现 -static int lz4_async_frame_decompress(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) +static int lz4_async_frame_decompress(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) { int ret = LZ4F_decompress_async(src, dst, cb, result, NULL); return ret; @@ -42,11 +43,24 @@ static int lz4_frame_bound(int src_len) { return LZ4F_compressFrameBound(src_len, NULL); } // LZ4 frame 初始化 -static int lz4_frame_init() { - printf("Initializing LZ4...\n"); +static int lz4_frame_init(struct compress_ctx *ctx) { + if (ctx->is_polling && ctx->compress_or_decompress) { + ctx->sess = KAELZ4_create_async_compress_session(ctx->usr_map); + } else { + LZ4_async_compress_init(ctx->usr_map); + } return 0; } +static void lz4_async_frame_cleanup(struct compress_ctx *ctx) +{ + if (ctx->sess) + KAELZ4_destroy_async_compress_session(ctx->sess); + else + LZ4_teardown_async_compress(); +} + + // LZ4 frame 算法实例 compression_algorithm_t lz4_async_frame_algorithm = { .name = "kaelz4async_frame", @@ -54,7 +68,8 @@ compression_algorithm_t lz4_async_frame_algorithm = { .poll = KAELZ4_compress_async_polling_in_session, .async_compress = lz4_async_frame_compress, .async_decompress = lz4_async_frame_decompress, - .init = lz4_frame_init + .init = lz4_frame_init, + .cleanup = lz4_async_frame_cleanup }; // 注册 LZ4 frame 算法 diff --git a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c index ccb263c..2cae34e 100644 --- a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c +++ b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77.c @@ -1,4 +1,5 @@ #include "../manage.h" +#include "../../compress_ctx.h" #include #include @@ -9,7 +10,7 @@ static int lz4async_block_compress(void *sess, const struct kaelz4_buffer_list * } // LZ4 解压实现 -static int lz4async_block_decompress(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) +static int lz4async_block_decompress(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) { int ret = LZ4_decompress_async(src, dst, cb, result); return ret; @@ -19,11 +20,23 @@ static int lz4_bound(int src_len) { return LZ4_compressBound(src_len); } // LZ4 初始化 -static int lz4_async_block_init() { - printf("Initializing LZ4...\n"); +static int lz4_async_block_init(struct compress_ctx *ctx) { + if (ctx->compress_or_decompress) + ctx->sess = KAELZ4_create_async_compress_session(ctx->usr_map); + else + LZ4_async_compress_init(ctx->usr_map); + return 0; } +static void lz4_async_block_cleanup(struct compress_ctx *ctx) +{ + if (ctx->sess) + KAELZ4_destroy_async_compress_session(ctx->sess); + else + LZ4_teardown_async_compress(); +} + // LZ4 算法实例 compression_algorithm_t lz4async_lz77_algorithm = { .name = "kaelz4async_lz77", @@ -31,7 +44,8 @@ compression_algorithm_t lz4async_lz77_algorithm = { .poll = KAELZ4_compress_async_polling_in_session, .bound = lz4_bound, .async_decompress = lz4async_block_decompress, - .init = lz4_async_block_init + .init = lz4_async_block_init, + .cleanup = lz4_async_block_cleanup, }; // 注册 LZ4 算法 diff --git a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c index 983be7e..f5d6b13 100644 --- a/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c +++ b/KAELz4/test/kzip/alg/KAELz4Async/lz4AsyncLz77Frame.c @@ -1,4 +1,5 @@ #include "../manage.h" +#include "../../compress_ctx.h" #include #include #include @@ -8,7 +9,7 @@ static int lz4async_block_compress(void *sess, const struct kaelz4_buffer_list * return KAELZ4_compress_lz77_async_in_session(sess, src, dst, cb, result); } -static int lz4async_block_decompress(const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) +static int lz4async_block_decompress(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) { int ret = LZ4F_decompress_async(src, dst, cb, result, NULL); return ret; @@ -18,11 +19,23 @@ static int lz4_bound(int src_len) { return LZ4F_compressFrameBound(src_len, NULL); } // LZ4 初始化 -static int lz4_async_block_init() { - printf("Initializing LZ4...\n"); +static int lz4_async_block_init(struct compress_ctx *ctx) { + if (ctx->is_polling && ctx->compress_or_decompress) { + ctx->sess = KAELZ4_create_async_compress_session(ctx->usr_map); + } else { + LZ4_async_compress_init(ctx->usr_map); + } return 0; } +static void lz4_async_block_cleanup(struct compress_ctx *ctx) +{ + if (ctx->sess) + KAELZ4_destroy_async_compress_session(ctx->sess); + else + LZ4_teardown_async_compress(); +} + // LZ4 算法实例 compression_algorithm_t lz4async_lz77_frame_algorithm = { .name = "kaelz4async_lz77_frame", @@ -30,7 +43,8 @@ compression_algorithm_t lz4async_lz77_frame_algorithm = { .poll = KAELZ4_compress_async_polling_in_session, .bound = lz4_bound, .async_decompress = lz4async_block_decompress, - .init = lz4_async_block_init + .init = lz4_async_block_init, + .cleanup = lz4_async_block_cleanup, }; // 注册 LZ4 算法 diff --git a/KAELz4/test/kzip/alg/KAEZlib/deflate.c b/KAELz4/test/kzip/alg/KAEZlib/deflate.c new file mode 100644 index 0000000..20ba52c --- /dev/null +++ b/KAELz4/test/kzip/alg/KAEZlib/deflate.c @@ -0,0 +1,77 @@ +#include "../manage.h" +#include +#include + +static int g_level = 6; +static int g_windowBits = -15; + +static int zlib_bound(int src_len) { + return compressBound(src_len); +} + +// LZ4 压缩实现 +static int zlib_compress(const unsigned char *src, unsigned int *src_len, unsigned char *dst, unsigned int *dst_len) +{ + z_stream strm; + strm.zalloc = (alloc_func)0; + strm.zfree = (free_func)0; + strm.opaque = (voidpf)0; + (void)deflateInit2_(&strm, g_level, Z_DEFLATED, g_windowBits, 8, Z_DEFAULT_STRATEGY, "1.2.11", sizeof(z_stream)); + + strm.next_in = (z_const Bytef*) src; + strm.next_out = (void *)dst; + strm.avail_in = *src_len; + strm.avail_out = zlib_bound(*src_len); + // strm.avail_out = *dst_len; + int ret = deflate(&strm, Z_FINISH); + + *dst_len = strm.total_out; + // deflateReset(&strm); + (void)deflateEnd(&strm); + if (ret < Z_OK) { + printf("[KAE_ERR]:compress2 failed, ret is:%d. (dst_len = %d; src_len = %d.)\n", ret, *dst_len, *src_len); + } + return ret > 0 ? 0 : ret; +} + +// LZ4 解压实现 +static int zlib_decompress(const unsigned char *src, unsigned int *src_len, unsigned char *dst, unsigned int *dst_len) +{ + z_stream strm; + strm.zalloc = (alloc_func)0; + strm.zfree = (free_func)0; + strm.opaque = (voidpf)0; + (void)inflateInit2_(&strm, g_windowBits, "1.2.11", sizeof(z_stream)); + strm.next_in = (z_const Bytef *)src; + strm.next_out = dst; + strm.avail_in = *src_len; + strm.avail_out = *dst_len; + int ret = inflate(&strm, Z_FINISH); + + *dst_len = strm.total_out; + // inflateReset(&strm); + (void)inflateEnd(&strm); + if (ret < Z_OK) { + printf("[KAE_ERR]:uncompress2 failed, ret is:%d.\n", ret); + } + return ret > 0 ? 0 : ret; +} + +// LZ4 初始化 +static int zlib_init(struct compress_ctx *ctx) { + return 0; +} + +// LZ4 算法实例 +compression_algorithm_t zlib_deflate_algorithm = { + .name = "kaezlib_deflate", + .bound = zlib_bound, + .compress = zlib_compress, + .decompress = zlib_decompress, + .init = zlib_init +}; + +void register_zlib_deflate_algorithm(void) +{ + register_algorithm(&zlib_deflate_algorithm); +} \ No newline at end of file diff --git a/KAELz4/test/kzip/alg/KAEZlib/zlib.c b/KAELz4/test/kzip/alg/KAEZlib/zlib.c new file mode 100644 index 0000000..1b36da1 --- /dev/null +++ b/KAELz4/test/kzip/alg/KAEZlib/zlib.c @@ -0,0 +1,48 @@ +#include "../manage.h" +#include +#include + +static int g_level = 6; + +// LZ4 压缩实现 +static int zlib_compress(const unsigned char *src, unsigned int *src_len, unsigned char *dst, unsigned int *dst_len) +{ + int ret = compress2(dst, (unsigned long *)dst_len, src, *src_len, g_level); + if (ret != Z_OK) { + printf("[KAE_ERR]:compress2 failed, ret is:%d. (dst_len = %d; src_len = %d.)\n", ret, *dst_len, *src_len); + } + return ret > 0 ? 0 : ret; +} + +// LZ4 解压实现 +static int zlib_decompress(const unsigned char *src, unsigned int *src_len, unsigned char *dst, unsigned int *dst_len) +{ + int ret = uncompress2(dst, (unsigned long *)dst_len, src, (unsigned long *)src_len); + if (ret != Z_OK) { + printf("[KAE_ERR]:uncompress2 failed, ret is:%d.\n", ret); + } + return ret > 0 ? 0 : ret; +} + +static int zlib_bound(int src_len) { + return compressBound(src_len); +} + +// LZ4 初始化 +static int zlib_init(struct compress_ctx *ctx) { + return 0; +} + +// LZ4 算法实例 +compression_algorithm_t zlib_algorithm = { + .name = "kaezlib", + .bound = zlib_bound, + .compress = zlib_compress, + .decompress = zlib_decompress, + .init = zlib_init +}; + +void register_zlib_algorithm(void) +{ + register_algorithm(&zlib_algorithm); +} \ No newline at end of file diff --git a/KAELz4/test/kzip/alg/KAEZlibAsync/deflateAsync.c b/KAELz4/test/kzip/alg/KAEZlibAsync/deflateAsync.c new file mode 100644 index 0000000..593f508 --- /dev/null +++ b/KAELz4/test/kzip/alg/KAEZlibAsync/deflateAsync.c @@ -0,0 +1,77 @@ +#include "../manage.h" +#include "../../compress_ctx.h" +#include +#include +#include +#include +static int g_windowBits = -15; + +// Zlib 压缩实现 +static int zlibasync_deflate_compress(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) +{ + return KAEZIP_compress_async_in_session(sess, (const struct kaezip_buffer_list *)src, (struct kaezip_buffer_list *)dst, (kaezip_async_callback)cb, (struct kaezip_result *)result); +} + +static int zlibasync_deflate_decompress(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result) +{ + int ret = KAEZIP_decompress_async_in_session(sess, (const struct kaezip_buffer_list *)src, (struct kaezip_buffer_list *)dst, (kaezip_async_callback)cb, (struct kaezip_result *)result); + return ret; +} +// Zlib 同步解压实现 +static int zlib_decompress(const unsigned char *src, unsigned int *src_len, unsigned char *dst, unsigned int *dst_len) +{ + z_stream strm; + strm.zalloc = (alloc_func)0; + strm.zfree = (free_func)0; + strm.opaque = (voidpf)0; + (void)inflateInit2_(&strm, g_windowBits, "1.2.11", sizeof(z_stream)); + strm.next_in = (z_const Bytef *)src; + strm.next_out = dst; + strm.avail_in = *src_len; + strm.avail_out = *dst_len; + int ret = inflate(&strm, Z_FINISH); + + *dst_len = strm.total_out; + // inflateReset(&strm); + (void)inflateEnd(&strm); + if (ret < Z_OK) { + printf("[KAE_ERR]:uncompress2 failed, ret is:%d.\n", ret); + } + return ret > 0 ? 0 : ret; +} + +static int zlib_bound(int src_len) { + return compressBound(src_len); +} +// Zlib 初始化 +static int zlib_async_deflate_init(struct compress_ctx *ctx) { + if(ctx->compress_or_decompress == 1) { + ctx->sess = KAEZIP_create_async_compress_session(ctx->usr_map); + } else { + ctx->sess = KAEZIP_create_async_decompress_session(ctx->usr_map); + } + return 0; +} + +static void zlib_async_deflate_cleanup(struct compress_ctx *ctx) +{ + KAEZIP_destroy_async_compress_session(ctx->sess); +} + +// Zlib 算法实例 +compression_algorithm_t zlibasync_block_algorithm = { + .name = "kaezlibasync_deflate", + .async_compress = zlibasync_deflate_compress, + .poll = KAEZIP_compress_async_polling_in_session, + .bound = zlib_bound, + .async_decompress = zlibasync_deflate_decompress, + .decompress = zlib_decompress, + .init = zlib_async_deflate_init, + .cleanup = zlib_async_deflate_cleanup, +}; + +// 注册 Zlib 算法 +void register_zlibasync_block_algorithm(void) +{ + register_algorithm(&zlibasync_block_algorithm); +} \ No newline at end of file diff --git a/KAELz4/test/kzip/alg/manage.c b/KAELz4/test/kzip/alg/manage.c index 64d5ce2..1ee0be1 100644 --- a/KAELz4/test/kzip/alg/manage.c +++ b/KAELz4/test/kzip/alg/manage.c @@ -50,4 +50,7 @@ void initialize_algorithms(void) { register_lz4async_frame_algorithm(); register_lz4async_lz77_algorithm(); register_lz4async_lz77_frame_algorithm(); + register_zlib_algorithm(); + register_zlib_deflate_algorithm(); + register_zlibasync_block_algorithm(); } \ No newline at end of file diff --git a/KAELz4/test/kzip/alg/manage.h b/KAELz4/test/kzip/alg/manage.h index bcd7865..d6ffc39 100644 --- a/KAELz4/test/kzip/alg/manage.h +++ b/KAELz4/test/kzip/alg/manage.h @@ -13,10 +13,11 @@ #include #include +struct compress_ctx; typedef struct { const char *name; // 同步接口 - int (*init)(); + int (*init)(struct compress_ctx *ctx); int (*bound)(int src_len); // 我们约定:由框架统一读取待处理的数据以及大小。统一申请待存储的空间以及大小。 // 压缩解压算法需要输出正确的处理后产物,输出正确的 dst_len。 @@ -26,13 +27,13 @@ typedef struct { unsigned char *dst, unsigned int *dst_len); int (*decompress)(const unsigned char *src, unsigned int *src_len, unsigned char *dst, unsigned int *dst_len); - void (*cleanup)(); - void (*poll)(void *sess, int budget); + void (*cleanup)(struct compress_ctx *ctx); + void (*poll)(void *sess, int budget); // polling 模式下,根据session查询结果的接口 // 异步接口 int (*async_compress)(void *sess, const struct kaelz4_buffer_list *src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result); - int (*async_decompress)(const struct kaelz4_buffer_list* src, struct kaelz4_buffer_list *dst, + int (*async_decompress)(void *sess, const struct kaelz4_buffer_list* src, struct kaelz4_buffer_list *dst, lz4_async_callback cb, struct kaelz4_result *result); } compression_algorithm_t; @@ -53,4 +54,7 @@ void register_lz4async_block_algorithm(void); void register_lz4async_frame_algorithm(void); void register_lz4async_lz77_algorithm(void); void register_lz4async_lz77_frame_algorithm(void); +void register_zlib_algorithm(void); +void register_zlib_deflate_algorithm(void); +void register_zlibasync_block_algorithm(void); #endif diff --git a/KAELz4/test/kzip/build.sh b/KAELz4/test/kzip/build.sh index 9e1f092..55b8658 100644 --- a/KAELz4/test/kzip/build.sh +++ b/KAELz4/test/kzip/build.sh @@ -1,5 +1,5 @@ export LD_LIBRARY_PATH=/usr/local/kaelz4/lib:/usr/local/kaezstd/lib:/usr/local/kaezip/lib:$LD_LIBRARY_PATH -export C_INCLUDE_PATH=/usr/local/kaelz4/include:$C_INCLUDE_PATH +export C_INCLUDE_PATH=/usr/local/kaelz4/include:/usr/local/kaezip/include:$C_INCLUDE_PATH # echo "测试kaezip 和 kaelz4,请使用 sh build.sh kaelz4, 默认kaelz4" # echo "测试kaezip 和 kaezstd,请使用 sh build.sh kaezstd" @@ -10,7 +10,7 @@ TestEnv=${TestEnv:=kaelz4} echo "build kzip for $TestEnv..." case "$TestEnv" in kaelz4) - gcc -g -o kzip main.c delayRecord.c datagen.c alg/manage.c alg/*/*.c scene_test_functions/*c -lz -lnuma -lrt -L/usr/local/kaelz4/lib -llz4 -lkaelz4 -DBUILD_ENV=$TestEnv -DBUILD_ENV_KAELZ4=1 -O3 -fstack-protector-all -Wall -Werror -lpthread + gcc -g -o kzip main.c delayRecord.c datagen.c alg/manage.c alg/*/*.c scene_test_functions/*c -lz -lnuma -lrt -L/usr/local/kaelz4/lib -llz4 -lkaelz4 -L/usr/local/kaezip/lib -lkaezip -DBUILD_ENV=$TestEnv -DBUILD_ENV_KAELZ4=1 -O3 -fstack-protector-all -Wall -Werror -lpthread ;; *) ;; diff --git a/KAELz4/test/kzip/compress_ctx.h b/KAELz4/test/kzip/compress_ctx.h index 0a01869..adc9a0d 100644 --- a/KAELz4/test/kzip/compress_ctx.h +++ b/KAELz4/test/kzip/compress_ctx.h @@ -9,6 +9,7 @@ struct fragment_metadata { unsigned int offset; // 分片的起始偏移量 unsigned int len; // 分片的长度 + size_t src_chunk_len; }; struct compress_out_buf { @@ -37,6 +38,7 @@ struct __attribute__((aligned(64))) compress_param { struct kaelz4_buffer_list src; struct kaelz4_buffer_list dst; struct kaelz4_buffer_list tuple; + struct kaelz4_buffer_list *dst_buf_list; uint64_t start_time; volatile unsigned int done; struct kaelz4_buffer src_buf[1024]; @@ -69,8 +71,11 @@ struct compress_ctx { int with_crc; unsigned int src_buf_num; void *sess; + iova_map_fn usr_map; uint64_t *all_delays; + int is_polling; int is_lz77_mode; + int is_zlib; }; diff --git a/KAELz4/test/kzip/runFunc.sh b/KAELz4/test/kzip/runFunc.sh index bc04d57..7f6700b 100644 --- a/KAELz4/test/kzip/runFunc.sh +++ b/KAELz4/test/kzip/runFunc.sh @@ -5,12 +5,13 @@ export KAE_LZ4_COMP_TYPE=8 sh build.sh kaelz4 -Algthm=("kaelz4" "kaelz4_frame" "kaelz4async_block" "kaelz4async_frame" "kaelz4async_lz77") +Algthm=("kaelz4" "kaelz4_frame" "kaelz4async_block" "kaelz4async_frame" "kaelz4async_lz77" "kaezlibasync_deflate") Datasets=("calgary" "itemdata" "dickens" "mozilla" "mr" "nci" "ooffice" "osdb" "reymont" "samba" "sao" "webster" "xml" "x-ray") Datasets=("calgary" "itemdata" "ooffice" "osdb" "samba" "webster" "xml" "x-ray") -BlockSize=("0" "4" "8" "16" "60" "64" "68" "128" "512" "1024" "2090" "10244") +BlockSize=("4" "8" "16" "60" "64" "68" "128" "512" "1024" "2090" "8191" "8192" "8193" "10244") Polling=("1" "0") + current_time=$(date +"%Y-%m-%d_%H-%M-%S") LogFile=kaelz4-function.log.$current_time testFilePath=../../../scripts/compressTestDataset @@ -50,8 +51,8 @@ for da in "${Datasets[@]}"; do rm -rf $testFileOrigin rm -rf $testFileComped.meta rm -rf $testFileOrigin.meta - ./kzip -A $alg -m 2 -f $testFile -o $testFileComped -n 2 -s $bs -i 256 -p $polling >> $LogFile # 压缩测试 - ./kzip -d -A $alg -m 1 -f $testFileComped -o $testFileOrigin -n 2 -s $bs -i 256 >> $LogFile # 压缩测试 + ./kzip -A $alg -m 1 -f $testFile -o $testFileComped -n 2 -s $bs -i 256 -p $polling >> $LogFile # 压缩测试 + ./kzip -d -A $alg -m 1 -f $testFileComped -o $testFileOrigin -n 2 -s $bs -i 256 >> $LogFile # 压缩测试 diffFile $testFile $testFileOrigin done done diff --git a/KAELz4/test/kzip/runPerf.sh b/KAELz4/test/kzip/runPerf.sh index 226d539..bf4b1b7 100644 --- a/KAELz4/test/kzip/runPerf.sh +++ b/KAELz4/test/kzip/runPerf.sh @@ -45,7 +45,7 @@ while getopts "m:l:n:w:f:o:v:A:h:g:s:c:i:t:p:k:r:P:" opt; do done Alg=${Alg:=kaezip} -multiProcess=${multiProcess:=2} +multiProcess=${multiProcess:=1} fileChunk=${fileChunk:=0} loppTimes=${loppTimes:=1} inflightNum=${inflightNum:=64} @@ -98,6 +98,7 @@ taskset -c $bindCpu0AndCpu1 ./kzip -A $Alg -m $multiProcess -f $testFile -o $tes date # sleep 1 +#taskset -c $bindCpu0AndCpu1 gdb --args ./kzip -d -A "kaezlib_deflate" -m $multiProcess -f $testFileComped -o $testFileOrigin -c $cpuConfigStr -n $loppTimes -s $fileChunk -i $inflightNum -t $threadsNum -r $isTestCrc taskset -c $bindCpu0AndCpu1 ./kzip -d -A $Alg -m $multiProcess -f $testFileComped -o $testFileOrigin -c $cpuConfigStr -n $loppTimes -s $fileChunk -i $inflightNum -t $threadsNum -r $isTestCrc date if [[ ! -f "$testFile" ]]; then -- Gitee From 43804999ae5bdd77f7ca49330d67d9e9ff7764a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=99=BD=E5=87=A4?= Date: Wed, 16 Jul 2025 08:01:02 +0000 Subject: [PATCH 6/6] test: add zlib async support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 白凤 --- KAELz4/test/kzip/main.c | 268 ++++++++++++++++++---------------------- 1 file changed, 120 insertions(+), 148 deletions(-) diff --git a/KAELz4/test/kzip/main.c b/KAELz4/test/kzip/main.c index 8fc38ed..e226f3c 100644 --- a/KAELz4/test/kzip/main.c +++ b/KAELz4/test/kzip/main.c @@ -20,6 +20,7 @@ #include #include +#include "kaezip.h" #include "lz4.h" #include "alg/manage.h" #include "delayRecord.h" @@ -32,15 +33,16 @@ #define GB *(1U << 30) int g_file_chunk_size = 0; // 测试分片大小。 默认 0kb 不分片 -int g_log_level = 1; // 打印日志级别。 0:不统计时延。 1:时延统计 +int g_log_level = 0; // 打印日志级别。 0:不统计时延。 1:时延统计 int g_cpu_threads_per_core = 1; // 是否开启超线程。 1: 未开启。 2:开启 -int g_enable_huge_pages = 0; // 是否使用内存大页 +int g_enable_huge_pages = 1; // 是否使用内存大页 int g_enable_polling_mode = 0; // 是否使用单线程polling模式 -#define HPAGE_SIZE (2 * 1024 * 1024) // 2MB大页 +#define HPAGE_SIZE (1024 * 1024 * 1024) // 1GB大页 #define PAGE_SHIFT 12 #define PAGE_SIZE (1UL << PAGE_SHIFT) #define PFN_MASK ((1UL << 55) - 1) +#define HW_MAX_SGE_LEN 0x800000UL struct cache_page_map { uint64_t *entries; @@ -130,20 +132,21 @@ void free_cache_page_map(struct cache_page_map *cache) { } } +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) void *get_huge_pages(size_t total_size) { void *addr = mmap( NULL, total_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_1GB, -1, 0 ); // 申请内存大页 if (addr == MAP_FAILED) { fprintf(stderr, "申请内存大页失败。\n"); fprintf(stderr, "系统可能没有足够的大页可用。\n"); - fprintf(stderr, "请尝试分配更多大页: sudo sysctl vm.nr_hugepages=10000\n"); + fprintf(stderr, "请尝试分配更多大页: echo 10 | tee /sys/devices/system/node/node0/hugepages/hugepages-1048576kB/nr_hugepages\n"); exit(EXIT_FAILURE); } @@ -180,7 +183,7 @@ static uLong read_inputFile(struct compress_ctx *ctx, const char* fileName, void int huge_page_num = (int)(input_size * sizeof(Bytef) / HPAGE_SIZE) + 1; // 大页大小为2M,申请大页时申请大小需为大页大小的整数倍 size_t total_size = huge_page_num * HPAGE_SIZE; *input = get_huge_pages(total_size); - printf("申请的大页虚拟地址: %p\n", *input); + printf("申请的大页虚拟地址: %p len:%ld\n", *input, total_size); if (*input == NULL) { return 0; @@ -320,6 +323,7 @@ static size_t write_outputFile(const char* outFileName, struct compress_out_buf fragments[num].offset = base_offset; fragments[num].len = out_buf_node->len; // 假设每个分片固定大小 100 + fragments[num].src_chunk_len = (size_t)g_file_chunk_size * 1024; base_offset += fragments[num].len; // 更新下一个分片的偏移量 uint32_t tmp_crc = crc32(0, out_buf_node->buf_addr, out_buf_node->len); if (out_buf_node->obuf_crc != 0 && out_buf_node->obuf_crc != tmp_crc) { @@ -408,7 +412,11 @@ static void compress_async_polling(struct compress_param *param) } out_buf->len = param->dst_len; - out_buf->buf_addr = param->dst.buf[0].data; + if (ctx->is_zlib) + out_buf->buf_addr = param->tuple.buf[0].data; + else + out_buf->buf_addr = param->dst.buf[0].data; + out_buf->src_len = param->src_len; out_buf->src = param->src.buf[0].data; out_buf->next = NULL; @@ -429,12 +437,12 @@ static void compress_async_polling(struct compress_param *param) static void compress_async_callback(struct kaelz4_result *result) { // printf("[user]异步 callback 了!!\n"); - if (result->status != 0) { + if (unlikely(result->status != 0)) { printf("[user]回调压缩异常 : %d\n", result->status); } struct compress_param *param = (struct compress_param *)result->user_data; - if (param->ctx->is_lz77_mode) { + if (unlikely(param->ctx->is_lz77_mode)) { const char *alg_name = param->ctx->algorithm->name; if(strcmp(alg_name, "kaelz4async_lz77_frame") == 0) { if (KAELZ4_rebuild_lz77_to_frame(¶m->src, ¶m->tuple, ¶m->dst, result, NULL) != 0) { @@ -448,8 +456,8 @@ static void compress_async_callback(struct kaelz4_result *result) } param->dst_len = result->dst_len; - if ((param->ctx->algorithm->async_compress != NULL && param->ctx->compress_or_decompress != 0) || - ((param->ctx->algorithm->async_decompress != NULL && param->ctx->compress_or_decompress == 0))) { + if ((!param->ctx->is_polling || !param->ctx->is_zlib) && ((param->ctx->algorithm->async_compress != NULL && param->ctx->compress_or_decompress != 0) || + ((param->ctx->algorithm->async_decompress != NULL && param->ctx->compress_or_decompress == 0)))) { wmb(); } @@ -476,7 +484,7 @@ static int do_real_compression(struct compress_ctx *ctx, const struct kaelz4_buf } } else { // 解压逻辑 if (ctx->algorithm->async_decompress) { - return ctx->algorithm->async_decompress(src, dst, compress_async_callback, param); + return ctx->algorithm->async_decompress(ctx->sess, src, dst, compress_async_callback, param); } else { return ctx->algorithm->decompress(src->buf[0].data, src_len, dst->buf[0].data, dst_len); } @@ -500,8 +508,9 @@ static void compress_ctx_init(struct compress_ctx *ctx, int compress_or_decompre ctx->inflight_num = inflight_num; ctx->with_crc = is_test_crc; ctx->src_buf_num = 1; - if (g_file_chunk_size && (g_file_chunk_size * 1024) <= HPAGE_SIZE && ctx->algorithm->async_compress != NULL && ctx->compress_or_decompress != 0) { - g_enable_huge_pages = 1; + ctx->usr_map = NULL; + if (g_file_chunk_size && ((size_t)g_file_chunk_size * 1024) <= HPAGE_SIZE && ((ctx->algorithm->async_compress != NULL && ctx->compress_or_decompress != 0) + || (ctx->algorithm->async_decompress != NULL && !ctx->compress_or_decompress))) { ctx->src_buf_num = 4; } @@ -513,7 +522,7 @@ static void compress_ctx_init(struct compress_ctx *ctx, int compress_or_decompre int is_test_lz77_block = strcmp(algorithm->name, "kaelz4async_lz77") == 0; int is_test_lz77_frame = strcmp(algorithm->name, "kaelz4async_lz77_frame") == 0; if ((is_test_lz77_block || is_test_lz77_frame) && ctx->compress_or_decompress != 0) { - if (g_file_chunk_size == 0 || g_file_chunk_size * 1024 > HPAGE_SIZE) { + if (g_file_chunk_size == 0 || (size_t)g_file_chunk_size * 1024 > HPAGE_SIZE || (size_t)g_file_chunk_size * 1024 >= HW_MAX_SGE_LEN / 2) { // TBM: 当前chunk_size超过2M kzip不支持lz77模式,因为大页内存不连续 ctx->algorithm = get_algorithm("kaelz4async_block"); if (is_test_lz77_frame) { @@ -524,6 +533,8 @@ static void compress_ctx_init(struct compress_ctx *ctx, int compress_or_decompre ctx->is_lz77_mode = 1; g_enable_polling_mode = 1; } + ctx->is_polling = g_enable_polling_mode; + ctx->is_zlib = strcmp(algorithm->name, "kaezlibasync_deflate") == 0; } static void compress_ctx_destory(struct compress_ctx *ctx) @@ -623,6 +634,11 @@ static uLong get_src_content(struct compress_ctx *ctx, const char* in_filename, *inbuf = get_compress_input(ctx->chunk_len); src_len = ctx->chunk_len; } + + if (g_file_chunk_size == 0) { + g_file_chunk_size = (src_len / 1024) + 1; + } + if (!*inbuf) { fprintf(stderr, "inbuf is NULL!\n"); return -1; @@ -716,68 +732,73 @@ static int wait_for_all_fork_done() return 0; } -static int do_comp_and_decomp_with_full_file( - struct compress_ctx *ctx, void *inbuf, uLong src_len, void *outbuf, uLong output_sz, unsigned long *out_offset) +static void comp_and_decomp_fill_buffer_list(struct kaelz4_buffer_list *buf_list, size_t sge_len, size_t rem_len, void *start_addr, size_t offset) { - struct compress_param *param = NULL; - - while (ctx->param_buf[ctx->param_index].done != 0) { - compress_async_polling(&ctx->param_buf[ctx->param_index]); - } - param = &ctx->param_buf[ctx->param_index]; - ctx->param_index = (ctx->param_index + 1) % ctx->inflight_num; - // 单次接口调用时延 - if (g_log_level == 1) { - param->start_time = get_ns(); + size_t tmp_offset = 0; + unsigned int i = 0; + unsigned int tmp_size; + + while (rem_len) { + tmp_size = MIN(sge_len, rem_len); + buf_list->buf[i].data = start_addr + offset + tmp_offset; + if (((offset + tmp_offset) % HPAGE_SIZE) + tmp_size <= HPAGE_SIZE) { + buf_list->buf[i].buf_len = tmp_size; + } else { + buf_list->buf[i].buf_len = HPAGE_SIZE - ((offset + tmp_offset) % HPAGE_SIZE); + } + tmp_offset += buf_list->buf[i].buf_len; + rem_len -= buf_list->buf[i].buf_len; + i++; + buf_list->buf_num = i; } - param->done = 2; - param->ctx = ctx; - param->sn = ctx->sn; - param->loop_index = ctx->loop_index; +} +static void comp_and_decomp_fill_src_buf(struct compress_param *param, size_t src_len, void *start_addr, size_t offset) +{ + struct compress_ctx *ctx = param->ctx; - uLong output_sz_tmp = output_sz; - void *dst_start = outbuf + *out_offset; - param->dst.buf_num = 1; - param->dst.buf = param->dst_buf; - param->dst.buf[0].data = dst_start; - param->dst.buf[0].buf_len = output_sz_tmp; - param->src.buf_num = ctx->src_buf_num; param->src.buf = param->src_buf; - unsigned int tmp_size = src_len / param->src.buf_num; - for (int i = 0; i < param->src.buf_num - 1; i++) { - param->src.buf[i].data = inbuf + tmp_size * i; - param->src.buf[i].buf_len = tmp_size; - } - param->src.buf[param->src.buf_num - 1].data = inbuf + tmp_size * (param->src.buf_num - 1); - param->src.buf[param->src.buf_num - 1].buf_len = src_len - tmp_size * (param->src.buf_num - 1); param->src.usr_data = ctx->page_info; + unsigned int tmp_size = src_len / ctx->src_buf_num; + comp_and_decomp_fill_buffer_list(¶m->src, tmp_size, src_len, start_addr, offset); param->src_len = src_len; - param->ibuf_crc = 0; - param->obuf_crc = 0; - if (ctx->with_crc == 1) { - param->result.ibuf_crc = ¶m->ibuf_crc; - param->result.obuf_crc = ¶m->obuf_crc; - } else { - param->result.ibuf_crc = NULL; - param->result.obuf_crc = NULL; - } - param->result.user_data = param; param->result.src_size = src_len; - param->result.dst_len = output_sz_tmp; - int ret = do_real_compression(ctx, ¶m->src, (unsigned int *)&src_len, ¶m->dst, (unsigned int *)&output_sz_tmp, ¶m->result); - if (ctx->algorithm->async_compress == NULL || (ctx->compress_or_decompress == 0 && ctx->algorithm->async_decompress == NULL)) { - param->result.dst_len = output_sz_tmp; - compress_async_callback(¶m->result); +} + +void comp_and_decomp_fill_dst_buf(struct compress_param *param, size_t dst_len, void *start_addr) +{ + struct compress_ctx *ctx = param->ctx; + + param->dst.buf_num = 1; + param->dst.buf = param->dst_buf; + param->dst.buf[0].data = start_addr; + param->dst.buf[0].buf_len = dst_len; + param->tuple.buf = param->tuple_buf; + param->tuple.usr_data = ctx->tuple_page_info; + param->result.dst_len = dst_len; + + if ((ctx->is_lz77_mode && ctx->compress_or_decompress) || ctx->is_zlib) { + if (ctx->is_lz77_mode) { + dst_len = KAELZ4_compress_get_tuple_buf_len(dst_len); + } + unsigned int tmp_size = MIN(dst_len, HW_MAX_SGE_LEN); // HW_MAX_SGE_LEN: hisi_zip约束sge len不超过8M + comp_and_decomp_fill_buffer_list(¶m->tuple, tmp_size, dst_len, ctx->tuple_buf, ctx->tuple_buf_offset); + ctx->tuple_buf_offset += dst_len; + if (ctx->tuple_buf_offset > ctx->tuple_buf_len) { + printf("ctx->tuple_buf_offset[0x%lx] > ctx->tuple_buf_len[0x%lx]\n", ctx->tuple_buf_offset, ctx->tuple_buf_len); + exit(-1); + } + param->dst_buf_list = ¶m->tuple; + } else { + param->dst_buf_list = ¶m->dst; } - ctx->sn++; - return ret; } + static int do_comp_with_split_file( struct compress_ctx *ctx, void *inbuf, uLong src_len, void *outbuf, uLong output_sz, unsigned long *out_offset) { int ret = 0; unsigned int remaining = src_len; - int chunk_size = g_file_chunk_size * 1024; + size_t chunk_size = (size_t)g_file_chunk_size * 1024; void *start_buf = inbuf; while (remaining > 0) { @@ -804,31 +825,6 @@ static int do_comp_with_split_file( output_size_chunk -= output_size_chunk % 4; uLong output_sz_tmp = output_size_chunk; void *dst_start = outbuf + *out_offset; // 使用总内存里面的部分空间 - param->dst.buf_num = 1; - param->dst.buf = param->dst_buf; - param->dst.buf[0].data = dst_start; - param->dst.buf[0].buf_len = output_sz_tmp; - param->src.buf_num = ctx->src_buf_num; - param->src.buf = param->src_buf; - param->src.usr_data = ctx->page_info; - unsigned int tmp_size = chunk_len_this_loop / param->src.buf_num; - size_t tmp_offset = 0; - for (int i = 0; i < param->src.buf_num; i++) { - param->src.buf[i].data = start_buf + tmp_offset; - if (((start_buf - inbuf + tmp_offset) % HPAGE_SIZE) + tmp_size <= HPAGE_SIZE) { - param->src.buf[i].buf_len = tmp_size; - } else { - param->src.buf[i].buf_len = HPAGE_SIZE - ((start_buf - inbuf + tmp_offset) % HPAGE_SIZE); - } - tmp_offset += param->src.buf[i].buf_len; - } - - if (tmp_offset < chunk_len_this_loop) { - param->src.buf[param->src.buf_num].data = start_buf + tmp_offset; - param->src.buf[param->src.buf_num].buf_len = chunk_len_this_loop - tmp_offset; - param->src.buf_num++; - } - param->src_len = chunk_len_this_loop; param->ibuf_crc = 0; param->obuf_crc = 0; if (ctx->with_crc == 1) { @@ -839,24 +835,10 @@ static int do_comp_with_split_file( param->result.obuf_crc = NULL; } param->result.user_data = param; - param->result.src_size = chunk_len_this_loop; - param->result.dst_len = output_sz_tmp; - if (!ctx->is_lz77_mode) { - ret = do_real_compression( - ctx, ¶m->src, (unsigned int *)&chunk_len_this_loop, ¶m->dst, (unsigned int *)&output_sz_tmp, ¶m->result); - } else { - param->tuple.buf_num = 1; - param->tuple.buf = param->tuple_buf; - param->tuple.buf[0].data = ctx->tuple_buf + ctx->tuple_buf_offset; - param->tuple.buf[0].buf_len = KAELZ4_compress_get_tuple_buf_len(chunk_len_this_loop); - param->tuple.usr_data = ctx->tuple_page_info; - ctx->tuple_buf_offset += param->tuple.buf[0].buf_len; - if (ctx->tuple_buf_offset > ctx->tuple_buf_len) { - printf("ctx->tuple_buf_offset[0x%lx] > ctx->tuple_buf_len[0x%lx]\n", ctx->tuple_buf_offset, ctx->tuple_buf_len); - return -1; - } - ret = do_real_compression(ctx, ¶m->src, (unsigned int *)&chunk_len_this_loop, ¶m->tuple, (unsigned int *)&output_sz_tmp, ¶m->result); - } + comp_and_decomp_fill_src_buf(param, chunk_len_this_loop, inbuf, start_buf - inbuf); + comp_and_decomp_fill_dst_buf(param, output_sz_tmp, dst_start); + ret = do_real_compression(ctx, ¶m->src, (unsigned int *)&chunk_len_this_loop, param->dst_buf_list, + (unsigned int *)&output_sz_tmp, ¶m->result); if (ret != 0) { printf("Error: do_real_compression error. ret = %d \nexit\n", ret); return ret; @@ -877,7 +859,7 @@ static int do_comp_with_split_file( static int prepare_tuple_buf(struct compress_ctx *ctx, size_t src_len) { - size_t tuple_buf_len = KAELZ4_compress_get_tuple_buf_len(g_file_chunk_size * 1024) * (src_len / (g_file_chunk_size * 1024) + 1) * 2; + size_t tuple_buf_len = KAELZ4_compress_get_tuple_buf_len((size_t)g_file_chunk_size * 1024) * (src_len / ((size_t)g_file_chunk_size * 1024) + 1) * 4; size_t huge_page_num = tuple_buf_len * sizeof(Bytef) / HPAGE_SIZE + 1; // 大页大小为2M,申请大页时申请大小需为大页大小的整数倍 size_t total_size = huge_page_num * HPAGE_SIZE; ctx->tuple_buf = get_huge_pages(total_size); @@ -935,15 +917,11 @@ static int start_work(struct compress_ctx *ctx, const char* in_filename, const c } } - iova_map_fn usr_map = NULL; - if (ctx->src_buf_num != 1) - usr_map = get_physical_address_wrapper; + if (ctx->src_buf_num != 1 || ctx->is_zlib) + ctx->usr_map = get_physical_address_wrapper; - if (g_enable_polling_mode) { - ctx->sess = KAELZ4_create_async_compress_session(usr_map); - } else { - LZ4_async_compress_init(usr_map); - } + if (ctx->algorithm->init) + ctx->algorithm->init(ctx); uLong output_sz; if(ctx->compress_or_decompress) { // 压缩空间预估理论上不同算法各有自己的计算规则 @@ -963,7 +941,7 @@ static int start_work(struct compress_ctx *ctx, const char* in_filename, const c return -1; } - if (ctx->is_lz77_mode) { + if (ctx->is_lz77_mode || ctx->is_zlib) { if (prepare_tuple_buf(ctx, src_len) != 0) { return -1; } @@ -981,8 +959,6 @@ static int start_work(struct compress_ctx *ctx, const char* in_filename, const c } if (g_file_chunk_size != 0) { // 分片逻辑 ret = do_comp_with_split_file(ctx, inbuf, src_len, outbuf, output_sz, &out_offset); - } else { // 原始文件整体丢入 - ret = do_comp_and_decomp_with_full_file(ctx, inbuf, src_len, outbuf, output_sz, &out_offset); } if(ret < 0) { printf("Error: 压缩解压失败 ret=%d \n", ret); @@ -1047,9 +1023,9 @@ static int start_work_decompress( struct fragment_metadata *loaded_fragments = NULL; unsigned int fragment_count; load_metadata_from_file(in_filename, &loaded_fragments, &fragment_count); - // // // 打印读取的元数据 + // 打印读取的元数据 // for (unsigned int i = 0; i < fragment_count; i++) { - // printf("Fragment %d: Offset = %u, Length = %u\n", i + 1, loaded_fragments[i].offset, loaded_fragments[i].len); + // printf("Fragment %d: Offset = %u, Length = %u chunk_len = 0x%lx\n", i + 1, loaded_fragments[i].offset, loaded_fragments[i].len, loaded_fragments[i].src_chunk_len); // } ctx->src_buf = inbuf; @@ -1076,11 +1052,17 @@ static int start_work_decompress( } } - if (ctx->src_buf_num != 1) - LZ4_async_compress_init(get_physical_address_wrapper); - else - LZ4_async_compress_init(NULL); + if (ctx->src_buf_num != 1 || ctx->is_zlib) + ctx->usr_map = get_physical_address_wrapper; + + if (ctx->algorithm->init) + ctx->algorithm->init(ctx); + if (ctx->is_zlib) { + if (prepare_tuple_buf(ctx, loaded_fragments[0].src_chunk_len * fragment_count) != 0) { + return -1; + } + } struct timeval start, stop; gettimeofday(&start, NULL); @@ -1089,6 +1071,7 @@ static int start_work_decompress( for (j = 0; j < ctx->loop_times; j++) { if (j > 0) { // 为第1次之后的循环的产物复用空间 out_offset = output_sz; + ctx->tuple_buf_offset = ctx->tuple_buf_len / 2; } if (g_file_chunk_size != 0) { // 分片逻辑 for (int k = 0; k < fragment_count; k++) { @@ -1111,17 +1094,8 @@ static int start_work_decompress( param->sn = ctx->sn; param->loop_index = j; - size_t output_size_chunk = MIN(this_src_len * 300, 1*1024*1024*1024); // 预估本次压缩后产物的长度 + size_t output_size_chunk = loaded_fragments[k].src_chunk_len; // 预估本次压缩后产物的长度 void *dst_start = outbuf + out_offset; // 使用总内存里面的部分空间 - param->dst.buf_num = 1; - param->dst.buf = param->dst_buf; - param->dst.buf[0].data = dst_start; - param->dst.buf[0].buf_len = output_size_chunk; - param->src.buf_num = 1; - param->src.buf = param->src_buf; - param->src.buf[0].data = inbuf + this_offset; - param->src.buf[0].buf_len = this_src_len; - param->src_len = this_src_len; param->ibuf_crc = 0; param->obuf_crc = 0; if (ctx->with_crc == 1) { @@ -1132,11 +1106,10 @@ static int start_work_decompress( param->result.obuf_crc = NULL; } param->result.user_data = param; - param->result.src_size = this_src_len; - param->result.dst_len = output_size_chunk; - ret = do_real_compression(ctx, ¶m->src, (unsigned int *)&this_src_len, - ¶m->dst, (unsigned int *)&output_size_chunk, ¶m->result); - + comp_and_decomp_fill_src_buf(param, this_src_len, inbuf, this_offset); + comp_and_decomp_fill_dst_buf(param, output_size_chunk, dst_start); + ret = do_real_compression(ctx, ¶m->src, (unsigned int *)&this_src_len, param->dst_buf_list, + (unsigned int *)&output_size_chunk, ¶m->result); if(ret != 0) { printf("Error: sn %d len=%d;offset=%lx. end=%lx.do_real_compression decomp error. ret = %d \nexit\n ", param->sn, loaded_fragments[k].len, this_offset, this_offset + loaded_fragments[k].len, ret); @@ -1150,9 +1123,6 @@ static int start_work_decompress( } ctx->sn++; - if (ctx->compress_or_decompress == 0 && ctx->algorithm->async_decompress && g_file_chunk_size) { - output_size_chunk = g_file_chunk_size * 1024 * 2; - } out_offset += output_size_chunk; // 偏移本次解压实际使用的空间 } } @@ -1350,7 +1320,11 @@ int round_trip_fuzztest(uint32_t RDGseed) ctx.loop_times = 1; multi = 1; } - LZ4_async_compress_init(NULL); + ctx.usr_map = NULL; + + if (ctx.algorithm->init) + ctx.algorithm->init(&ctx); + if (!ctx.compress_or_decompress && g_file_chunk_size > 0) { // 如果是分片解压,单独处理 ret = start_work_decompress(&ctx, in_filename, out_filename, multi, window_bits, level); } else { @@ -1549,7 +1523,7 @@ int main(int argc, char **argv) compress_ctx_init(ctx, compress, inflight_num, chunk_len, algorithm, is_test_crc); ctx->loop_times = loop_times; - if (!ctx->compress_or_decompress && g_file_chunk_size > 0 && threadNum == 1) { // 如果是分片解压,单独处理 + if (!ctx->compress_or_decompress && threadNum == 1) { // 如果是分片解压,单独处理 ret = start_work_decompress(ctx, in_filename, out_filename, multi, window_bits, level); } else { if (threadNum > 1) { @@ -1578,10 +1552,8 @@ int main(int argc, char **argv) } } - if (ctx->sess) - KAELZ4_destroy_async_compress_session(ctx->sess); - else - LZ4_teardown_async_compress(); + if (ctx->algorithm->cleanup) + ctx->algorithm->cleanup(ctx); compress_ctx_destory(ctx); free(ctx); -- Gitee