diff --git a/Makefile b/Makefile index 921584238502fa1e3c608ab8aa316f94ef617ec0..d07f1d20112424588b4d20b1016cb4578714733a 100644 --- a/Makefile +++ b/Makefile @@ -91,6 +91,11 @@ CFLAGS_O2 = $(MOREFLAGS) $(CODE_FLAGS) $(OPT_FLAGS_O2) $(DEFINES) LDFLAGS += $(MOREFLAGS) -lz LDFLAGS += -lzstd LDFLAGS += -llz4 +ifdef ENABLE_QAT +LDFLAGS += -lqatzip +LDFLAGS += -lqatseqprod +CFLAGS += -DENABLE_QAT +endif LZO_FILES = lzo/lzo1.o lzo/lzo1a.o lzo/lzo1a_99.o lzo/lzo1b_1.o lzo/lzo1b_2.o lzo/lzo1b_3.o lzo/lzo1b_4.o lzo/lzo1b_5.o LZO_FILES += lzo/lzo1b_6.o lzo/lzo1b_7.o lzo/lzo1b_8.o lzo/lzo1b_9.o lzo/lzo1b_99.o lzo/lzo1b_9x.o lzo/lzo1b_cc.o diff --git a/README.md b/README.md index 77a505d3e16001d67d48c14d536b70bd0393d122..a1b5c0b71b1d9d774136d998828664bc1442db58 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,20 @@ The directory where the CUDA compiler and libraries are available can be passed make CUDA_BASE=/usr/local/cuda ``` +QAT support +------------------------- + +If QAT is available, lzbench supports additional compressors: + - [qatzip](https://github.com/intel/QATzip) - QAT default compressor, using the deflate algorithm + - [qatlz4](https://github.com/intel/QATzip) - QAT compressor, using the lz4 algorithm + - [qatgzip](https://github.com/intel/QATzip) - similar to `qatzip`, but producing the gzip data format + - [qatzstd](https://github.com/intel/QAT-ZSTD-Plugin) - ZSTD compressor, using QAT hardware + +QAT support is enabled by passing the `ENABLE_QAT` variable to `make`, *e.g.*: +``` +make ENABLE_QAT=1 +``` + Benchmarks ------------------------- diff --git a/_lzbench/compressors.cpp b/_lzbench/compressors.cpp index 487dd8f59c32948ebb815b9d0d43fd6dba7e2557..db29253b39bdc35f8e37213b40d0cd00748991bb 100644 --- a/_lzbench/compressors.cpp +++ b/_lzbench/compressors.cpp @@ -2,6 +2,10 @@ #include #include #include // memcpy +#ifdef ENABLE_QAT +#include "qatzip.h" +#include "qatseqprod.h" +#endif #ifndef MAX #define MAX(a,b) ((a)>(b))?(a):(b) @@ 
-1887,6 +1891,217 @@ int64_t lzbench_zstd_simple_decompress(char *inbuf, size_t insize, char *outbuf,
 #endif
+#ifdef ENABLE_QAT
+// Hardware buffer size requested for every QATzip session created in this section.
+static const unsigned int LZBENCH_QAT_HW_BUFF_SZ = 128 * 1024;
+
+// QATzip takes buffer lengths as unsigned int; clamp a size_t capacity into that range.
+static unsigned int qat_clamp_len(size_t len)
+{
+    const unsigned int max_len = ~0u;
+    return (len > max_len) ? max_len : (unsigned int)len;
+}
+
+// Compresses inbuf into outbuf with an already configured session, then releases the session.
+// `status` is the result of the preceding qzInit/qzSetupSession* calls.
+// Returns the compressed size in bytes, or 0 on any failure.
+static int64_t qat_compress_and_teardown(QzSession_T *sess, int status, char *inbuf, size_t insize, char *outbuf, size_t outsize)
+{
+    int64_t result = 0;
+    unsigned int src_len = (unsigned int)insize;
+    unsigned int dest_len = qat_clamp_len(outsize);
+
+    // NOTE(review): assumes QATzip reports hard failures as negative codes and that positive
+    // codes (e.g. QZ_DUPLICATE) are informational -- confirm against the installed qatzip.h.
+    // The src_len round-trip check rejects inputs too large for an unsigned int length.
+    if (status >= QZ_OK && (size_t)src_len == insize) {
+        status = qzCompress(sess, (unsigned char *)inbuf, &src_len, (unsigned char *)outbuf, &dest_len, 1);
+        // qzCompress updates src_len to the bytes consumed; a short count means incomplete output.
+        if (status == QZ_OK && (size_t)src_len == insize)
+            result = (int64_t)dest_len;
+    }
+    qzTeardownSession(sess); // release the per-call session state
+    return result;
+}
+
+// Decompresses inbuf into outbuf with an already configured session, then releases the session.
+// `status` is the result of the preceding qzInit/qzSetupSession* calls.
+// Returns the decompressed size in bytes, or 0 on any failure.
+static int64_t qat_decompress_and_teardown(QzSession_T *sess, int status, char *inbuf, size_t insize, char *outbuf, size_t outsize)
+{
+    int64_t result = 0;
+    unsigned int src_len = (unsigned int)insize;
+    unsigned int dest_len = qat_clamp_len(outsize);
+
+    if (status >= QZ_OK && (size_t)src_len == insize) {
+        status = qzDecompress(sess, (unsigned char *)inbuf, &src_len, (unsigned char *)outbuf, &dest_len);
+        if (status == QZ_OK)
+            result = (int64_t)dest_len;
+    }
+    qzTeardownSession(sess); // release the per-call session state
+    return result;
+}
+
+// QAT deflate compression; `level` is passed through as the session's comp_lvl.
+int64_t lzbench_qat_zip_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*)
+{
+    QzSession_T sess = {0};
+    QzSessionParamsDeflate_T params;
+    int rc = qzInit(&sess, 1); // second argument is qzInit's sw_backup flag
+
+    qzGetDefaultsDeflate(&params);
+    params.common_params.hw_buff_sz = LZBENCH_QAT_HW_BUFF_SZ;
+    params.common_params.comp_lvl = (unsigned int)level;
+    params.common_params.direction = QZ_DIR_COMPRESS;
+    params.common_params.comp_algorithm = QZ_DEFLATE;
+    params.common_params.sw_backup = QZ_SW_BACKUP_BIT_POSITION;
+
+    if (rc >= QZ_OK)
+        rc = qzSetupSessionDeflate(&sess, &params);
+    return qat_compress_and_teardown(&sess, rc, inbuf, insize, outbuf, outsize);
+}
+
+// QAT deflate decompression; returns the number of bytes actually written to outbuf.
+int64_t lzbench_qat_zip_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*)
+{
+    QzSession_T sess = {0};
+    QzSessionParamsDeflate_T params;
+    int rc = qzInit(&sess, 1); // second argument is qzInit's sw_backup flag
+
+    qzGetDefaultsDeflate(&params);
+    params.common_params.hw_buff_sz = LZBENCH_QAT_HW_BUFF_SZ;
+    params.common_params.direction = QZ_DIR_DECOMPRESS;
+    params.common_params.comp_algorithm = QZ_DEFLATE;
+    params.common_params.sw_backup = QZ_SW_BACKUP_BIT_POSITION;
+
+    if (rc >= QZ_OK)
+        rc = qzSetupSessionDeflate(&sess, &params);
+    return qat_decompress_and_teardown(&sess, rc, inbuf, insize, outbuf, outsize);
+}
+
+// QAT LZ4 compression; `level` is passed through as the session's comp_lvl.
+int64_t lzbench_qat_lz4_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*)
+{
+    QzSession_T sess = {0};
+    QzSessionParamsLZ4_T params;
+    int rc = qzInit(&sess, 1); // second argument is qzInit's sw_backup flag
+
+    qzGetDefaultsLZ4(&params);
+    params.common_params.hw_buff_sz = LZBENCH_QAT_HW_BUFF_SZ;
+    params.common_params.comp_lvl = (unsigned int)level;
+    params.common_params.direction = QZ_DIR_COMPRESS;
+    params.common_params.comp_algorithm = QZ_LZ4;
+    params.common_params.sw_backup = QZ_SW_BACKUP_BIT_POSITION;
+
+    if (rc >= QZ_OK)
+        rc = qzSetupSessionLZ4(&sess, &params);
+    return qat_compress_and_teardown(&sess, rc, inbuf, insize, outbuf, outsize);
+}
+
+// QAT LZ4 decompression; returns the number of bytes actually written to outbuf.
+int64_t lzbench_qat_lz4_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*)
+{
+    QzSession_T sess = {0};
+    QzSessionParamsLZ4_T params;
+    int rc = qzInit(&sess, 1); // second argument is qzInit's sw_backup flag
+
+    qzGetDefaultsLZ4(&params);
+    params.common_params.hw_buff_sz = LZBENCH_QAT_HW_BUFF_SZ;
+    params.common_params.direction = QZ_DIR_DECOMPRESS;
+    params.common_params.comp_algorithm = QZ_LZ4;
+    params.common_params.sw_backup = QZ_SW_BACKUP_BIT_POSITION;
+
+    if (rc >= QZ_OK)
+        rc = qzSetupSessionLZ4(&sess, &params);
+    return qat_decompress_and_teardown(&sess, rc, inbuf, insize, outbuf, outsize);
+}
+
+// QAT deflate compression with the gzip data format (QZ_DEFLATE_GZIP).
+int64_t lzbench_qat_gzip_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*)
+{
+    QzSession_T sess = {0};
+    QzSessionParams_T params;
+    int rc = qzInit(&sess, 1); // second argument is qzInit's sw_backup flag
+
+    qzGetDefaults(&params);
+    params.data_fmt = QZ_DEFLATE_GZIP; // set gzip format
+    params.hw_buff_sz = LZBENCH_QAT_HW_BUFF_SZ;
+    params.comp_lvl = (unsigned int)level;
+    params.direction = QZ_DIR_COMPRESS;
+    params.comp_algorithm = QZ_DEFLATE;
+    params.sw_backup = QZ_SW_BACKUP_BIT_POSITION;
+
+    if (rc >= QZ_OK)
+        rc = qzSetupSession(&sess, &params);
+    return qat_compress_and_teardown(&sess, rc, inbuf, insize, outbuf, outsize);
+}
+
+// QAT decompression of the gzip data format (QZ_DEFLATE_GZIP).
+int64_t lzbench_qat_gzip_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*)
+{
+    QzSession_T sess = {0};
+    QzSessionParams_T params;
+    int rc = qzInit(&sess, 1); // second argument is qzInit's sw_backup flag
+
+    qzGetDefaults(&params);
+    params.data_fmt = QZ_DEFLATE_GZIP;
+    params.hw_buff_sz = LZBENCH_QAT_HW_BUFF_SZ;
+    params.direction = QZ_DIR_DECOMPRESS;
+    params.comp_algorithm = QZ_DEFLATE;
+    params.sw_backup = QZ_SW_BACKUP_BIT_POSITION;
+
+    if (rc >= QZ_OK)
+        rc = qzSetupSession(&sess, &params);
+    return qat_decompress_and_teardown(&sess, rc, inbuf, insize, outbuf, outsize);
+}
+
+// Returns the shared ZSTD compression context wired to the QAT sequence producer,
+// creating it on first use; returns NULL if the context cannot be allocated.
+ZSTD_CCtx* getZc()
+{
+    static ZSTD_CCtx* zc = NULL; // function-local static: created once, then reused
+    if (zc == NULL) {
+        zc = ZSTD_createCCtx();
+        if (zc == NULL)
+            return NULL;
+        /* Start the QAT device; this must happen before any compression job starts */
+        QZSTD_startQatDevice();
+        /* Create the state object consumed by the QAT sequence producer */
+        void *sequenceProducerState = QZSTD_createSeqProdState();
+        /* Register qatSequenceProducer as this context's sequence producer */
+        ZSTD_registerSequenceProducer(zc, sequenceProducerState, qatSequenceProducer);
+        /* Enable sequence producer fallback */
+        ZSTD_CCtx_setParameter(zc, ZSTD_c_enableSeqProducerFallback, 1);
+    }
+    return zc;
+}
+
+// zstd compression through the QAT sequence producer; returns the compressed size, or 0 on failure.
+int64_t lzbench_qat_zstd_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*)
+{
+    ZSTD_CCtx* zc = getZc(); // shared, lazily created context
+    if (zc == NULL)
+        return 0;
+
+    ZSTD_CCtx_setParameter(zc, ZSTD_c_compressionLevel, (int)level);
+    size_t const written = ZSTD_compress2(zc, outbuf, outsize, inbuf, insize);
+    // Nothing is released here: the context returned by getZc() is reused by later calls.
+    return ZSTD_isError(written) ? 0 : (int64_t)written;
+}
+
+// zstd decompression with a per-call context; returns the decompressed size, or 0 on failure.
+int64_t lzbench_qat_zstd_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*)
+{
+    ZSTD_DCtx *const zdc = ZSTD_createDCtx();
+    if (zdc == NULL)
+        return 0;
+
+    size_t const produced = ZSTD_decompressDCtx(zdc, outbuf, outsize, inbuf, insize);
+    ZSTD_freeDCtx(zdc);
+    return ZSTD_isError(produced) ? 0 : (int64_t)produced;
+}
+#endif
+
 #ifdef BENCH_HAS_NAKAMICHI
 #include "nakamichi/nakamichi.h"
diff --git a/_lzbench/compressors.h b/_lzbench/compressors.h
index 60cf69e16988e2769bf98b1f14631e542d1f92c1..18085a2cc6f2a6262b636e42a15aaf2eb5aec546 100644
--- a/_lzbench/compressors.h
+++ b/_lzbench/compressors.h
@@ -495,6 +495,19 @@ int64_t lzbench_return_0(char *inbuf, size_t insize, char *outbuf, size_t outsiz
     #define lzbench_zstd_LDM_compress NULL
 #endif
+#ifdef ENABLE_QAT
+    int64_t lzbench_qat_zip_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*);
+    int64_t lzbench_qat_zip_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*);
+
+    int64_t lzbench_qat_lz4_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*);
+    int64_t lzbench_qat_lz4_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*);
+
+    int64_t lzbench_qat_gzip_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*);
+    int64_t lzbench_qat_gzip_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*);
+
+    int64_t lzbench_qat_zstd_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*);
+    int64_t lzbench_qat_zstd_decompress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t, size_t, char*);
+#endif
 #ifdef BENCH_HAS_NAKAMICHI
     int64_t lzbench_nakamichi_compress(char *inbuf, size_t insize, char *outbuf, size_t outsize, size_t level, size_t, char*);
diff --git a/_lzbench/lzbench.h b/_lzbench/lzbench.h
index 80320e5e4b1eb3507417767a3c1092e40f2094fd..93fd958fa4b1d341a1a4e070722f52f29d6f8439 100644
--- a/_lzbench/lzbench.h
+++ b/_lzbench/lzbench.h
@@ -136,7 +136,11 @@ typedef struct
-#define LZBENCH_COMPRESSOR_COUNT 75
+#ifdef ENABLE_QAT
+    #define LZBENCH_COMPRESSOR_COUNT 79
+#else
+    #define LZBENCH_COMPRESSOR_COUNT 75
+#endif
 static const compressor_desc_t comp_desc[LZBENCH_COMPRESSOR_COUNT] =
 {
@@ -215,6 +219,12 @@ static const compressor_desc_t comp_desc[LZBENCH_COMPRESSOR_COUNT] =
     { "nakamichi", "okamigan", 0, 0, 0, 0, lzbench_nakamichi_compress, lzbench_nakamichi_decompress, NULL, NULL },
     { "cudaMemcpy", "", 0, 0, 0, 0, lzbench_cuda_return_0, lzbench_cuda_memcpy, lzbench_cuda_init, lzbench_cuda_deinit },
     { "nvcomp_lz4", "1.2.2", 0, 5, 0, 0, lzbench_nvcomp_compress, lzbench_nvcomp_decompress, lzbench_nvcomp_init, lzbench_nvcomp_deinit },
+#ifdef ENABLE_QAT
+    { "qatzip", "1.0.0", 1, 9, 0, 0, lzbench_qat_zip_compress, lzbench_qat_zip_decompress, NULL, NULL },
+    { "qatlz4", "1.0.0", 1, 9, 0, 0, lzbench_qat_lz4_compress, lzbench_qat_lz4_decompress, NULL, NULL },
+    { "qatgzip", "1.0.0", 1, 9, 0, 0, lzbench_qat_gzip_compress, lzbench_qat_gzip_decompress, NULL, NULL },
+    { "qatzstd", "1.0.0", 1, 9, 0, 0, lzbench_qat_zstd_compress, lzbench_qat_zstd_decompress, NULL, NULL },
+#endif
 };