diff --git a/1001-zlib-anolis-Neon-Optimized-hash-chain-rebase.patch b/1001-zlib-anolis-Neon-Optimized-hash-chain-rebase.patch deleted file mode 100644 index e6f909f462b1c134815f4eee26e6c703a3774ab0..0000000000000000000000000000000000000000 --- a/1001-zlib-anolis-Neon-Optimized-hash-chain-rebase.patch +++ /dev/null @@ -1,170 +0,0 @@ -From f0fd8c553fa024c599f4aff65d7c603ceeaa6a58 Mon Sep 17 00:00:00 2001 -From: Adenilson Cavalcanti -Date: Mon, 9 Apr 2018 13:52:17 -0700 -Subject: [PATCH 1/3] Neon-Optimized hash chain rebase - -This should help with compression of data, using NEON instructions -(therefore useful for ARMv7/ARMv8). - -Original patch by Jun He. ---- - CMakeLists.txt | 18 ++++++++ - contrib/arm/neon_slide_hash.h | 84 +++++++++++++++++++++++++++++++++++ - deflate.c | 7 +++ - 3 files changed, 109 insertions(+) - create mode 100644 contrib/arm/neon_slide_hash.h - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 0fe939d..e9a74e9 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -136,6 +136,24 @@ if(CMAKE_COMPILER_IS_GNUCC) - set(ZLIB_ASMS contrib/amd64/amd64-match.S) - endif () - -+ if(ARM_NEON) -+ list(REMOVE_ITEM ZLIB_SRCS inflate.c) -+ set(ZLIB_ARM_NEON_HDRS -+ contrib/arm/chunkcopy.h -+ contrib/arm/inffast_chunk.h -+ contrib/arm/neon_slide_hash.h) -+ set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c) -+ add_definitions(-DARM_NEON) -+ set(COMPILER ${CMAKE_C_COMPILER}) -+ # NEON is mandatory in ARMv8. -+ if(${COMPILER} MATCHES "aarch64") -+ set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -march=armv8-a) -+ # But it was optional for ARMv7. 
-+ elseif(${COMPILER} MATCHES "arm") -+ set_source_files_properties(${ZLIB_ARM_NEON} PROPERTIES LANGUAGE C COMPILE_FLAGS -mfpu=neon) -+ endif() -+ endif() -+ - if(ZLIB_ASMS) - add_definitions(-DASMV) - set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE) -diff --git a/contrib/arm/neon_slide_hash.h b/contrib/arm/neon_slide_hash.h -new file mode 100644 -index 0000000..0daffa1 ---- /dev/null -+++ b/contrib/arm/neon_slide_hash.h -@@ -0,0 +1,84 @@ -+/* Copyright (C) 1995-2011, 2016 Mark Adler -+ * Copyright (C) 2017 ARM Holdings Inc. -+ * Authors: Adenilson Cavalcanti -+ * Jun He -+ * This software is provided 'as-is', without any express or implied -+ * warranty. In no event will the authors be held liable for any damages -+ * arising from the use of this software. -+ * Permission is granted to anyone to use this software for any purpose, -+ * including commercial applications, and to alter it and redistribute it -+ * freely, subject to the following restrictions: -+ * 1. The origin of this software must not be misrepresented; you must not -+ * claim that you wrote the original software. If you use this software -+ * in a product, an acknowledgment in the product documentation would be -+ * appreciated but is not required. -+ * 2. Altered source versions must be plainly marked as such, and must not be -+ * misrepresented as being the original software. -+ * 3. This notice may not be removed or altered from any source distribution. -+ */ -+#ifndef __NEON_SLIDE_HASH__ -+#define __NEON_SLIDE_HASH__ -+ -+#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) -+#include "deflate.h" -+#include -+ -+inline static void neon_slide_hash(deflate_state *s) -+{ -+ /* -+ * This is ASIMD implementation for hash table rebase -+ * it assumes: -+ * 1. hash chain offset (Pos) is 2 bytes -+ * 2. 
hash table size is multiple*128 bytes -+ * #1 should be true as Pos is defined as "ush" -+ * #2 should be true as hash_bits are greater that 7 -+ */ -+ unsigned n, m; -+ unsigned short wsize = s->w_size; -+ uint16x8_t v, *p; -+ size_t size; -+ -+ size = s->hash_size*sizeof(s->head[0]); -+ Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err"); -+ -+ Assert(sizeof(Pos) == 2, "Wrong Pos size"); -+ -+ /* slide s->head */ -+ v = vdupq_n_u16(wsize); -+ p = (uint16x8_t *)(s->head); -+ n = size / (sizeof(uint16x8_t) * 8); -+ do { -+ p[0] = vqsubq_u16(p[0], v); -+ p[1] = vqsubq_u16(p[1], v); -+ p[2] = vqsubq_u16(p[2], v); -+ p[3] = vqsubq_u16(p[3], v); -+ p[4] = vqsubq_u16(p[4], v); -+ p[5] = vqsubq_u16(p[5], v); -+ p[6] = vqsubq_u16(p[6], v); -+ p[7] = vqsubq_u16(p[7], v); -+ p += 8; -+ } while (--n); -+#ifndef FASTEST -+ /* slide s->prev */ -+ size = wsize*sizeof(s->prev[0]); -+ -+ Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err"); -+ -+ p = (uint16x8_t *)(s->prev); -+ n = size / (sizeof(uint16x8_t) * 8); -+ do { -+ p[0] = vqsubq_u16(p[0], v); -+ p[1] = vqsubq_u16(p[1], v); -+ p[2] = vqsubq_u16(p[2], v); -+ p[3] = vqsubq_u16(p[3], v); -+ p[4] = vqsubq_u16(p[4], v); -+ p[5] = vqsubq_u16(p[5], v); -+ p[6] = vqsubq_u16(p[6], v); -+ p[7] = vqsubq_u16(p[7], v); -+ p += 8; -+ } while (--n); -+#endif -+} -+ -+#endif -+#endif -diff --git a/deflate.c b/deflate.c -index 1ec7614..36f99ac 100644 ---- a/deflate.c -+++ b/deflate.c -@@ -50,6 +50,9 @@ - /* @(#) $Id$ */ - - #include "deflate.h" -+#if __ARM_NEON -+#include "contrib/arm/neon_slide_hash.h" -+#endif - - const char deflate_copyright[] = - " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; -@@ -201,6 +204,9 @@ local const config configuration_table[10] = { - local void slide_hash(s) - deflate_state *s; - { -+#if ARM_NEON -+ return neon_slide_hash(s); -+#else - unsigned n, m; - Posf *p; - uInt wsize = s->w_size; -@@ -222,6 +228,7 @@ local void slide_hash(s) - */ - } while 
(--n); - #endif -+#endif - } - - /* ========================================================================= */ --- -2.19.0 - diff --git a/1002-zlib-anolis-Porting-optimized-longest_match.patch b/1002-zlib-anolis-Porting-optimized-longest_match.patch deleted file mode 100644 index 7fda12ec7f5b6bb4cf970e11a1a2b2e440edb361..0000000000000000000000000000000000000000 --- a/1002-zlib-anolis-Porting-optimized-longest_match.patch +++ /dev/null @@ -1,218 +0,0 @@ -From 17a154db6774a4acf347cfc5189eaf2cd675e696 Mon Sep 17 00:00:00 2001 -From: Adenilson Cavalcanti -Date: Mon, 9 Apr 2018 15:14:19 -0700 -Subject: [PATCH 2/3] Porting optimized longest_match - -This patch was contributed to zlib-ng and features an improved longest_match -function using the most distant hash code to reduce number of checks -(see: http://www.gildor.org/en/projects/zlib). - -Original patch by Jun He. ---- - CMakeLists.txt | 3 +- - contrib/arm/arm_longest_match.h | 142 ++++++++++++++++++++++++++++++++ - deflate.c | 11 ++- - 3 files changed, 152 insertions(+), 4 deletions(-) - create mode 100644 contrib/arm/arm_longest_match.h - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index e9a74e9..3826eba 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -141,7 +141,8 @@ if(CMAKE_COMPILER_IS_GNUCC) - set(ZLIB_ARM_NEON_HDRS - contrib/arm/chunkcopy.h - contrib/arm/inffast_chunk.h -- contrib/arm/neon_slide_hash.h) -+ contrib/arm/neon_slide_hash.h -+ contrib/arm/arm_longest_match.h) - set(ZLIB_ARM_NEON contrib/arm/inflate.c contrib/arm/inffast_chunk.c) - add_definitions(-DARM_NEON) - set(COMPILER ${CMAKE_C_COMPILER}) -diff --git a/contrib/arm/arm_longest_match.h b/contrib/arm/arm_longest_match.h -new file mode 100644 -index 0000000..9e7083f ---- /dev/null -+++ b/contrib/arm/arm_longest_match.h -@@ -0,0 +1,142 @@ -+/* Copyright (C) 1995-2011, 2016 Mark Adler -+ * Copyright (C) 2017 ARM Holdings Inc. 
-+ * Authors: Adenilson Cavalcanti -+ * Jun He -+ * This software is provided 'as-is', without any express or implied -+ * warranty. In no event will the authors be held liable for any damages -+ * arising from the use of this software. -+ * Permission is granted to anyone to use this software for any purpose, -+ * including commercial applications, and to alter it and redistribute it -+ * freely, subject to the following restrictions: -+ * 1. The origin of this software must not be misrepresented; you must not -+ * claim that you wrote the original software. If you use this software -+ * in a product, an acknowledgment in the product documentation would be -+ * appreciated but is not required. -+ * 2. Altered source versions must be plainly marked as such, and must not be -+ * misrepresented as being the original software. -+ * 3. This notice may not be removed or altered from any source distribution. -+ */ -+#ifndef __ARM_LONGEST__MATCH__ -+#define __ARM_LONGEST__MATCH__ -+ -+#if defined(ARM_NEON) -+#include "deflate.h" -+#include -+static inline long get_match_len(const unsigned char *a, const unsigned char *b, long max) -+{ -+ register int len = 0; -+ register unsigned long xor = 0; -+ register int check_loops = max/sizeof(unsigned long); -+ while(check_loops-- > 0) { -+ xor = (*(unsigned long *)(a+len)) ^ (*(unsigned long *)(b+len)); -+ if (xor) break; -+ len += sizeof(unsigned long); -+ } -+ if (0 == xor) { -+ while (len < max) { -+ if (a[len] != b[len]) break; -+ len++; -+ } -+ return len; -+ } -+ xor = __builtin_ctzl(xor)>>3; -+ return len + xor; -+} -+ -+/* -+ * This implementation is based on algorithm described at: -+ * http://www.gildor.org/en/projects/zlib -+ * It uses the hash chain indexed by the most distant hash code to -+ * reduce number of checks. 
-+ * This also eliminates the those unnecessary check loops in legacy -+ * longest_match's do..while loop if the "most distant code" is out -+ * of search buffer -+ * -+ */ -+static inline unsigned arm_longest_match(deflate_state *const s, IPos cur_match) { -+ unsigned chain_length = s->max_chain_length;/* max hash chain length */ -+ unsigned char *scan = s->window + s->strstart; /* current string */ -+ unsigned char *match; /* matched string */ -+ unsigned int len; /* length of current match */ -+ unsigned int best_len = s->prev_length; /* best match length so far */ -+ unsigned int nice_match = s->nice_match; /* stop if match long enough */ -+ IPos limit = s->strstart > (IPos)MAX_DIST(s) ? -+ s->strstart - (IPos)MAX_DIST(s) : 0; -+ /* Stop when cur_match becomes <= limit. To simplify the code, -+ * we prevent matches with the string of window index 0. -+ */ -+ int offset = 0; /* offset of the head[most_distant_hash] from IN cur_match */ -+ Pos *prev = s->prev; -+ unsigned int wmask = s->w_mask; -+ unsigned char *scan_buf_base = s->window; -+ -+ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. -+ * It is easy to get rid of this optimization if necessary. -+ */ -+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); -+ -+ /* Do not look for matches beyond the end of the input. This is necessary -+ * to make deflate deterministic. 
-+ */ -+ if ((unsigned int)nice_match > s->lookahead) nice_match = s->lookahead; -+ -+ Assert((unsigned long)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); -+ -+ /* find most distant hash code for lazy_match */ -+ if (best_len > MIN_MATCH) { -+ /* search for most distant hash code */ -+ int i; -+ uint16_t hash = 0; -+ IPos pos; -+ -+ UPDATE_HASH(s, hash, scan[1]); -+ UPDATE_HASH(s, hash, scan[2]); -+ for (i = 3; i <= best_len; i++) { -+ UPDATE_HASH(s, hash, scan[i]); -+ /* get head IPos of hash calced by scan[i-2..i] */ -+ pos = s->head[hash]; -+ /* compare it to current "farthest hash" IPos */ -+ if (pos <= cur_match) { -+ /* we have a new "farthest hash" now */ -+ offset = i - 2; -+ cur_match = pos; -+ } -+ } -+ -+ /* update variables to correspond offset */ -+ limit += offset; -+ /* -+ * check if the most distant code's offset is out of search buffer -+ * if it is true, then this means scan[offset..offset+2] are not -+ * presented in the search buffer. So we just return best_len -+ * we've found. -+ */ -+ if (cur_match < limit) return best_len; -+ -+ scan_buf_base -= offset; -+ /* reduce hash search depth based on best_len */ -+ chain_length /= best_len - MIN_MATCH; -+ } -+ -+ do { -+ Assert(cur_match < s->strstart, "no future"); -+ -+ /* Determine matched length at current pos */ -+ match = scan_buf_base + cur_match; -+ len = get_match_len(match, scan, MAX_MATCH); -+ -+ if (len > best_len) { -+ /* found longer string */ -+ s->match_start = cur_match - offset; -+ best_len = len; -+ /* good enough? */ -+ if (len >= nice_match) break; -+ } -+ /* move to prev pos in this hash chain */ -+ } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length != 0); -+ -+ return (best_len <= s->lookahead)? 
best_len : s->lookahead; -+} -+ -+#endif -+#endif -diff --git a/deflate.c b/deflate.c -index 36f99ac..4c42259 100644 ---- a/deflate.c -+++ b/deflate.c -@@ -50,9 +50,6 @@ - /* @(#) $Id$ */ - - #include "deflate.h" --#if __ARM_NEON --#include "contrib/arm/neon_slide_hash.h" --#endif - - const char deflate_copyright[] = - " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; -@@ -196,6 +193,11 @@ local const config configuration_table[10] = { - s->head[s->hash_size-1] = NIL; \ - zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); - -+#if defined(ARM_NEON) -+#include "contrib/arm/arm_longest_match.h" -+#include "contrib/arm/neon_slide_hash.h" -+#endif -+ - /* =========================================================================== - * Slide the hash table when sliding the window down (could be avoided with 32 - * bit values at the expense of memory usage). We slide even when level == 0 to -@@ -1244,6 +1246,9 @@ local uInt longest_match(s, cur_match) - deflate_state *s; - IPos cur_match; /* current match */ - { -+#if defined(ARM_NEON) -+ return arm_longest_match(s, cur_match); -+#endif - unsigned chain_length = s->max_chain_length;/* max hash chain length */ - register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ --- -2.19.0 - diff --git a/1003-zlib-anolis-arm64-specific-build-patch.patch b/1003-zlib-anolis-arm64-specific-build-patch.patch deleted file mode 100644 index b8b6b55a9fd02ee2cac9299146d015ffa72ceba5..0000000000000000000000000000000000000000 --- a/1003-zlib-anolis-arm64-specific-build-patch.patch +++ /dev/null @@ -1,115 +0,0 @@ -From e0be75f8dce27a4e32196529df2a08dca791a286 Mon Sep 17 00:00:00 2001 -From: Jeremy Linton -Date: Fri, 6 Apr 2018 11:46:42 -0500 -Subject: [PATCH 3/3] arm64 specific build patch - ---- - Makefile.in | 19 ++++++++++++------- - configure | 2 +- - contrib/minizip/zip.c | 6 ++++-- - 3 files changed, 17 insertions(+), 10 deletions(-) - 
-diff --git a/Makefile.in b/Makefile.in -index 5a77949..9f088e5 100644 ---- a/Makefile.in -+++ b/Makefile.in -@@ -57,7 +57,7 @@ SRCDIR= - ZINC= - ZINCOUT=-I. - --OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inflate.o inftrees.o trees.o zutil.o -+OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o - OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o - OBJC = $(OBJZ) $(OBJG) - -@@ -163,16 +163,16 @@ crc32.o: $(SRCDIR)crc32.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c - - deflate.o: $(SRCDIR)deflate.c -- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c -+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)deflate.c - - infback.o: $(SRCDIR)infback.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)infback.c - - inffast.o: $(SRCDIR)inffast.c -- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inffast.c -+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inffast.c - - inflate.o: $(SRCDIR)inflate.c -- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inflate.c -+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -c -o $@ $(SRCDIR)inflate.c - - inftrees.o: $(SRCDIR)inftrees.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inftrees.c -@@ -214,7 +214,7 @@ crc32.lo: $(SRCDIR)crc32.c - - deflate.lo: $(SRCDIR)deflate.c - -@mkdir objs 2>/dev/null || test -d objs -- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c - -@mv objs/deflate.o $@ - - infback.lo: $(SRCDIR)infback.c -@@ -222,14 +222,19 @@ infback.lo: $(SRCDIR)infback.c - $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/infback.o $(SRCDIR)infback.c - -@mv objs/infback.o $@ - -+arminffast.lo: $(SRCDIR)contrib/arm/inffast_chunk.c $(SRCDIR)inffast.c -+ -@mkdir objs 2>/dev/null || test -d objs -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/arminffast.o 
$(SRCDIR)contrib/arm/inffast_chunk.c -+ -@mv objs/arminffast.o $@ -+ - inffast.lo: $(SRCDIR)inffast.c - -@mkdir objs 2>/dev/null || test -d objs -- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c - -@mv objs/inffast.o $@ - - inflate.lo: $(SRCDIR)inflate.c - -@mkdir objs 2>/dev/null || test -d objs -- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/arm -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c - -@mv objs/inflate.o $@ - - inftrees.lo: $(SRCDIR)inftrees.c -diff --git a/configure b/configure -index e974d1f..0c5f837 100755 ---- a/configure -+++ b/configure -@@ -23,7 +23,7 @@ SRCDIR=`dirname $0` - if test $SRCDIR = "."; then - ZINC="" - ZINCOUT="-I." -- SRCDIR="" -+ SRCDIR="./" - else - ZINC='-include zconf.h' - ZINCOUT='-I. -I$(SRCDIR)' -diff --git a/contrib/minizip/zip.c b/contrib/minizip/zip.c -index 44e88a9..0517930 100644 ---- a/contrib/minizip/zip.c -+++ b/contrib/minizip/zip.c -@@ -519,15 +519,17 @@ local ZPOS64_T zip64local_SearchCentralDir(const zlib_filefunc64_32_def* pzlib_f - break; - - for (i=(int)uReadSize-3; (i--)>0;) -+ { - if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) && - ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06)) - { - uPosFound = uReadPos+i; - break; - } -+ } - -- if (uPosFound!=0) -- break; -+ if (uPosFound!=0) -+ break; - } - TRYFREE(buf); - return uPosFound; --- -2.19.0 - diff --git a/1004-zlib-anolis-compute-crc32-using-armv8-specific-instruction.patch b/1004-zlib-anolis-compute-crc32-using-armv8-specific-instruction.patch deleted file mode 100644 index 78fb032d82c7299cced228eef60c07e0743315ca..0000000000000000000000000000000000000000 --- a/1004-zlib-anolis-compute-crc32-using-armv8-specific-instruction.patch +++ /dev/null @@ -1,240 +0,0 @@ -From 23e2623f8b9e64872fa1f512bd296d96e122fd88 Mon Sep 17 00:00:00 2001 -From: Chunmei Xu 
-Date: Sat, 29 Feb 2020 17:12:31 +0800 -Subject: [PATCH] compute crc32 using armv8 specific instruction - -backport crc32_acle.c from zlib-ng project -https://github.com/zlib-ng/zlib-ng/blob/develop/arch/arm/crc32_acle.c - -Signed-off-by: Chunmei Xu ---- - Makefile.in | 16 +++++-- - contrib/arm/crc32_acle.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++ - crc32.c | 12 +++++ - 3 files changed, 135 insertions(+), 4 deletions(-) - create mode 100644 contrib/arm/crc32_acle.c - -diff --git a/Makefile.in b/Makefile.in -index 9f088e5..0ae9acc 100644 ---- a/Makefile.in -+++ b/Makefile.in -@@ -57,11 +57,11 @@ SRCDIR= - ZINC= - ZINCOUT=-I. - --OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o -+OBJZ = adler32.o crc32_acle.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o - OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o - OBJC = $(OBJZ) $(OBJG) - --PIC_OBJZ = adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo -+PIC_OBJZ = adler32.lo crc32_acle.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo - PIC_OBJG = compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo - PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG) - -@@ -155,10 +155,12 @@ example64.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h - minigzip64.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h - $(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/minigzip.c - -- - adler32.o: $(SRCDIR)adler32.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)adler32.c - -+crc32_acle.o: $(SRCDIR)contrib/arm/crc32_acle.c -+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -c -o $@ $(SRCDIR)contrib/arm/crc32_acle.c -+ - crc32.o: $(SRCDIR)crc32.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c - -@@ -201,12 +203,16 @@ gzread.o: $(SRCDIR)gzread.c - gzwrite.o: $(SRCDIR)gzwrite.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)gzwrite.c - -- - 
adler32.lo: $(SRCDIR)adler32.c - -@mkdir objs 2>/dev/null || test -d objs - $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/adler32.o $(SRCDIR)adler32.c - -@mv objs/adler32.o $@ - -+crc32_acle.lo: $(SRCDIR)contrib/arm/crc32_acle.c -+ -@mkdir objs 2>/dev/null || test -d objs -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -DPIC -c -o objs/crc32_acle.o $(SRCDIR)contrib/arm/crc32_acle.c -+ -@mv objs/crc32_acle.o $@ -+ - crc32.lo: $(SRCDIR)crc32.c - -@mkdir objs 2>/dev/null || test -d objs - $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c -@@ -397,6 +403,7 @@ tags: - adler32.o zutil.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h - gzclose.o gzlib.o gzread.o gzwrite.o: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h - compress.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h -+crc32_acle.o: zconf.h - crc32.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h - deflate.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h - infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h -@@ -407,6 +414,7 @@ trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)tr - adler32.lo zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h - gzclose.lo gzlib.lo gzread.lo gzwrite.lo: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h - compress.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h -+crc32_acle.lo: zconf.h - crc32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h - deflate.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h - infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h -diff --git a/contrib/arm/crc32_acle.c b/contrib/arm/crc32_acle.c -new file mode 100644 -index 0000000..2da1808 ---- /dev/null -+++ b/contrib/arm/crc32_acle.c -@@ -0,0 +1,111 @@ -+/* crc32_acle.c -- compute the CRC-32 of a data stream -+ * Copyright (C) 1995-2006, 2010, 
2011, 2012 Mark Adler -+ * Copyright (C) 2016 Yang Zhang -+ * For conditions of distribution and use, see copyright notice in zlib.h -+ * -+*/ -+ -+#ifdef __ARM_FEATURE_CRC32 -+# include -+# include -+# ifdef __linux__ -+# include -+# endif -+ -+uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) { -+ register uint32_t c; -+ register const uint16_t *buf2; -+ register const uint32_t *buf4; -+ -+ c = ~crc; -+ if (len && ((ptrdiff_t)buf & 1)) { -+ c = __crc32b(c, *buf++); -+ len--; -+ } -+ -+ if ((len > sizeof(uint16_t)) && ((ptrdiff_t)buf & sizeof(uint16_t))) { -+ buf2 = (const uint16_t *) buf; -+ c = __crc32h(c, *buf2++); -+ len -= sizeof(uint16_t); -+ buf4 = (const uint32_t *) buf2; -+ } else { -+ buf4 = (const uint32_t *) buf; -+ } -+ -+# if defined(__aarch64__) -+ if ((len > sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) { -+ c = __crc32w(c, *buf4++); -+ len -= sizeof(uint32_t); -+ } -+ -+ const uint64_t *buf8 = (const uint64_t *) buf4; -+ -+# ifdef UNROLL_MORE -+ while (len >= 4 * sizeof(uint64_t)) { -+ c = __crc32d(c, *buf8++); -+ c = __crc32d(c, *buf8++); -+ c = __crc32d(c, *buf8++); -+ c = __crc32d(c, *buf8++); -+ len -= 4 * sizeof(uint64_t); -+ } -+# endif -+ -+ while (len >= sizeof(uint64_t)) { -+ c = __crc32d(c, *buf8++); -+ len -= sizeof(uint64_t); -+ } -+ -+ if (len >= sizeof(uint32_t)) { -+ buf4 = (const uint32_t *) buf8; -+ c = __crc32w(c, *buf4++); -+ len -= sizeof(uint32_t); -+ buf2 = (const uint16_t *) buf4; -+ } else { -+ buf2 = (const uint16_t *) buf8; -+ } -+ -+ if (len >= sizeof(uint16_t)) { -+ c = __crc32h(c, *buf2++); -+ len -= sizeof(uint16_t); -+ } -+ -+ buf = (const unsigned char *) buf2; -+# else /* __aarch64__ */ -+ -+# ifdef UNROLL_MORE -+ while (len >= 8 * sizeof(uint32_t)) { -+ c = __crc32w(c, *buf4++); -+ c = __crc32w(c, *buf4++); -+ c = __crc32w(c, *buf4++); -+ c = __crc32w(c, *buf4++); -+ c = __crc32w(c, *buf4++); -+ c = __crc32w(c, *buf4++); -+ c = __crc32w(c, *buf4++); -+ c = __crc32w(c, *buf4++); 
-+ len -= 8 * sizeof(uint32_t); -+ } -+# endif -+ -+ while (len >= sizeof(uint32_t)) { -+ c = __crc32w(c, *buf4++); -+ len -= sizeof(uint32_t); -+ } -+ -+ if (len >= sizeof(uint16_t)) { -+ buf2 = (const uint16_t *) buf4; -+ c = __crc32h(c, *buf2++); -+ len -= sizeof(uint16_t); -+ buf = (const unsigned char *) buf2; -+ } else { -+ buf = (const unsigned char *) buf4; -+ } -+# endif /* __aarch64__ */ -+ -+ if (len) { -+ c = __crc32b(c, *buf); -+ } -+ -+ c = ~c; -+ return c; -+} -+#endif /* __ARM_FEATURE_CRC32 */ -diff --git a/crc32.c b/crc32.c -index 9580440..8338dbc 100644 ---- a/crc32.c -+++ b/crc32.c -@@ -30,6 +30,10 @@ - - #include "zutil.h" /* for STDC and FAR definitions */ - -+#if defined(__ARM_FEATURE_CRC32) -+#include -+#endif -+ - /* Definitions for doing the crc four data bytes at a time. */ - #if !defined(NOBYFOUR) && defined(Z_U4) - # define BYFOUR -@@ -198,6 +202,10 @@ const z_crc_t FAR * ZEXPORT get_crc_table() - #define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) - #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 - -+#if defined(__ARM_FEATURE_CRC32) -+extern uint32_t crc32_acle(uint32_t, const unsigned char *, uint64_t); -+#endif -+ - /* ========================================================================= */ - unsigned long ZEXPORT crc32_z(crc, buf, len) - unsigned long crc; -@@ -217,7 +225,11 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) - - endian = 1; - if (*((unsigned char *)(&endian))) -+#if defined(__ARM_FEATURE_CRC32) -+ return crc32_acle(crc, buf, len); -+#else - return crc32_little(crc, buf, len); -+#endif - else - return crc32_big(crc, buf, len); - } --- -1.8.3.1 - diff --git a/1005-zlib-anolis-ARM-optimized-insert_string.patch b/1005-zlib-anolis-ARM-optimized-insert_string.patch deleted file mode 100644 index 54e0d99c33ca97c7cc83b93416a788b045464baa..0000000000000000000000000000000000000000 --- a/1005-zlib-anolis-ARM-optimized-insert_string.patch +++ /dev/null @@ -1,152 +0,0 @@ -From 
a036b584c292e7b67336d3abee65e2415cb68be9 Mon Sep 17 00:00:00 2001 -From: Chunmei Xu -Date: Thu, 5 Mar 2020 11:22:11 +0800 -Subject: [PATCH] ARM optimized insert_string - -refer to -https://chromium-review.googlesource.com/c/chromium/src/+/1173262 - -Signed-off-by: Chunmei Xu ---- - contrib/arm/insert_string_acle.h | 37 +++++++++++++++++++++++++++++++++++ - deflate.c | 42 ++++++++++++++++++++++++---------------- - 2 files changed, 62 insertions(+), 17 deletions(-) - create mode 100644 contrib/arm/insert_string_acle.h - -diff --git a/contrib/arm/insert_string_acle.h b/contrib/arm/insert_string_acle.h -new file mode 100644 -index 0000000..5f3c5e1 ---- /dev/null -+++ b/contrib/arm/insert_string_acle.h -@@ -0,0 +1,37 @@ -+/* insert_string_acle.h -- insert_string variant using ACLE's CRC instructions -+ * -+ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler -+ * For conditions of distribution and use, see copyright notice in zlib.h -+ * -+ */ -+ -+#if defined(__ARM_FEATURE_CRC32) -+#include -+ -+/* =========================================================================== -+ * Insert string str in the dictionary and set match_head to the previous head -+ * of the hash chain (the most recent string with same hash key). Return -+ * the previous length of the hash chain. -+ * IN assertion: all calls to to INSERT_STRING are made with consecutive -+ * input characters and the first MIN_MATCH bytes of str are valid -+ * (except for the last MIN_MATCH-1 bytes of the input file). 
-+ */ -+local inline Pos insert_string_acle(deflate_state *const s, const Pos str) { -+ Pos ret; -+ unsigned *ip, val, h = 0; -+ -+ ip = (unsigned *)&s->window[str]; -+ val = *ip; -+ -+ if (s->level >= 6) -+ val &= 0xFFFFFF; -+ -+ h = __crc32w(h, val); -+ -+ ret = s->head[h & s->hash_mask]; -+ s->head[h & s->hash_mask] = str; -+ s->prev[str & s->w_mask] = ret; -+ return ret; -+} -+#endif -+ -diff --git a/deflate.c b/deflate.c -index 4c42259..397f8c6 100644 ---- a/deflate.c -+++ b/deflate.c -@@ -173,17 +173,29 @@ local const config configuration_table[10] = { - * characters and the first MIN_MATCH bytes of str are valid (except for - * the last MIN_MATCH-1 bytes of the input file). - */ -+static inline Pos insert_string_c(deflate_state *s, Pos str) { -+ Pos match_head; -+ -+ UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]); - #ifdef FASTEST --#define INSERT_STRING(s, str, match_head) \ -- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ -- match_head = s->head[s->ins_h], \ -- s->head[s->ins_h] = (Pos)(str)) -+ match_head = s->head[s->ins_h]; - #else --#define INSERT_STRING(s, str, match_head) \ -- (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ -- match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ -- s->head[s->ins_h] = (Pos)(str)) -+ match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h]; - #endif -+ s->head[s->ins_h] = (Pos)str; -+ return match_head; -+} -+ -+#if defined(__ARM_FEATURE_CRC32) -+#include "contrib/arm/insert_string_acle.h" -+#endif -+static inline Pos insert_string(deflate_state *s, Pos str) { -+#if defined(__ARM_FEATURE_CRC32) -+ return insert_string_acle(s, str); -+#else -+ return insert_string_c(s, str); -+#endif -+} - - /* =========================================================================== - * Initialize the hash table (avoiding 64K overflow for 16 bit systems). 
-@@ -427,11 +439,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) - str = s->strstart; - n = s->lookahead - (MIN_MATCH-1); - do { -- UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); --#ifndef FASTEST -- s->prev[str & s->w_mask] = s->head[s->ins_h]; --#endif -- s->head[s->ins_h] = (Pos)str; -+ insert_string(s, str); - str++; - } while (--n); - s->strstart = str; -@@ -1859,7 +1867,7 @@ local block_state deflate_fast(s, flush) - */ - hash_head = NIL; - if (s->lookahead >= MIN_MATCH) { -- INSERT_STRING(s, s->strstart, hash_head); -+ hash_head = insert_string(s, s->strstart); - } - - /* Find the longest match, discarding those <= prev_length. -@@ -1890,7 +1898,7 @@ local block_state deflate_fast(s, flush) - s->match_length--; /* string at strstart already in table */ - do { - s->strstart++; -- INSERT_STRING(s, s->strstart, hash_head); -+ hash_head = insert_string(s, s->strstart); - /* strstart never exceeds WSIZE-MAX_MATCH, so there are - * always MIN_MATCH bytes ahead. - */ -@@ -1962,7 +1970,7 @@ local block_state deflate_slow(s, flush) - */ - hash_head = NIL; - if (s->lookahead >= MIN_MATCH) { -- INSERT_STRING(s, s->strstart, hash_head); -+ hash_head = insert_string(s, s->strstart); - } - - /* Find the longest match, discarding those <= prev_length. 
-@@ -2013,7 +2021,7 @@ local block_state deflate_slow(s, flush) - s->prev_length -= 2; - do { - if (++s->strstart <= max_insert) { -- INSERT_STRING(s, s->strstart, hash_head); -+ hash_head = insert_string(s, s->strstart); - } - } while (--s->prev_length != 0); - s->match_available = 0; --- -1.8.3.1 - diff --git a/1006-zlib-anolis-Optimize-slide_hash.patch b/1006-zlib-anolis-Optimize-slide_hash.patch deleted file mode 100644 index 6342123c0636230d224d7bb3fcdd421192d67356..0000000000000000000000000000000000000000 --- a/1006-zlib-anolis-Optimize-slide_hash.patch +++ /dev/null @@ -1,169 +0,0 @@ -diff -aurN zlib-1.2.11.orig/configure zlib-1.2.11/configure ---- zlib-1.2.11.orig/configure 2017-01-01 02:06:40.000000000 +0800 -+++ zlib-1.2.11/configure 2021-11-12 17:47:26.111812826 +0800 -@@ -23,7 +23,7 @@ - if test $SRCDIR = "."; then - ZINC="" - ZINCOUT="-I." -- SRCDIR="" -+ SRCDIR="./" - else - ZINC='-include zconf.h' - ZINCOUT='-I. -I$(SRCDIR)' -diff -aurN zlib-1.2.11.orig/contrib/amd64/sse2_slide_hash.c zlib-1.2.11/contrib/amd64/sse2_slide_hash.c ---- zlib-1.2.11.orig/contrib/amd64/sse2_slide_hash.c 1970-01-01 08:00:00.000000000 +0800 -+++ zlib-1.2.11/contrib/amd64/sse2_slide_hash.c 2021-11-12 17:56:19.373389046 +0800 -@@ -0,0 +1,51 @@ -+/* -+ * SSE optimized hash slide -+ * -+ * Copyright (C) 2017 Intel Corporation -+ * Authors: -+ * Arjan van de Ven -+ * Jim Kukunas -+ * -+ * For conditions of distribution and use, see copyright notice in zlib.h -+ */ -+ -+#if __SSE2__ == 1 -+ -+#include "sse2_slide_hash.h" -+ -+void sse2_slide_hash(deflate_state *s) -+{ -+ unsigned n; -+ Posf *p; -+ uInt wsize = s->w_size; -+ z_const __m128i xmm_wsize = _mm_set1_epi16(s->w_size); -+ -+ n = s->hash_size; -+ p = &s->head[n] - 8; -+ do { -+ __m128i value, result; -+ -+ value = _mm_loadu_si128((__m128i *)p); -+ result= _mm_subs_epu16(value, xmm_wsize); -+ _mm_storeu_si128((__m128i *)p, result); -+ p -= 8; -+ n -= 8; -+ } while (n > 0); -+ -+#ifndef FASTEST -+ n = wsize; -+ p = 
&s->prev[n] - 8; -+ do { -+ __m128i value, result; -+ -+ value = _mm_loadu_si128((__m128i *)p); -+ result= _mm_subs_epu16(value, xmm_wsize); -+ _mm_storeu_si128((__m128i *)p, result); -+ -+ p -= 8; -+ n -= 8; -+ } while (n > 0); -+#endif -+} -+ -+#endif -diff -aurN zlib-1.2.11.orig/contrib/amd64/sse2_slide_hash.h zlib-1.2.11/contrib/amd64/sse2_slide_hash.h ---- zlib-1.2.11.orig/contrib/amd64/sse2_slide_hash.h 1970-01-01 08:00:00.000000000 +0800 -+++ zlib-1.2.11/contrib/amd64/sse2_slide_hash.h 2021-11-12 17:56:13.131183305 +0800 -@@ -0,0 +1,23 @@ -+/* -+ * SSE optimized hash slide -+ * -+ * Copyright (C) 2017 Intel Corporation -+ * Authors: -+ * Arjan van de Ven -+ * Jim Kukunas -+ * -+ * For conditions of distribution and use, see copyright notice in zlib.h -+ */ -+ -+#ifndef __SSE_SLIDE_HASH__ -+#define __SSE_SLIDE_HASH__ -+ -+#if __SSE2__ == 1 -+#include "deflate.h" -+#include -+ -+extern void sse2_slide_hash(deflate_state *s); -+ -+#endif -+ -+#endif -diff -aurN zlib-1.2.11.orig/deflate.c zlib-1.2.11/deflate.c ---- zlib-1.2.11.orig/deflate.c 2017-01-16 01:29:40.000000000 +0800 -+++ zlib-1.2.11/deflate.c 2021-11-12 17:56:03.458864512 +0800 -@@ -50,6 +50,9 @@ - /* @(#) $Id$ */ - - #include "deflate.h" -+#if __SSE2__ == 1 -+#include "contrib/amd64/sse2_slide_hash.h" -+#endif - - const char deflate_copyright[] = - " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; -@@ -201,6 +204,9 @@ - local void slide_hash(s) - deflate_state *s; - { -+#if __SSE2__ == 1 -+ return sse2_slide_hash(s); -+#endif - unsigned n, m; - Posf *p; - uInt wsize = s->w_size; -@@ -224,6 +230,7 @@ - #endif - } - -+ - /* ========================================================================= */ - int ZEXPORT deflateInit_(strm, level, version, stream_size) - z_streamp strm; -diff -aurN zlib-1.2.11.orig/Makefile.in zlib-1.2.11/Makefile.in ---- zlib-1.2.11.orig/Makefile.in 2017-01-16 01:29:40.000000000 +0800 -+++ zlib-1.2.11/Makefile.in 2021-11-12 20:42:51.610668805 +0800 -@@ 
-57,11 +57,11 @@ - ZINC= - ZINCOUT=-I. - --OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inflate.o inftrees.o trees.o zutil.o -+OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inflate.o inftrees.o trees.o zutil.o sse2_slide_hash.o - OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o - OBJC = $(OBJZ) $(OBJG) - --PIC_OBJZ = adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo -+PIC_OBJZ = adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo sse2_slide_hash.lo - PIC_OBJG = compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo - PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG) - -@@ -163,7 +163,10 @@ - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c - - deflate.o: $(SRCDIR)deflate.c -- $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c -+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/amd64 -c -o $@ $(SRCDIR)deflate.c -+ -+sse2_slide_hash.o: $(SRCDIR)/contrib/amd64/sse2_slide_hash.c -+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/amd64 -c -o $@ $(SRCDIR)contrib/amd64/sse2_slide_hash.c - - infback.o: $(SRCDIR)infback.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)infback.c -@@ -214,8 +217,13 @@ - - deflate.lo: $(SRCDIR)deflate.c - -@mkdir objs 2>/dev/null || test -d objs -- $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/amd64 -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c - -@mv objs/deflate.o $@ -+ -+sse2_slide_hash.lo: $(SRCDIR)/contrib/amd64/sse2_slide_hash.c -+ -@mkdir objs 2>/dev/null || test -d objs -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -I$(SRCDIR)contrib/amd64 -DPIC -c -o objs/sse2_slide_hash.o $(SRCDIR)contrib/amd64/sse2_slide_hash.c -+ -@mv objs/sse2_slide_hash.o $@ - - infback.lo: $(SRCDIR)infback.c - -@mkdir objs 2>/dev/null || test -d objs diff --git a/dist b/dist index 0ee7539a2292be885eb3a6caf1a4ee4666e5e99f..535c6900412d365bb0ff6de8d1f27110833b3ae3 
100644 --- a/dist +++ b/dist @@ -1 +1 @@ -an8_6 +an8_7 diff --git a/zlib-1.2.11-CVE-2018-25032.patch b/zlib-1.2.11-CVE-2018-25032.patch index 9b4debdf2cc356b4d2791cd868f107bf0866e25b..800692f7df4b13c539c01fc3ed6cdbfa9b19bdc5 100644 --- a/zlib-1.2.11-CVE-2018-25032.patch +++ b/zlib-1.2.11-CVE-2018-25032.patch @@ -128,7 +128,7 @@ index 425babc..19cba87 100644 if (deflateStateCheck(source) || dest == Z_NULL) { @@ -1133,8 +1169,7 @@ int ZEXPORT deflateCopy (dest, source) - ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->window = (Bytef *) ZALLOC_WINDOW(dest, ds->w_size, 2*sizeof(Byte)); ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); - overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); diff --git a/zlib-1.2.11-IBM-DFLTCC-compression-level-switching-issues.patch b/zlib-1.2.11-IBM-DFLTCC-compression-level-switching-issues.patch new file mode 100644 index 0000000000000000000000000000000000000000..c69d9bc4edd95e2d4a7c649ab0a6b29054db8215 --- /dev/null +++ b/zlib-1.2.11-IBM-DFLTCC-compression-level-switching-issues.patch @@ -0,0 +1,206 @@ +Subject: [PATCH] Fixed DFLTCC compression level switching issues + +--- + configure | 4 +-- + contrib/s390/dfltcc.c | 52 ++++++++++++++++++++++++++++++----- + contrib/s390/dfltcc_deflate.h | 2 ++ + deflate.c | 12 ++++---- + test/infcover.c | 2 +- + 5 files changed, 57 insertions(+), 15 deletions(-) + +diff --git a/configure b/configure +index bfe4386..70ed86b 100755 +--- a/configure ++++ b/configure +@@ -139,7 +139,7 @@ case "$1" in + -w* | --warn) warn=1; shift ;; + -d* | --debug) debug=1; shift ;; + --dfltcc) +- CFLAGS="$CFLAGS -DDFLTCC" ++ CFLAGS="$CFLAGS -DDFLTCC -DDFLTCC_LEVEL_MASK=0x7e" + OBJC="$OBJC dfltcc.o" + PIC_OBJC="$PIC_OBJC dfltcc.lo" + shift +@@ -838,7 +838,7 @@ cat > $test.c << EOF + #include + int main() { return 0; } + EOF +-if try ${CC} ${CFLAGS} $test.c; then ++ if try $CC -c $CFLAGS $test.c; then + echo 
"Checking for sys/sdt.h ... Yes." | tee -a configure.log + CFLAGS="$CFLAGS -DHAVE_SYS_SDT_H" + SFLAGS="$SFLAGS -DHAVE_SYS_SDT_H" +diff --git a/contrib/s390/dfltcc.c b/contrib/s390/dfltcc.c +index d88a0d6..94a196f 100644 +--- a/contrib/s390/dfltcc.c ++++ b/contrib/s390/dfltcc.c +@@ -350,8 +350,12 @@ int ZLIB_INTERNAL dfltcc_deflate(strm, flush, result) + int soft_bcc; + int no_flush; + +- if (!dfltcc_can_deflate(strm)) ++ if (!dfltcc_can_deflate(strm)) { ++ /* Clear history. */ ++ if (flush == Z_FULL_FLUSH) ++ param->hl = 0; + return 0; ++ } + + again: + masked_avail_in = 0; +@@ -376,7 +380,8 @@ again: + /* Clear history. */ + if (flush == Z_FULL_FLUSH) + param->hl = 0; +- *result = need_more; ++ /* Trigger block post-processing if necessary. */ ++ *result = no_flush ? need_more : block_done; + return 1; + } + +@@ -403,13 +408,18 @@ again: + param->bcf = 0; + dfltcc_state->block_threshold = + strm->total_in + dfltcc_state->block_size; +- if (strm->avail_out == 0) { +- *result = need_more; +- return 1; +- } + } + } + ++ /* No space for compressed data. If we proceed, dfltcc_cmpr() will return ++ * DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still ++ * set BCF=1, which is wrong. Avoid complications and return early. ++ */ ++ if (strm->avail_out == 0) { ++ *result = need_more; ++ return 1; ++ } ++ + /* The caller gave us too much data. Pass only one block worth of + * uncompressed data to DFLTCC and mask the rest, so that on the next + * iteration we start a new block. +@@ -737,10 +747,15 @@ __attribute__((constructor)) local void init_globals(void) + * compiling with -m31, gcc defaults to ESA mode, however, since the kernel + * is 64-bit, it's always z/Architecture mode at runtime. 
+ */ +- __asm__ volatile(".machinemode push\n" ++ __asm__ volatile( ++#ifndef __clang__ ++ ".machinemode push\n" + ".machinemode zarch\n" ++#endif + "stfle %[facilities]\n" ++#ifndef __clang__ + ".machinemode pop\n" ++#endif + : [facilities] "=Q" (cpu_facilities) + , [r0] "+r" (r0) + : +@@ -872,6 +887,28 @@ int ZLIB_INTERNAL dfltcc_deflate_params(strm, level, strategy, flush) + return Z_OK; + } + ++int ZLIB_INTERNAL dfltcc_deflate_done(strm, flush) ++ z_streamp strm; ++ int flush; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ ++ /* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might ++ * close the block without resetting the compression state. Detect this ++ * situation and return that deflation is not done. ++ */ ++ if (flush == Z_FULL_FLUSH && strm->avail_out == 0) ++ return 0; ++ ++ /* Return that deflation is not done if DFLTCC is used and either it ++ * buffered some data (Continuation Flag is set), or has not written EOBS ++ * yet (Block-Continuation Flag is set). ++ */ ++ return !dfltcc_can_deflate(strm) || (!param->cf && !param->bcf); ++} ++ + /* + Preloading history. 
+ */ +@@ -925,6 +962,7 @@ int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(strm, dictionary, dict_length) + + append_history(param, state->window, dictionary, dict_length); + state->strstart = 1; /* Add FDICT to zlib header */ ++ state->block_start = state->strstart; /* Make deflate_stored happy */ + return Z_OK; + } + +diff --git a/contrib/s390/dfltcc_deflate.h b/contrib/s390/dfltcc_deflate.h +index de36784..914daa4 100644 +--- a/contrib/s390/dfltcc_deflate.h ++++ b/contrib/s390/dfltcc_deflate.h +@@ -11,6 +11,7 @@ int ZLIB_INTERNAL dfltcc_deflate_params OF((z_streamp strm, + int level, + int strategy, + int *flush)); ++int ZLIB_INTERNAL dfltcc_deflate_done OF((z_streamp strm, int flush)); + int ZLIB_INTERNAL dfltcc_deflate_set_dictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dict_length)); +@@ -41,6 +42,7 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary OF((z_streamp strm, + if (err == Z_STREAM_ERROR) \ + return err; \ + } while (0) ++#define DEFLATE_DONE dfltcc_deflate_done + #define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ + do { \ + if (dfltcc_can_deflate((strm))) \ +diff --git a/deflate.c b/deflate.c +index d907a1b..085abbe 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -75,6 +75,7 @@ const char deflate_copyright[] = + #define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) + #define DEFLATE_RESET_KEEP_HOOK(strm) do {} while (0) + #define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) do {} while (0) ++#define DEFLATE_DONE(strm, flush) 1 + #define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0) + #define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) 0 + #define DEFLATE_HOOK(strm, flush, bstate) 0 +@@ -605,14 +606,15 @@ int ZEXPORT deflateParams(strm, level, strategy) + DEFLATE_PARAMS_HOOK(strm, level, strategy, &hook_flush); + func = configuration_table[s->level].func; + +- if ((strategy != s->strategy || func != configuration_table[level].func || +- hook_flush != Z_NO_FLUSH) && 
s->last_flush != -2) { ++ if (((strategy != s->strategy || func != configuration_table[level].func) && ++ s->last_flush != -2) || hook_flush != Z_NO_FLUSH) { + /* Flush the last buffer: */ +- int err = deflate(strm, RANK(hook_flush) > RANK(Z_BLOCK) ? +- hook_flush : Z_BLOCK); ++ int flush = RANK(hook_flush) > RANK(Z_BLOCK) ? hook_flush : Z_BLOCK; ++ int err = deflate(strm, flush); + if (err == Z_STREAM_ERROR) + return err; +- if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead) ++ if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead || ++ !DEFLATE_DONE(strm, flush)) + return Z_BUF_ERROR; + } + if (s->level != level) { +diff --git a/test/infcover.c b/test/infcover.c +index a34cd17..a208219 100644 +--- a/test/infcover.c ++++ b/test/infcover.c +@@ -373,7 +373,7 @@ local void cover_support(void) + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; +- ret = inflateInit_(&strm, ZLIB_VERSION - 1, (int)sizeof(z_stream)); ++ ret = inflateInit_(&strm, &ZLIB_VERSION[1], (int)sizeof(z_stream)); + assert(ret == Z_VERSION_ERROR); + mem_done(&strm, "wrong version"); + +-- +2.26.0 + diff --git a/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-compressBound-fix.patch b/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-compressBound-fix.patch new file mode 100644 index 0000000000000000000000000000000000000000..27454abb708e139d908bac495e5dd1b126b40eb4 --- /dev/null +++ b/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-compressBound-fix.patch @@ -0,0 +1,93 @@ +Source from https://gitlab.com/redhat/centos-stream/rpms/zlib/-/merge_requests/9 +Author: Ilya Leoshkevich + +--- a/compress.c ++++ b/compress.c +@@ -5,9 +5,15 @@ + + /* @(#) $Id$ */ + +-#define ZLIB_INTERNAL ++#include "zutil.h" + #include "zlib.h" + ++#ifdef DFLTCC ++# include "contrib/s390/dfltcc.h" ++#else ++#define DEFLATE_BOUND_COMPLEN(source_len) 0 ++#endif ++ + /* =========================================================================== + Compresses the source buffer into the destination buffer. 
The level + parameter has the same meaning as in deflateInit. sourceLen is the byte +@@ -81,6 +87,12 @@ int ZEXPORT compress (dest, destLen, source, sourceLen) + uLong ZEXPORT compressBound (sourceLen) + uLong sourceLen; + { ++ uLong complen = DEFLATE_BOUND_COMPLEN(sourceLen); ++ ++ if (complen > 0) ++ /* Architecture-specific code provided an upper bound. */ ++ return complen + ZLIB_WRAPLEN; ++ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13; + } +--- a/contrib/s390/dfltcc.h ++++ b/contrib/s390/dfltcc.h +@@ -12,6 +12,28 @@ void ZLIB_INTERNAL dfltcc_reset OF((z_streamp strm, uInt size)); + voidpf ZLIB_INTERNAL dfltcc_alloc_window OF((z_streamp strm, uInt items, + uInt size)); + void ZLIB_INTERNAL dfltcc_free_window OF((z_streamp strm, voidpf w)); ++#define DFLTCC_BLOCK_HEADER_BITS 3 ++#define DFLTCC_HLITS_COUNT_BITS 5 ++#define DFLTCC_HDISTS_COUNT_BITS 5 ++#define DFLTCC_HCLENS_COUNT_BITS 4 ++#define DFLTCC_MAX_HCLENS 19 ++#define DFLTCC_HCLEN_BITS 3 ++#define DFLTCC_MAX_HLITS 286 ++#define DFLTCC_MAX_HDISTS 30 ++#define DFLTCC_MAX_HLIT_HDIST_BITS 7 ++#define DFLTCC_MAX_SYMBOL_BITS 16 ++#define DFLTCC_MAX_EOBS_BITS 15 ++#define DFLTCC_MAX_PADDING_BITS 7 ++#define DEFLATE_BOUND_COMPLEN(source_len) \ ++ ((DFLTCC_BLOCK_HEADER_BITS + \ ++ DFLTCC_HLITS_COUNT_BITS + \ ++ DFLTCC_HDISTS_COUNT_BITS + \ ++ DFLTCC_HCLENS_COUNT_BITS + \ ++ DFLTCC_MAX_HCLENS * DFLTCC_HCLEN_BITS + \ ++ (DFLTCC_MAX_HLITS + DFLTCC_MAX_HDISTS) * DFLTCC_MAX_HLIT_HDIST_BITS + \ ++ (source_len) * DFLTCC_MAX_SYMBOL_BITS + \ ++ DFLTCC_MAX_EOBS_BITS + \ ++ DFLTCC_MAX_PADDING_BITS) >> 3) + int ZLIB_INTERNAL dfltcc_can_inflate OF((z_streamp strm)); + typedef enum { + DFLTCC_INFLATE_CONTINUE, +diff --git a/contrib/s390/dfltcc_deflate.h b/contrib/s390/dfltcc_deflate.h +index 03f7f53..46acfc5 100644 +--- a/contrib/s390/dfltcc_deflate.h ++++ b/contrib/s390/dfltcc_deflate.h +@@ -46,8 +46,7 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary OF((z_streamp strm, + #define 
DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ + do { \ + if (dfltcc_can_deflate((strm))) \ +- (complen) = (3 + 5 + 5 + 4 + 19 * 3 + (286 + 30) * 7 + \ +- (source_len) * 16 + 15 + 7) >> 3; \ ++ (complen) = DEFLATE_BOUND_COMPLEN(source_len); \ + } while (0) + #define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm))) + #define DEFLATE_HOOK dfltcc_deflate +diff --git a/zutil.h b/zutil.h +index 14277bc..cf90e49 100644 +--- a/zutil.h ++++ b/zutil.h +@@ -87,6 +87,8 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ + + #define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + ++#define ZLIB_WRAPLEN 6 /* zlib format overhead */ ++ + /* target dependencies */ + + #if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) diff --git a/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-fix.patch b/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-fix.patch new file mode 100644 index 0000000000000000000000000000000000000000..a9e5e297200cfb06af88ff0b6a8affc0afeb972b --- /dev/null +++ b/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-fix.patch @@ -0,0 +1,516 @@ +From 608b71008c16ce6fbf2305145c5ffb69cd88ef59 Mon Sep 17 00:00:00 2001 +From: Ondrej Dubaj +Date: Fri, 7 Aug 2020 07:12:50 +0200 +Subject: [PATCH] Fix for Z hardware-accelerated deflate for s390x + +--- + configure | 7 + + contrib/s390/dfltcc.c | 244 +++++++++++++++++++++------------- + contrib/s390/dfltcc_deflate.h | 10 +- + deflate.c | 21 +-- + 4 files changed, 177 insertions(+), 105 deletions(-) + +diff --git a/configure b/configure +index 66caece..bfe4386 100755 +--- a/configure ++++ b/configure +@@ -114,6 +114,7 @@ case "$1" in + echo ' configure [--const] [--zprefix] [--prefix=PREFIX] [--eprefix=EXPREFIX]' | tee -a configure.log + echo ' [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log + echo ' [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log ++ echo ' [--dfltcc]' | tee -a configure.log + exit 0 ;; + -p*=* 
| --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;; + -e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;; +@@ -137,6 +138,12 @@ case "$1" in + -c* | --const) zconst=1; shift ;; + -w* | --warn) warn=1; shift ;; + -d* | --debug) debug=1; shift ;; ++ --dfltcc) ++ CFLAGS="$CFLAGS -DDFLTCC" ++ OBJC="$OBJC dfltcc.o" ++ PIC_OBJC="$PIC_OBJC dfltcc.lo" ++ shift ++ ;; + *) + echo "unknown option: $1" | tee -a configure.log + echo "$0 --help for help" | tee -a configure.log +diff --git a/contrib/s390/dfltcc.c b/contrib/s390/dfltcc.c +index d187796..d88a0d6 100644 +--- a/contrib/s390/dfltcc.c ++++ b/contrib/s390/dfltcc.c +@@ -2,12 +2,13 @@ + + /* + Use the following commands to build zlib with DFLTCC support: +- $ CFLAGS=-DDFLTCC ./configure +- $ make OBJA=dfltcc.o PIC_OBJA=dfltcc.lo ++ $ ./configure --dfltcc ++ $ make + */ + + #define _GNU_SOURCE + #include ++#include + #include + #include + #include +@@ -230,31 +231,28 @@ struct dfltcc_state { + /* + Compress. + */ +-local inline int dfltcc_are_params_ok(int level, +- uInt window_bits, +- int strategy, +- uLong level_mask); +-local inline int dfltcc_are_params_ok(level, window_bits, strategy, level_mask) ++local inline int dfltcc_can_deflate_with_params(z_streamp strm, ++ int level, ++ uInt window_bits, ++ int strategy); ++local inline int dfltcc_can_deflate_with_params(strm, ++ level, ++ window_bits, ++ strategy) ++ z_streamp strm; + int level; + uInt window_bits; + int strategy; +- uLong level_mask; +-{ +- return (level_mask & (1 << level)) != 0 && +- (window_bits == HB_BITS) && +- (strategy == Z_FIXED || strategy == Z_DEFAULT_STRATEGY); +-} +- +- +-int ZLIB_INTERNAL dfltcc_can_deflate(strm) +- z_streamp strm; + { + deflate_state FAR *state = (deflate_state FAR *)strm->state; + struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); + + /* Unsupported compression settings */ +- if (!dfltcc_are_params_ok(state->level, state->w_bits, state->strategy, +- dfltcc_state->level_mask)) ++ if 
((dfltcc_state->level_mask & (1 << level)) == 0) ++ return 0; ++ if (window_bits != HB_BITS) ++ return 0; ++ if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY) + return 0; + + /* Unsupported hardware */ +@@ -266,6 +264,17 @@ int ZLIB_INTERNAL dfltcc_can_deflate(strm) + return 1; + } + ++int ZLIB_INTERNAL dfltcc_can_deflate(strm) ++ z_streamp strm; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ ++ return dfltcc_can_deflate_with_params(strm, ++ state->level, ++ state->w_bits, ++ state->strategy); ++} ++ + local void dfltcc_gdht OF((z_streamp strm)); + local void dfltcc_gdht(strm) + z_streamp strm; +@@ -349,22 +358,24 @@ again: + soft_bcc = 0; + no_flush = flush == Z_NO_FLUSH; + +- /* Trailing empty block. Switch to software, except when Continuation Flag +- * is set, which means that DFLTCC has buffered some output in the +- * parameter block and needs to be called again in order to flush it. ++ /* No input data. Return, except when Continuation Flag is set, which means ++ * that DFLTCC has buffered some output in the parameter block and needs to ++ * be called again in order to flush it. + */ +- if (flush == Z_FINISH && strm->avail_in == 0 && !param->cf) { +- if (param->bcf) { +- /* A block is still open, and the hardware does not support closing +- * blocks without adding data. Thus, close it manually. +- */ ++ if (strm->avail_in == 0 && !param->cf) { ++ /* A block is still open, and the hardware does not support closing ++ * blocks without adding data. Thus, close it manually. ++ */ ++ if (!no_flush && param->bcf) { + send_eobs(strm, param); + param->bcf = 0; + } +- return 0; +- } +- +- if (strm->avail_in == 0 && !param->cf) { ++ /* Let one of deflate_* functions write a trailing empty block. */ ++ if (flush == Z_FINISH) ++ return 0; ++ /* Clear history. */ ++ if (flush == Z_FULL_FLUSH) ++ param->hl = 0; + *result = need_more; + return 1; + } +@@ -418,7 +429,7 @@ again: + param->cvt = state->wrap == 2 ? 
CVT_CRC32 : CVT_ADLER32; + if (!no_flush) + /* We need to close a block. Always do this in software - when there is +- * no input data, the hardware will not nohor BCC. */ ++ * no input data, the hardware will not honor BCC. */ + soft_bcc = 1; + if (flush == Z_FINISH && !param->bcf) + /* We are about to open a BFINAL block, set Block Header Final bit +@@ -433,8 +444,8 @@ again: + param->sbb = (unsigned int)state->bi_valid; + if (param->sbb > 0) + *strm->next_out = (Bytef)state->bi_buf; +- if (param->hl) +- param->nt = 0; /* Honor history */ ++ /* Honor history and check value */ ++ param->nt = 0; + param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler; + + /* When opening a block, choose a Huffman-Table Type */ +@@ -642,27 +653,86 @@ int ZLIB_INTERNAL dfltcc_inflate_disable(strm) + return 0; + } + +-/* +- Memory management. +- DFLTCC requires parameter blocks and window to be aligned. zlib allows +- users to specify their own allocation functions, so using e.g. +- `posix_memalign' is not an option. Thus, we overallocate and take the +- aligned portion of the buffer. +-*/ ++local int env_dfltcc_disabled; ++local int env_source_date_epoch; ++local unsigned long env_level_mask; ++local unsigned long env_block_size; ++local unsigned long env_block_threshold; ++local unsigned long env_dht_threshold; ++local unsigned long env_ribm; ++local uint64_t cpu_facilities[(DFLTCC_FACILITY / 64) + 1]; ++local struct dfltcc_qaf_param cpu_af __attribute__((aligned(8))); ++ + local inline int is_dfltcc_enabled OF((void)); + local inline int is_dfltcc_enabled(void) ++{ ++ if (env_dfltcc_disabled) ++ /* User has explicitly disabled DFLTCC. 
*/ ++ return 0; ++ ++ return is_bit_set((const char *)cpu_facilities, DFLTCC_FACILITY); ++} ++ ++local unsigned long xstrtoul OF((const char *s, unsigned long _default)); ++local unsigned long xstrtoul(s, _default) ++ const char *s; ++ unsigned long _default; ++{ ++ char *endptr; ++ unsigned long result; ++ ++ if (!(s && *s)) ++ return _default; ++ errno = 0; ++ result = strtoul(s, &endptr, 0); ++ return (errno || *endptr) ? _default : result; ++} ++ ++__attribute__((constructor)) local void init_globals OF((void)); ++__attribute__((constructor)) local void init_globals(void) + { + const char *env; +- uint64_t facilities[(DFLTCC_FACILITY / 64) + 1]; + register char r0 __asm__("r0"); + + env = secure_getenv("DFLTCC"); +- if (env && !strcmp(env, "0")) +- /* User has explicitly disabled DFLTCC. */ +- return 0; ++ ++ ++ env_dfltcc_disabled = env && !strcmp(env, "0"); ++ ++ env = secure_getenv("SOURCE_DATE_EPOCH"); ++ env_source_date_epoch = !!env; ++ ++#ifndef DFLTCC_LEVEL_MASK ++#define DFLTCC_LEVEL_MASK 0x2 ++#endif ++ env_level_mask = xstrtoul(secure_getenv("DFLTCC_LEVEL_MASK"), ++ DFLTCC_LEVEL_MASK); ++ ++#ifndef DFLTCC_BLOCK_SIZE ++#define DFLTCC_BLOCK_SIZE 1048576 ++#endif ++ env_block_size = xstrtoul(secure_getenv("DFLTCC_BLOCK_SIZE"), ++ DFLTCC_BLOCK_SIZE); + +- memset(facilities, 0, sizeof(facilities)); +- r0 = sizeof(facilities) / sizeof(facilities[0]) - 1; ++#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE ++#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 ++#endif ++ env_block_threshold = xstrtoul(secure_getenv("DFLTCC_FIRST_FHT_BLOCK_SIZE"), ++ DFLTCC_FIRST_FHT_BLOCK_SIZE); ++ ++#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE ++#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096 ++#endif ++ env_dht_threshold = xstrtoul(secure_getenv("DFLTCC_DHT_MIN_SAMPLE_SIZE"), ++ DFLTCC_DHT_MIN_SAMPLE_SIZE); ++ ++#ifndef DFLTCC_RIBM ++#define DFLTCC_RIBM 0 ++#endif ++ env_ribm = xstrtoul(secure_getenv("DFLTCC_RIBM"), DFLTCC_RIBM); ++ ++ memset(cpu_facilities, 0, sizeof(cpu_facilities)); ++ r0 = 
sizeof(cpu_facilities) / sizeof(cpu_facilities[0]) - 1; + /* STFLE is supported since z9-109 and only in z/Architecture mode. When + * compiling with -m31, gcc defaults to ESA mode, however, since the kernel + * is 64-bit, it's always z/Architecture mode at runtime. +@@ -671,31 +741,35 @@ local inline int is_dfltcc_enabled(void) + ".machinemode zarch\n" + "stfle %[facilities]\n" + ".machinemode pop\n" +- : [facilities] "=Q" (facilities) ++ : [facilities] "=Q" (cpu_facilities) + , [r0] "+r" (r0) + : + : "cc"); +- return is_bit_set((const char *)facilities, DFLTCC_FACILITY); ++ /* Initialize available functions */ ++ if (is_dfltcc_enabled()) ++ dfltcc(DFLTCC_QAF, &cpu_af, NULL, NULL, NULL, NULL, NULL); ++ else ++ memset(&cpu_af, 0, sizeof(cpu_af)); + } + ++/* ++ Memory management. ++ ++ DFLTCC requires parameter blocks and window to be aligned. zlib allows ++ users to specify their own allocation functions, so using e.g. ++ `posix_memalign' is not an option. Thus, we overallocate and take the ++ aligned portion of the buffer. ++*/ + void ZLIB_INTERNAL dfltcc_reset(strm, size) + z_streamp strm; + uInt size; + { + struct dfltcc_state *dfltcc_state = + (struct dfltcc_state *)((char FAR *)strm->state + ALIGN_UP(size, 8)); +- struct dfltcc_qaf_param *param = +- (struct dfltcc_qaf_param *)&dfltcc_state->param; +- const char *s; + +- /* Initialize available functions */ +- if (is_dfltcc_enabled()) { +- dfltcc(DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL); +- memmove(&dfltcc_state->af, param, sizeof(dfltcc_state->af)); +- } else +- memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); ++ memcpy(&dfltcc_state->af, &cpu_af, sizeof(dfltcc_state->af)); + +- if (secure_getenv("SOURCE_DATE_EPOCH")) ++ if (env_source_date_epoch) + /* User needs reproducible results, but the output of DFLTCC_CMPR + * depends on buffers' page offsets. 
+ */ +@@ -706,36 +780,11 @@ void ZLIB_INTERNAL dfltcc_reset(strm, size) + dfltcc_state->param.nt = 1; + + /* Initialize tuning parameters */ +-#ifndef DFLTCC_LEVEL_MASK +-#define DFLTCC_LEVEL_MASK 0x2 +-#endif +- s = secure_getenv("DFLTCC_LEVEL_MASK"); +- dfltcc_state->level_mask = (s && *s) ? strtoul(s, NULL, 0) : +- DFLTCC_LEVEL_MASK; +-#ifndef DFLTCC_BLOCK_SIZE +-#define DFLTCC_BLOCK_SIZE 1048576 +-#endif +- s = secure_getenv("DFLTCC_BLOCK_SIZE"); +- dfltcc_state->block_size = (s && *s) ? strtoul(s, NULL, 0) : +- DFLTCC_BLOCK_SIZE; +-#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE +-#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 +-#endif +- s = secure_getenv("DFLTCC_FIRST_FHT_BLOCK_SIZE"); +- dfltcc_state->block_threshold = (s && *s) ? strtoul(s, NULL, 0) : +- DFLTCC_FIRST_FHT_BLOCK_SIZE; +-#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE +-#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096 +-#endif +- s = secure_getenv("DFLTCC_DHT_MIN_SAMPLE_SIZE"); +- dfltcc_state->dht_threshold = (s && *s) ? strtoul(s, NULL, 0) : +- DFLTCC_DHT_MIN_SAMPLE_SIZE; +-#ifndef DFLTCC_RIBM +-#define DFLTCC_RIBM 0 +-#endif +- s = secure_getenv("DFLTCC_RIBM"); +- dfltcc_state->param.ribm = (s && *s) ? strtoul(s, NULL, 0) : +- DFLTCC_RIBM; ++ dfltcc_state->level_mask = env_level_mask; ++ dfltcc_state->block_size = env_block_size; ++ dfltcc_state->block_threshold = env_block_threshold; ++ dfltcc_state->dht_threshold = env_dht_threshold; ++ dfltcc_state->param.ribm = env_ribm; + } + + voidpf ZLIB_INTERNAL dfltcc_alloc_state(strm, items, size) +@@ -787,22 +836,26 @@ void ZLIB_INTERNAL dfltcc_free_window(strm, w) + + /* + Switching between hardware and software compression. ++ + DFLTCC does not support all zlib settings, e.g. generation of non-compressed + blocks or alternative window sizes. When such settings are applied on the + fly with deflateParams, we need to convert between hardware and software + window formats. 
+ */ +-int ZLIB_INTERNAL dfltcc_deflate_params(strm, level, strategy) ++int ZLIB_INTERNAL dfltcc_deflate_params(strm, level, strategy, flush) + z_streamp strm; + int level; + int strategy; ++ int *flush; + { + deflate_state FAR *state = (deflate_state FAR *)strm->state; + struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); + struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; + int could_deflate = dfltcc_can_deflate(strm); +- int can_deflate = dfltcc_are_params_ok(level, state->w_bits, strategy, +- dfltcc_state->level_mask); ++ int can_deflate = dfltcc_can_deflate_with_params(strm, ++ level, ++ state->w_bits, ++ strategy); + + if (can_deflate == could_deflate) + /* We continue to work in the same mode - no changes needed */ +@@ -812,8 +865,11 @@ int ZLIB_INTERNAL dfltcc_deflate_params(strm, level, strategy) + /* DFLTCC was not used yet - no changes needed */ + return Z_OK; + +- /* Switching between hardware and software is not implemented */ +- return Z_STREAM_ERROR; ++ /* For now, do not convert between window formats - simply get rid of the ++ * old data instead. 
++ */ ++ *flush = Z_FULL_FLUSH; ++ return Z_OK; + } + + /* +diff --git a/contrib/s390/dfltcc_deflate.h b/contrib/s390/dfltcc_deflate.h +index a129a91..de36784 100644 +--- a/contrib/s390/dfltcc_deflate.h ++++ b/contrib/s390/dfltcc_deflate.h +@@ -9,7 +9,8 @@ int ZLIB_INTERNAL dfltcc_deflate OF((z_streamp strm, + block_state *result)); + int ZLIB_INTERNAL dfltcc_deflate_params OF((z_streamp strm, + int level, +- int strategy)); ++ int strategy, ++ int *flush)); + int ZLIB_INTERNAL dfltcc_deflate_set_dictionary OF((z_streamp strm, + const Bytef *dictionary, + uInt dict_length)); +@@ -29,11 +30,14 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary OF((z_streamp strm, + } while (0) + #define DEFLATE_RESET_KEEP_HOOK(strm) \ + dfltcc_reset((strm), sizeof(deflate_state)) +-#define DEFLATE_PARAMS_HOOK(strm, level, strategy) \ ++#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \ + do { \ + int err; \ + \ +- err = dfltcc_deflate_params((strm), (level), (strategy)); \ ++ err = dfltcc_deflate_params((strm), \ ++ (level), \ ++ (strategy), \ ++ (hook_flush)); \ + if (err == Z_STREAM_ERROR) \ + return err; \ + } while (0) +diff --git a/deflate.c b/deflate.c +index b17a7dd..a80bd3e 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -74,7 +74,7 @@ const char deflate_copyright[] = + #define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) + #define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) + #define DEFLATE_RESET_KEEP_HOOK(strm) do {} while (0) +-#define DEFLATE_PARAMS_HOOK(strm, level, strategy) do {} while (0) ++#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) do {} while (0) + #define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0) + #define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) 0 + #define DEFLATE_HOOK(strm, flush, bstate) 0 +@@ -589,6 +589,7 @@ int ZEXPORT deflateParams(strm, level, strategy) + { + deflate_state *s; + compress_func func; ++ int hook_flush = Z_NO_FLUSH; + + if 
(deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; +@@ -601,13 +602,14 @@ int ZEXPORT deflateParams(strm, level, strategy) + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } +- DEFLATE_PARAMS_HOOK(strm, level, strategy); ++ DEFLATE_PARAMS_HOOK(strm, level, strategy, &hook_flush); + func = configuration_table[s->level].func; + +- if ((strategy != s->strategy || func != configuration_table[level].func) && +- s->high_water) { ++ if ((strategy != s->strategy || func != configuration_table[level].func || ++ hook_flush != Z_NO_FLUSH) && s->high_water) { + /* Flush the last buffer: */ +- int err = deflate(strm, Z_BLOCK); ++ int err = deflate(strm, RANK(hook_flush) > RANK(Z_BLOCK) ? ++ hook_flush : Z_BLOCK); + if (err == Z_STREAM_ERROR) + return err; + if (strm->avail_out == 0) +@@ -1065,7 +1067,6 @@ int ZEXPORT deflate (strm, flush) + } + + if (flush != Z_FINISH) return Z_OK; +- if (s->wrap <= 0) return Z_STREAM_END; + + /* Write the trailer */ + #ifdef GZIP +@@ -1081,7 +1082,7 @@ int ZEXPORT deflate (strm, flush) + } + else + #endif +- { ++ if (s->wrap == 1) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } +@@ -1090,7 +1091,11 @@ int ZEXPORT deflate (strm, flush) + * to flush the rest. + */ + if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ +- return s->pending != 0 ? 
Z_OK : Z_STREAM_END; ++ if (s->pending == 0) { ++ Assert(s->bi_valid == 0, "bi_buf not flushed"); ++ return Z_STREAM_END; ++ } ++ return Z_OK; + } + + /* ========================================================================= */ +-- +2.26.0 + diff --git a/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-s390x.patch b/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-s390x.patch new file mode 100644 index 0000000000000000000000000000000000000000..002a0d5afbcc50e0ff1fec0dbb6c08b5fccb7253 --- /dev/null +++ b/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-s390x.patch @@ -0,0 +1,1709 @@ +From 4e65ca20fc242e4a03471558a357d7809adeb9c4 Mon Sep 17 00:00:00 2001 +From: IBM developers +Date: Thu, 1 Aug 2019 09:02:01 +0200 +Subject: [PATCH] Add support for IBM Z hardware-accelerated deflate + +Future versions of IBM Z mainframes will provide DFLTCC instruction, +which implements deflate algorithm in hardware with estimated +compression and decompression performance orders of magnitude faster +than the current zlib and ratio comparable with that of level 1. + +This patch adds DFLTCC support to zlib. In order to enable it, the +following build commands should be used: + + $ CFLAGS=-DDFLTCC ./configure + $ make OBJA=dfltcc.o PIC_OBJA=dfltcc.lo + +When built like this, zlib would compress in hardware on level 1, and in +software on all other levels. Decompression will always happen in +hardware. In order to enable DFLTCC compression for levels 1-6 (i.e. to +make it used by default) one could either add -DDFLTCC_LEVEL_MASK=0x7e +at compile time, or set the environment variable DFLTCC_LEVEL_MASK to +0x7e at run time. + +Two DFLTCC compression calls produce the same results only when they +both are made on machines of the same generation, and when the +respective buffers have the same offset relative to the start of the +page. Therefore care should be taken when using hardware compression +when reproducible results are desired. 
One such use case - reproducible +software builds - is handled explicitly: when the SOURCE_DATE_EPOCH +environment variable is set, the hardware compression is disabled. + +DFLTCC does not support every single zlib feature, in particular: + + * inflate(Z_BLOCK) and inflate(Z_TREES) + * inflateMark() + * inflatePrime() + * deflateParams() after the first deflate() call + +When used, these functions will either switch to software, or, in case +this is not possible, gracefully fail. + +This patch tries to add DFLTCC support in the least intrusive way. +All SystemZ-specific code was placed into a separate file, but +unfortunately there is still a noticeable amount of changes in the +main zlib code. Below is the summary of those changes. + +DFLTCC takes as arguments a parameter block, an input buffer, an output +buffer and a window. Since DFLTCC requires the parameter block to be +doubleword-aligned, and it's reasonable to allocate it alongside +deflate and inflate states, ZALLOC_STATE, ZFREE_STATE and ZCOPY_STATE +macros were introduced in order to encapsulate the allocation details. +The same is true for the window, for which ZALLOC_WINDOW and +TRY_FREE_WINDOW macros were introduced. + +While for inflate the software and hardware window formats match, this is +not the case for deflate. Therefore, deflateSetDictionary and +deflateGetDictionary need special handling, which is triggered using the +new DEFLATE_SET_DICTIONARY_HOOK and DEFLATE_GET_DICTIONARY_HOOK macros. + +deflateResetKeep() and inflateResetKeep() now update the DFLTCC +parameter block, which is allocated alongside zlib state, using +the new DEFLATE_RESET_KEEP_HOOK and INFLATE_RESET_KEEP_HOOK macros. + +In order to make unsupported deflateParams(), inflatePrime() and +inflateMark() calls fail gracefully, the new DEFLATE_PARAMS_HOOK, +INFLATE_PRIME_HOOK and INFLATE_MARK_HOOK macros were introduced. + +The algorithm implemented in hardware has a different compression ratio +than the one implemented in software. 
In order for deflateBound() to +return the correct results for the hardware implementation, the new +DEFLATE_BOUND_ADJUST_COMPLEN and DEFLATE_NEED_CONSERVATIVE_BOUND macros +were introduced. + +Actual compression and decompression are handled by the new DEFLATE_HOOK +and INFLATE_TYPEDO_HOOK macros. Since inflation with DFLTCC manages the +window on its own, calling updatewindow() is suppressed using the new +INFLATE_NEED_UPDATEWINDOW() macro. + +In addition to compression, DFLTCC computes CRC-32 and Adler-32 +checksums; therefore, whenever it's used, software checksumming needs to +be suppressed using the new DEFLATE_NEED_CHECKSUM and +INFLATE_NEED_CHECKSUM macros. + +DFLTCC will refuse to write an End-of-block Symbol if there is no input +data; thus, in some cases it is necessary to do this manually. In order +to achieve this, send_bits, bi_reverse, bi_windup and flush_pending +were promoted from local to ZLIB_INTERNAL. Furthermore, since block and +stream termination must be handled in software as well, the block_state enum +was moved to deflate.h. + +Since the first call to dfltcc_inflate already needs the window, and it +might not be allocated yet, inflate_ensure_window was factored out of +updatewindow and made ZLIB_INTERNAL. 
+--- + Makefile.in | 8 + + configure | 13 + + contrib/README.contrib | 4 + + contrib/s390/dfltcc.c | 901 ++++++++++++++++++++++++++++++++++ + contrib/s390/dfltcc.h | 55 +++ + contrib/s390/dfltcc_deflate.h | 50 ++ + deflate.c | 60 ++- + deflate.h | 12 + + gzguts.h | 4 + + inflate.c | 84 +++- + inflate.h | 2 + + test/infcover.c | 2 +- + test/minigzip.c | 4 + + trees.c | 13 +- + 14 files changed, 1161 insertions(+), 51 deletions(-) + create mode 100644 contrib/s390/dfltcc.c + create mode 100644 contrib/s390/dfltcc.h + create mode 100644 contrib/s390/dfltcc_deflate.h + +diff --git a/Makefile.in b/Makefile.in +index 5a77949..e756e2f 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -143,6 +143,14 @@ match.lo: match.S + mv _match.o match.lo + rm -f _match.s + ++dfltcc.o: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h ++ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/s390/dfltcc.c ++ ++dfltcc.lo: $(SRCDIR)contrib/s390/dfltcc.c $(SRCDIR)zlib.h zconf.h ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/dfltcc.o $(SRCDIR)contrib/s390/dfltcc.c ++ -@mv objs/dfltcc.o $@ ++ + example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c + +diff --git a/configure b/configure +index e974d1f..8fab355 100755 +--- a/configure ++++ b/configure +@@ -826,6 +826,19 @@ EOF + fi + fi + ++# Check whether sys/sdt.h is available ++cat > $test.c << EOF ++#include <sys/sdt.h> ++int main() { return 0; } ++EOF ++if try ${CC} ${CFLAGS} $test.c; then ++ echo "Checking for sys/sdt.h ... Yes." | tee -a configure.log ++ CFLAGS="$CFLAGS -DHAVE_SYS_SDT_H" ++ SFLAGS="$SFLAGS -DHAVE_SYS_SDT_H" ++else ++ echo "Checking for sys/sdt.h ... No." 
| tee -a configure.log ++fi ++ + # show the results in the log + echo >> configure.log + echo ALL = $ALL >> configure.log +diff --git a/contrib/README.contrib b/contrib/README.contrib +index a411d5c..b4d3b18 100644 +--- a/contrib/README.contrib ++++ b/contrib/README.contrib +@@ -67,6 +67,10 @@ puff/ by Mark Adler + Small, low memory usage inflate. Also serves to provide an + unambiguous description of the deflate format. + ++s390/ by Ilya Leoshkevich ++ Hardware-accelerated deflate on IBM Z with DEFLATE CONVERSION CALL ++ instruction. ++ + testzlib/ by Gilles Vollant + Example of the use of zlib + +diff --git a/contrib/s390/dfltcc.c b/contrib/s390/dfltcc.c +new file mode 100644 +index 0000000..d187796 +--- /dev/null ++++ b/contrib/s390/dfltcc.c +@@ -0,0 +1,901 @@ ++/* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. */ ++ ++/* ++ Use the following commands to build zlib with DFLTCC support: ++ $ CFLAGS=-DDFLTCC ./configure ++ $ make OBJA=dfltcc.o PIC_OBJA=dfltcc.lo ++*/ ++ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++#include ++#include "../../zutil.h" ++#include "../../deflate.h" ++#include "../../inftrees.h" ++#include "../../inflate.h" ++#include "dfltcc.h" ++#include "dfltcc_deflate.h" ++#ifdef HAVE_SYS_SDT_H ++#include <sys/sdt.h> ++#endif ++ ++/* ++ C wrapper for the DEFLATE CONVERSION CALL instruction. 
++ */ ++typedef enum { ++ DFLTCC_CC_OK = 0, ++ DFLTCC_CC_OP1_TOO_SHORT = 1, ++ DFLTCC_CC_OP2_TOO_SHORT = 2, ++ DFLTCC_CC_OP2_CORRUPT = 2, ++ DFLTCC_CC_AGAIN = 3, ++} dfltcc_cc; ++ ++#define DFLTCC_QAF 0 ++#define DFLTCC_GDHT 1 ++#define DFLTCC_CMPR 2 ++#define DFLTCC_XPND 4 ++#define HBT_CIRCULAR (1 << 7) ++#define HB_BITS 15 ++#define HB_SIZE (1 << HB_BITS) ++#define DFLTCC_FACILITY 151 ++ ++local inline dfltcc_cc dfltcc OF((int fn, void *param, ++ Bytef **op1, size_t *len1, ++ z_const Bytef **op2, size_t *len2, ++ void *hist)); ++local inline dfltcc_cc dfltcc(fn, param, op1, len1, op2, len2, hist) ++ int fn; ++ void *param; ++ Bytef **op1; ++ size_t *len1; ++ z_const Bytef **op2; ++ size_t *len2; ++ void *hist; ++{ ++ Bytef *t2 = op1 ? *op1 : NULL; ++ size_t t3 = len1 ? *len1 : 0; ++ z_const Bytef *t4 = op2 ? *op2 : NULL; ++ size_t t5 = len2 ? *len2 : 0; ++ register int r0 __asm__("r0") = fn; ++ register void *r1 __asm__("r1") = param; ++ register Bytef *r2 __asm__("r2") = t2; ++ register size_t r3 __asm__("r3") = t3; ++ register z_const Bytef *r4 __asm__("r4") = t4; ++ register size_t r5 __asm__("r5") = t5; ++ int cc; ++ ++ __asm__ volatile( ++#ifdef HAVE_SYS_SDT_H ++ STAP_PROBE_ASM(zlib, dfltcc_entry, ++ STAP_PROBE_ASM_TEMPLATE(5)) ++#endif ++ ".insn rrf,0xb9390000,%[r2],%[r4],%[hist],0\n" ++#ifdef HAVE_SYS_SDT_H ++ STAP_PROBE_ASM(zlib, dfltcc_exit, ++ STAP_PROBE_ASM_TEMPLATE(5)) ++#endif ++ "ipm %[cc]\n" ++ : [r2] "+r" (r2) ++ , [r3] "+r" (r3) ++ , [r4] "+r" (r4) ++ , [r5] "+r" (r5) ++ , [cc] "=r" (cc) ++ : [r0] "r" (r0) ++ , [r1] "r" (r1) ++ , [hist] "r" (hist) ++#ifdef HAVE_SYS_SDT_H ++ , STAP_PROBE_ASM_OPERANDS(5, r2, r3, r4, r5, hist) ++#endif ++ : "cc", "memory"); ++ t2 = r2; t3 = r3; t4 = r4; t5 = r5; ++ ++ if (op1) ++ *op1 = t2; ++ if (len1) ++ *len1 = t3; ++ if (op2) ++ *op2 = t4; ++ if (len2) ++ *len2 = t5; ++ return (cc >> 28) & 3; ++} ++ ++/* ++ Parameter Block for Query Available Functions. 
++ */ ++#define static_assert(c, msg) \ ++ __attribute__((unused)) \ ++ static char static_assert_failed_ ## msg[c ? 1 : -1] ++ ++struct dfltcc_qaf_param { ++ char fns[16]; ++ char reserved1[8]; ++ char fmts[2]; ++ char reserved2[6]; ++}; ++ ++static_assert(sizeof(struct dfltcc_qaf_param) == 32, ++ sizeof_struct_dfltcc_qaf_param_is_32); ++ ++local inline int is_bit_set OF((const char *bits, int n)); ++local inline int is_bit_set(bits, n) ++ const char *bits; ++ int n; ++{ ++ return bits[n / 8] & (1 << (7 - (n % 8))); ++} ++ ++local inline void clear_bit OF((char *bits, int n)); ++local inline void clear_bit(bits, n) ++ char *bits; ++ int n; ++{ ++ bits[n / 8] &= ~(1 << (7 - (n % 8))); ++} ++ ++#define DFLTCC_FMT0 0 ++ ++/* ++ Parameter Block for Generate Dynamic-Huffman Table, Compress and Expand. ++ */ ++#define CVT_CRC32 0 ++#define CVT_ADLER32 1 ++#define HTT_FIXED 0 ++#define HTT_DYNAMIC 1 ++ ++struct dfltcc_param_v0 { ++ uint16_t pbvn; /* Parameter-Block-Version Number */ ++ uint8_t mvn; /* Model-Version Number */ ++ uint8_t ribm; /* Reserved for IBM use */ ++ unsigned reserved32 : 31; ++ unsigned cf : 1; /* Continuation Flag */ ++ uint8_t reserved64[8]; ++ unsigned nt : 1; /* New Task */ ++ unsigned reserved129 : 1; ++ unsigned cvt : 1; /* Check Value Type */ ++ unsigned reserved131 : 1; ++ unsigned htt : 1; /* Huffman-Table Type */ ++ unsigned bcf : 1; /* Block-Continuation Flag */ ++ unsigned bcc : 1; /* Block Closing Control */ ++ unsigned bhf : 1; /* Block Header Final */ ++ unsigned reserved136 : 1; ++ unsigned reserved137 : 1; ++ unsigned dhtgc : 1; /* DHT Generation Control */ ++ unsigned reserved139 : 5; ++ unsigned reserved144 : 5; ++ unsigned sbb : 3; /* Sub-Byte Boundary */ ++ uint8_t oesc; /* Operation-Ending-Supplemental Code */ ++ unsigned reserved160 : 12; ++ unsigned ifs : 4; /* Incomplete-Function Status */ ++ uint16_t ifl; /* Incomplete-Function Length */ ++ uint8_t reserved192[8]; ++ uint8_t reserved256[8]; ++ uint8_t reserved320[4]; ++ 
uint16_t hl; /* History Length */ ++ unsigned reserved368 : 1; ++ uint16_t ho : 15; /* History Offset */ ++ uint32_t cv; /* Check Value */ ++ unsigned eobs : 15; /* End-of-block Symbol */ ++ unsigned reserved431: 1; ++ uint8_t eobl : 4; /* End-of-block Length */ ++ unsigned reserved436 : 12; ++ unsigned reserved448 : 4; ++ uint16_t cdhtl : 12; /* Compressed-Dynamic-Huffman Table ++ Length */ ++ uint8_t reserved464[6]; ++ uint8_t cdht[288]; ++ uint8_t reserved[32]; ++ uint8_t csb[1152]; ++}; ++ ++static_assert(sizeof(struct dfltcc_param_v0) == 1536, ++ sizeof_struct_dfltcc_param_v0_is_1536); ++ ++local z_const char *oesc_msg OF((char *buf, int oesc)); ++local z_const char *oesc_msg(buf, oesc) ++ char *buf; ++ int oesc; ++{ ++ if (oesc == 0x00) ++ return NULL; /* Successful completion */ ++ else { ++ sprintf(buf, "Operation-Ending-Supplemental Code is 0x%.2X", oesc); ++ return buf; ++ } ++} ++ ++/* ++ Extension of inflate_state and deflate_state. Must be doubleword-aligned. ++*/ ++struct dfltcc_state { ++ struct dfltcc_param_v0 param; /* Parameter block. */ ++ struct dfltcc_qaf_param af; /* Available functions. */ ++ uLong level_mask; /* Levels on which to use DFLTCC */ ++ uLong block_size; /* New block each X bytes */ ++ uLong block_threshold; /* New block after total_in > X */ ++ uLong dht_threshold; /* New block only if avail_in >= X */ ++ char msg[64]; /* Buffer for strm->msg */ ++}; ++ ++#define ALIGN_UP(p, size) \ ++ (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1)) ++ ++#define GET_DFLTCC_STATE(state) ((struct dfltcc_state FAR *)( \ ++ (char FAR *)(state) + ALIGN_UP(sizeof(*state), 8))) ++ ++/* ++ Compress. 
++ */ ++local inline int dfltcc_are_params_ok(int level, ++ uInt window_bits, ++ int strategy, ++ uLong level_mask); ++local inline int dfltcc_are_params_ok(level, window_bits, strategy, level_mask) ++ int level; ++ uInt window_bits; ++ int strategy; ++ uLong level_mask; ++{ ++ return (level_mask & (1 << level)) != 0 && ++ (window_bits == HB_BITS) && ++ (strategy == Z_FIXED || strategy == Z_DEFAULT_STRATEGY); ++} ++ ++ ++int ZLIB_INTERNAL dfltcc_can_deflate(strm) ++ z_streamp strm; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ ++ /* Unsupported compression settings */ ++ if (!dfltcc_are_params_ok(state->level, state->w_bits, state->strategy, ++ dfltcc_state->level_mask)) ++ return 0; ++ ++ /* Unsupported hardware */ ++ if (!is_bit_set(dfltcc_state->af.fns, DFLTCC_GDHT) || ++ !is_bit_set(dfltcc_state->af.fns, DFLTCC_CMPR) || ++ !is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0)) ++ return 0; ++ ++ return 1; ++} ++ ++local void dfltcc_gdht OF((z_streamp strm)); ++local void dfltcc_gdht(strm) ++ z_streamp strm; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param; ++ size_t avail_in = strm->avail_in; /* scratch copy; strm->avail_in is not written back here */ ++ ++ dfltcc(DFLTCC_GDHT, ++ param, NULL, NULL, ++ &strm->next_in, &avail_in, NULL); ++} ++ ++local dfltcc_cc dfltcc_cmpr OF((z_streamp strm)); ++local dfltcc_cc dfltcc_cmpr(strm) ++ z_streamp strm; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param; ++ size_t avail_in = strm->avail_in; ++ size_t avail_out = strm->avail_out; ++ dfltcc_cc cc; ++ ++ cc = dfltcc(DFLTCC_CMPR | HBT_CIRCULAR, ++ param, &strm->next_out, &avail_out, ++ &strm->next_in, &avail_in, state->window); ++ strm->total_in += (strm->avail_in - avail_in); ++ strm->total_out += (strm->avail_out - avail_out); ++ strm->avail_in = avail_in; 
++ strm->avail_out = avail_out; ++ return cc; ++} ++ ++local void send_eobs OF((z_streamp strm, ++ z_const struct dfltcc_param_v0 FAR *param)); ++local void send_eobs(strm, param) ++ z_streamp strm; ++ z_const struct dfltcc_param_v0 FAR *param; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ ++ _tr_send_bits( ++ state, ++ bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), ++ param->eobl); ++ flush_pending(strm); ++ if (state->pending != 0) { ++ /* The remaining data is located in pending_out[0:pending]. If someone ++ * calls put_byte() - this might happen in deflate() - the byte will be ++ * placed into pending_buf[pending], which is incorrect. Move the ++ * remaining data to the beginning of pending_buf so that put_byte() is ++ * usable again. ++ */ ++ memmove(state->pending_buf, state->pending_out, state->pending); ++ state->pending_out = state->pending_buf; ++ } ++#ifdef ZLIB_DEBUG ++ state->compressed_len += param->eobl; ++#endif ++} ++ ++int ZLIB_INTERNAL dfltcc_deflate(strm, flush, result) ++ z_streamp strm; ++ int flush; ++ block_state *result; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ uInt masked_avail_in; ++ dfltcc_cc cc; ++ int need_empty_block; ++ int soft_bcc; ++ int no_flush; ++ ++ if (!dfltcc_can_deflate(strm)) ++ return 0; ++ ++again: ++ masked_avail_in = 0; ++ soft_bcc = 0; ++ no_flush = flush == Z_NO_FLUSH; ++ ++ /* Trailing empty block. Switch to software, except when Continuation Flag ++ * is set, which means that DFLTCC has buffered some output in the ++ * parameter block and needs to be called again in order to flush it. ++ */ ++ if (flush == Z_FINISH && strm->avail_in == 0 && !param->cf) { ++ if (param->bcf) { ++ /* A block is still open, and the hardware does not support closing ++ * blocks without adding data. Thus, close it manually. 
++ */ ++ send_eobs(strm, param); ++ param->bcf = 0; ++ } ++ return 0; ++ } ++ ++ if (strm->avail_in == 0 && !param->cf) { ++ *result = need_more; ++ return 1; ++ } ++ ++ /* There is an open non-BFINAL block, we are not going to close it just ++ * yet, we have compressed more than DFLTCC_BLOCK_SIZE bytes and we see ++ * more than DFLTCC_DHT_MIN_SAMPLE_SIZE bytes. Open a new block with a new ++ * DHT in order to adapt to a possibly changed input data distribution. ++ */ ++ if (param->bcf && no_flush && ++ strm->total_in > dfltcc_state->block_threshold && ++ strm->avail_in >= dfltcc_state->dht_threshold) { ++ if (param->cf) { ++ /* We need to flush the DFLTCC buffer before writing the ++ * End-of-block Symbol. Mask the input data and proceed as usual. ++ */ ++ masked_avail_in += strm->avail_in; ++ strm->avail_in = 0; ++ no_flush = 0; ++ } else { ++ /* DFLTCC buffer is empty, so we can manually write the ++ * End-of-block Symbol right away. ++ */ ++ send_eobs(strm, param); ++ param->bcf = 0; ++ dfltcc_state->block_threshold = ++ strm->total_in + dfltcc_state->block_size; ++ if (strm->avail_out == 0) { ++ *result = need_more; ++ return 1; ++ } ++ } ++ } ++ ++ /* The caller gave us too much data. Pass only one block worth of ++ * uncompressed data to DFLTCC and mask the rest, so that on the next ++ * iteration we start a new block. ++ */ ++ if (no_flush && strm->avail_in > dfltcc_state->block_size) { ++ masked_avail_in += (strm->avail_in - dfltcc_state->block_size); ++ strm->avail_in = dfltcc_state->block_size; ++ } ++ ++ /* When we have an open non-BFINAL deflate block and caller indicates that ++ * the stream is ending, we need to close an open deflate block and open a ++ * BFINAL one. ++ */ ++ need_empty_block = flush == Z_FINISH && param->bcf && !param->bhf; ++ ++ /* Translate stream to parameter block */ ++ param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32; ++ if (!no_flush) ++ /* We need to close a block. 
Always do this in software - when there is ++ * no input data, the hardware will not honor BCC. */ ++ soft_bcc = 1; ++ if (flush == Z_FINISH && !param->bcf) ++ /* We are about to open a BFINAL block, set Block Header Final bit ++ * until the stream ends. ++ */ ++ param->bhf = 1; ++ /* DFLTCC-CMPR will write to next_out, so make sure that buffers with ++ * higher precedence are empty. ++ */ ++ Assert(state->pending == 0, "There must be no pending bytes"); ++ Assert(state->bi_valid < 8, "There must be less than 8 pending bits"); ++ param->sbb = (unsigned int)state->bi_valid; ++ if (param->sbb > 0) ++ *strm->next_out = (Bytef)state->bi_buf; ++ if (param->hl) ++ param->nt = 0; /* Honor history */ ++ param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler; ++ ++ /* When opening a block, choose a Huffman-Table Type */ ++ if (!param->bcf) { ++ if (state->strategy == Z_FIXED || ++ (strm->total_in == 0 && dfltcc_state->block_threshold > 0)) ++ param->htt = HTT_FIXED; ++ else { ++ param->htt = HTT_DYNAMIC; ++ dfltcc_gdht(strm); ++ } ++ } ++ ++ /* Deflate */ ++ do { ++ cc = dfltcc_cmpr(strm); ++ if (strm->avail_in < 4096 && masked_avail_in > 0) ++ /* We are about to call DFLTCC with a small input buffer, which is ++ * inefficient. Since there is masked data, there will be at least ++ * one more DFLTCC call, so skip the current one and make the next ++ * one handle more data. ++ */ ++ break; ++ } while (cc == DFLTCC_CC_AGAIN); ++ ++ /* Translate parameter block to stream */ ++ strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); ++ state->bi_valid = param->sbb; ++ if (state->bi_valid == 0) ++ state->bi_buf = 0; /* Avoid accessing next_out */ ++ else ++ state->bi_buf = *strm->next_out & ((1 << state->bi_valid) - 1); ++ strm->adler = state->wrap == 2 ? 
ZSWAP32(param->cv) : param->cv; ++ ++ /* Unmask the input data */ ++ strm->avail_in += masked_avail_in; ++ masked_avail_in = 0; ++ ++ /* If we encounter an error, it means there is a bug in DFLTCC call */ ++ Assert(cc != DFLTCC_CC_OP2_CORRUPT || param->oesc == 0, "BUG"); ++ ++ /* Update Block-Continuation Flag. It will be used to check whether to call ++ * GDHT the next time. ++ */ ++ if (cc == DFLTCC_CC_OK) { ++ if (soft_bcc) { ++ send_eobs(strm, param); ++ param->bcf = 0; ++ dfltcc_state->block_threshold = ++ strm->total_in + dfltcc_state->block_size; ++ } else ++ param->bcf = 1; ++ if (flush == Z_FINISH) { ++ if (need_empty_block) ++ /* Make the current deflate() call also close the stream */ ++ return 0; ++ else { ++ bi_windup(state); ++ *result = finish_done; ++ } ++ } else { ++ if (flush == Z_FULL_FLUSH) ++ param->hl = 0; /* Clear history */ ++ *result = flush == Z_NO_FLUSH ? need_more : block_done; ++ } ++ } else { ++ param->bcf = 1; ++ *result = need_more; ++ } ++ if (strm->avail_in != 0 && strm->avail_out != 0) ++ goto again; /* deflate() must use all input or all output */ ++ return 1; ++} ++ ++/* ++ Expand. 
++ */ ++int ZLIB_INTERNAL dfltcc_can_inflate(strm) ++ z_streamp strm; ++{ ++ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ ++ /* Unsupported compression settings */ ++ if (state->wbits != HB_BITS) ++ return 0; ++ ++ /* Unsupported hardware */ ++ return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && ++ is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0); ++} ++ ++local dfltcc_cc dfltcc_xpnd OF((z_streamp strm)); ++local dfltcc_cc dfltcc_xpnd(strm) ++ z_streamp strm; ++{ ++ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; ++ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param; ++ size_t avail_in = strm->avail_in; ++ size_t avail_out = strm->avail_out; ++ dfltcc_cc cc; ++ ++ cc = dfltcc(DFLTCC_XPND | HBT_CIRCULAR, ++ param, &strm->next_out, &avail_out, ++ &strm->next_in, &avail_in, state->window); ++ strm->avail_in = avail_in; ++ strm->avail_out = avail_out; ++ return cc; ++} ++ ++dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(strm, flush, ret) ++ z_streamp strm; ++ int flush; ++ int *ret; ++{ ++ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ dfltcc_cc cc; ++ ++ if (flush == Z_BLOCK || flush == Z_TREES) { ++ /* DFLTCC does not support stopping on block boundaries */ ++ if (dfltcc_inflate_disable(strm)) { ++ *ret = Z_STREAM_ERROR; ++ return DFLTCC_INFLATE_BREAK; ++ } else ++ return DFLTCC_INFLATE_SOFTWARE; ++ } ++ ++ if (state->last) { ++ if (state->bits != 0) { ++ strm->next_in++; ++ strm->avail_in--; ++ state->bits = 0; ++ } ++ state->mode = CHECK; ++ return DFLTCC_INFLATE_CONTINUE; ++ } ++ ++ if (strm->avail_in == 0 && !param->cf) ++ return DFLTCC_INFLATE_BREAK; ++ ++ if (inflate_ensure_window(state)) { ++ state->mode = MEM; ++ return DFLTCC_INFLATE_CONTINUE; ++ } ++ 
++ /* Translate stream to parameter block */ ++ param->cvt = state->flags ? CVT_CRC32 : CVT_ADLER32; ++ param->sbb = state->bits; ++ param->hl = state->whave; /* Software and hardware history formats match */ ++ param->ho = (state->wnext - state->whave) & ((1 << HB_BITS) - 1); ++ if (param->hl) ++ param->nt = 0; /* Honor history for the first block */ ++ param->cv = state->flags ? ZSWAP32(state->check) : state->check; ++ ++ /* Inflate */ ++ do { ++ cc = dfltcc_xpnd(strm); ++ } while (cc == DFLTCC_CC_AGAIN); ++ ++ /* Translate parameter block to stream */ ++ strm->msg = oesc_msg(dfltcc_state->msg, param->oesc); ++ state->last = cc == DFLTCC_CC_OK; ++ state->bits = param->sbb; ++ state->whave = param->hl; ++ state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1); ++ state->check = state->flags ? ZSWAP32(param->cv) : param->cv; ++ if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { ++ /* Report an error if stream is corrupted */ ++ state->mode = BAD; ++ return DFLTCC_INFLATE_CONTINUE; ++ } ++ state->mode = TYPEDO; ++ /* Break if operands are exhausted, otherwise continue looping */ ++ return (cc == DFLTCC_CC_OP1_TOO_SHORT || cc == DFLTCC_CC_OP2_TOO_SHORT) ? ++ DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE; ++} ++ ++int ZLIB_INTERNAL dfltcc_was_inflate_used(strm) ++ z_streamp strm; ++{ ++ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; ++ struct dfltcc_param_v0 FAR *param = &GET_DFLTCC_STATE(state)->param; ++ ++ return !param->nt; ++} ++ ++int ZLIB_INTERNAL dfltcc_inflate_disable(strm) ++ z_streamp strm; ++{ ++ struct inflate_state FAR *state = (struct inflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ ++ if (!dfltcc_can_inflate(strm)) ++ return 0; ++ if (dfltcc_was_inflate_used(strm)) ++ /* DFLTCC has already decompressed some data. Since there is not ++ * enough information to resume decompression in software, the call ++ * must fail. 
++ */ ++ return 1; ++ /* DFLTCC was not used yet - decompress in software */ ++ memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); ++ return 0; ++} ++ ++/* ++ Memory management. ++ DFLTCC requires parameter blocks and window to be aligned. zlib allows ++ users to specify their own allocation functions, so using e.g. ++ `posix_memalign' is not an option. Thus, we overallocate and take the ++ aligned portion of the buffer. ++*/ ++local inline int is_dfltcc_enabled OF((void)); ++local inline int is_dfltcc_enabled(void) ++{ ++ const char *env; ++ uint64_t facilities[(DFLTCC_FACILITY / 64) + 1]; ++ register char r0 __asm__("r0"); ++ ++ env = secure_getenv("DFLTCC"); ++ if (env && !strcmp(env, "0")) ++ /* User has explicitly disabled DFLTCC. */ ++ return 0; ++ ++ memset(facilities, 0, sizeof(facilities)); ++ r0 = sizeof(facilities) / sizeof(facilities[0]) - 1; ++ /* STFLE is supported since z9-109 and only in z/Architecture mode. When ++ * compiling with -m31, gcc defaults to ESA mode, however, since the kernel ++ * is 64-bit, it's always z/Architecture mode at runtime. 
++ */ ++ __asm__ volatile(".machinemode push\n" ++ ".machinemode zarch\n" ++ "stfle %[facilities]\n" ++ ".machinemode pop\n" ++ : [facilities] "=Q" (facilities) ++ , [r0] "+r" (r0) ++ : ++ : "cc"); ++ return is_bit_set((const char *)facilities, DFLTCC_FACILITY); ++} ++ ++void ZLIB_INTERNAL dfltcc_reset(strm, size) ++ z_streamp strm; ++ uInt size; ++{ ++ struct dfltcc_state *dfltcc_state = ++ (struct dfltcc_state *)((char FAR *)strm->state + ALIGN_UP(size, 8)); ++ struct dfltcc_qaf_param *param = ++ (struct dfltcc_qaf_param *)&dfltcc_state->param; ++ const char *s; ++ ++ /* Initialize available functions */ ++ if (is_dfltcc_enabled()) { ++ dfltcc(DFLTCC_QAF, param, NULL, NULL, NULL, NULL, NULL); ++ memmove(&dfltcc_state->af, param, sizeof(dfltcc_state->af)); ++ } else ++ memset(&dfltcc_state->af, 0, sizeof(dfltcc_state->af)); ++ ++ if (secure_getenv("SOURCE_DATE_EPOCH")) ++ /* User needs reproducible results, but the output of DFLTCC_CMPR ++ * depends on buffers' page offsets. ++ */ ++ clear_bit(dfltcc_state->af.fns, DFLTCC_CMPR); ++ ++ /* Initialize parameter block */ ++ memset(&dfltcc_state->param, 0, sizeof(dfltcc_state->param)); ++ dfltcc_state->param.nt = 1; ++ ++ /* Initialize tuning parameters */ ++#ifndef DFLTCC_LEVEL_MASK ++#define DFLTCC_LEVEL_MASK 0x2 ++#endif ++ s = secure_getenv("DFLTCC_LEVEL_MASK"); ++ dfltcc_state->level_mask = (s && *s) ? strtoul(s, NULL, 0) : ++ DFLTCC_LEVEL_MASK; ++#ifndef DFLTCC_BLOCK_SIZE ++#define DFLTCC_BLOCK_SIZE 1048576 ++#endif ++ s = secure_getenv("DFLTCC_BLOCK_SIZE"); ++ dfltcc_state->block_size = (s && *s) ? strtoul(s, NULL, 0) : ++ DFLTCC_BLOCK_SIZE; ++#ifndef DFLTCC_FIRST_FHT_BLOCK_SIZE ++#define DFLTCC_FIRST_FHT_BLOCK_SIZE 4096 ++#endif ++ s = secure_getenv("DFLTCC_FIRST_FHT_BLOCK_SIZE"); ++ dfltcc_state->block_threshold = (s && *s) ? 
strtoul(s, NULL, 0) : ++ DFLTCC_FIRST_FHT_BLOCK_SIZE; ++#ifndef DFLTCC_DHT_MIN_SAMPLE_SIZE ++#define DFLTCC_DHT_MIN_SAMPLE_SIZE 4096 ++#endif ++ s = secure_getenv("DFLTCC_DHT_MIN_SAMPLE_SIZE"); ++ dfltcc_state->dht_threshold = (s && *s) ? strtoul(s, NULL, 0) : ++ DFLTCC_DHT_MIN_SAMPLE_SIZE; ++#ifndef DFLTCC_RIBM ++#define DFLTCC_RIBM 0 ++#endif ++ s = secure_getenv("DFLTCC_RIBM"); ++ dfltcc_state->param.ribm = (s && *s) ? strtoul(s, NULL, 0) : ++ DFLTCC_RIBM; ++} ++ ++voidpf ZLIB_INTERNAL dfltcc_alloc_state(strm, items, size) ++ z_streamp strm; ++ uInt items; ++ uInt size; ++{ ++ return ZALLOC(strm, ++ ALIGN_UP(items * size, 8) + sizeof(struct dfltcc_state), ++ sizeof(unsigned char)); ++} ++ ++void ZLIB_INTERNAL dfltcc_copy_state(dst, src, size) ++ voidpf dst; ++ const voidpf src; ++ uInt size; ++{ ++ zmemcpy(dst, src, ALIGN_UP(size, 8) + sizeof(struct dfltcc_state)); ++} ++ ++static const int PAGE_ALIGN = 0x1000; ++ ++voidpf ZLIB_INTERNAL dfltcc_alloc_window(strm, items, size) ++ z_streamp strm; ++ uInt items; ++ uInt size; ++{ ++ voidpf p, w; ++ ++ /* To simplify freeing, we store the pointer to the allocated buffer right ++ * before the window. ++ */ ++ p = ZALLOC(strm, sizeof(voidpf) + items * size + PAGE_ALIGN, ++ sizeof(unsigned char)); ++ if (p == NULL) ++ return NULL; ++ w = ALIGN_UP((char FAR *)p + sizeof(voidpf), PAGE_ALIGN); ++ *(voidpf *)((char FAR *)w - sizeof(voidpf)) = p; ++ return w; ++} ++ ++void ZLIB_INTERNAL dfltcc_free_window(strm, w) ++ z_streamp strm; ++ voidpf w; ++{ ++ if (w) ++ ZFREE(strm, *(voidpf *)((unsigned char FAR *)w - sizeof(voidpf))); ++} ++ ++/* ++ Switching between hardware and software compression. ++ DFLTCC does not support all zlib settings, e.g. generation of non-compressed ++ blocks or alternative window sizes. When such settings are applied on the ++ fly with deflateParams, we need to convert between hardware and software ++ window formats. 
++*/ ++int ZLIB_INTERNAL dfltcc_deflate_params(strm, level, strategy) ++ z_streamp strm; ++ int level; ++ int strategy; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ int could_deflate = dfltcc_can_deflate(strm); ++ int can_deflate = dfltcc_are_params_ok(level, state->w_bits, strategy, ++ dfltcc_state->level_mask); ++ ++ if (can_deflate == could_deflate) ++ /* We continue to work in the same mode - no changes needed */ ++ return Z_OK; ++ ++ if (strm->total_in == 0 && param->nt == 1 && param->hl == 0) ++ /* DFLTCC was not used yet - no changes needed */ ++ return Z_OK; ++ ++ /* Switching between hardware and software is not implemented */ ++ return Z_STREAM_ERROR; ++} ++ ++/* ++ Preloading history. ++*/ ++local void append_history OF((struct dfltcc_param_v0 FAR *param, ++ Bytef *history, ++ const Bytef *buf, ++ uInt count)); ++local void append_history(param, history, buf, count) ++ struct dfltcc_param_v0 FAR *param; ++ Bytef *history; ++ const Bytef *buf; ++ uInt count; ++{ ++ size_t offset; ++ size_t n; ++ ++ /* Do not use more than 32K */ ++ if (count > HB_SIZE) { ++ buf += count - HB_SIZE; ++ count = HB_SIZE; ++ } ++ offset = (param->ho + param->hl) % HB_SIZE; ++ if (offset + count <= HB_SIZE) ++ /* Circular history buffer does not wrap - copy one chunk */ ++ zmemcpy(history + offset, buf, count); ++ else { ++ /* Circular history buffer wraps - copy two chunks */ ++ n = HB_SIZE - offset; ++ zmemcpy(history + offset, buf, n); ++ zmemcpy(history, buf + n, count - n); ++ } ++ n = param->hl + count; ++ if (n <= HB_SIZE) ++ /* All history fits into buffer - no need to discard anything */ ++ param->hl = n; ++ else { ++ /* History does not fit into buffer - discard extra bytes */ ++ param->ho = (param->ho + (n - HB_SIZE)) % HB_SIZE; ++ param->hl = HB_SIZE; ++ } ++} ++ ++int ZLIB_INTERNAL 
dfltcc_deflate_set_dictionary(strm, dictionary, dict_length) ++ z_streamp strm; ++ const Bytef *dictionary; ++ uInt dict_length; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ ++ append_history(param, state->window, dictionary, dict_length); ++ state->strstart = 1; /* Add FDICT to zlib header */ ++ return Z_OK; ++} ++ ++int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(strm, dictionary, dict_length) ++ z_streamp strm; ++ Bytef *dictionary; ++ uInt *dict_length; ++{ ++ deflate_state FAR *state = (deflate_state FAR *)strm->state; ++ struct dfltcc_state FAR *dfltcc_state = GET_DFLTCC_STATE(state); ++ struct dfltcc_param_v0 FAR *param = &dfltcc_state->param; ++ ++ if (dictionary) { ++ if (param->ho + param->hl <= HB_SIZE) ++ /* Circular history buffer does not wrap - copy one chunk */ ++ zmemcpy(dictionary, state->window + param->ho, param->hl); ++ else { ++ /* Circular history buffer wraps - copy two chunks */ ++ zmemcpy(dictionary, ++ state->window + param->ho, ++ HB_SIZE - param->ho); ++ zmemcpy(dictionary + HB_SIZE - param->ho, ++ state->window, ++ param->ho + param->hl - HB_SIZE); ++ } ++ } ++ if (dict_length) ++ *dict_length = param->hl; ++ return Z_OK; ++} +\ No newline at end of file +diff --git a/contrib/s390/dfltcc.h b/contrib/s390/dfltcc.h +new file mode 100644 +index 0000000..574e84c +--- /dev/null ++++ b/contrib/s390/dfltcc.h +@@ -0,0 +1,55 @@ ++#ifndef DFLTCC_H ++#define DFLTCC_H ++ ++#include "../../zlib.h" ++#include "../../zutil.h" ++ ++voidpf ZLIB_INTERNAL dfltcc_alloc_state OF((z_streamp strm, uInt items, ++ uInt size)); ++void ZLIB_INTERNAL dfltcc_copy_state OF((voidpf dst, const voidpf src, ++ uInt size)); ++void ZLIB_INTERNAL dfltcc_reset OF((z_streamp strm, uInt size)); ++voidpf ZLIB_INTERNAL dfltcc_alloc_window OF((z_streamp strm, uInt items, ++ uInt size)); ++void ZLIB_INTERNAL dfltcc_free_window 
OF((z_streamp strm, voidpf w)); ++int ZLIB_INTERNAL dfltcc_can_inflate OF((z_streamp strm)); ++typedef enum { ++ DFLTCC_INFLATE_CONTINUE, ++ DFLTCC_INFLATE_BREAK, ++ DFLTCC_INFLATE_SOFTWARE, ++} dfltcc_inflate_action; ++dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate OF((z_streamp strm, ++ int flush, int *ret)); ++int ZLIB_INTERNAL dfltcc_was_inflate_used OF((z_streamp strm)); ++int ZLIB_INTERNAL dfltcc_inflate_disable OF((z_streamp strm)); ++ ++#define ZALLOC_STATE dfltcc_alloc_state ++#define ZFREE_STATE ZFREE ++#define ZCOPY_STATE dfltcc_copy_state ++#define ZALLOC_WINDOW dfltcc_alloc_window ++#define ZFREE_WINDOW dfltcc_free_window ++#define TRY_FREE_WINDOW dfltcc_free_window ++#define INFLATE_RESET_KEEP_HOOK(strm) \ ++ dfltcc_reset((strm), sizeof(struct inflate_state)) ++#define INFLATE_PRIME_HOOK(strm, bits, value) \ ++ do { if (dfltcc_inflate_disable((strm))) return Z_STREAM_ERROR; } while (0) ++#define INFLATE_TYPEDO_HOOK(strm, flush) \ ++ if (dfltcc_can_inflate((strm))) { \ ++ dfltcc_inflate_action action; \ ++\ ++ RESTORE(); \ ++ action = dfltcc_inflate((strm), (flush), &ret); \ ++ LOAD(); \ ++ if (action == DFLTCC_INFLATE_CONTINUE) \ ++ break; \ ++ else if (action == DFLTCC_INFLATE_BREAK) \ ++ goto inf_leave; \ ++ } ++#define INFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_inflate((strm))) ++#define INFLATE_NEED_UPDATEWINDOW(strm) (!dfltcc_can_inflate((strm))) ++#define INFLATE_MARK_HOOK(strm) \ ++ do { \ ++ if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \ ++ } while (0) ++ ++#endif +\ No newline at end of file +diff --git a/contrib/s390/dfltcc_deflate.h b/contrib/s390/dfltcc_deflate.h +new file mode 100644 +index 0000000..a129a91 +--- /dev/null ++++ b/contrib/s390/dfltcc_deflate.h +@@ -0,0 +1,50 @@ ++#ifndef DFLTCC_DEFLATE_H ++#define DFLTCC_DEFLATE_H ++ ++#include "dfltcc.h" ++ ++int ZLIB_INTERNAL dfltcc_can_deflate OF((z_streamp strm)); ++int ZLIB_INTERNAL dfltcc_deflate OF((z_streamp strm, ++ int flush, ++ block_state *result)); ++int 
ZLIB_INTERNAL dfltcc_deflate_params OF((z_streamp strm, ++ int level, ++ int strategy)); ++int ZLIB_INTERNAL dfltcc_deflate_set_dictionary OF((z_streamp strm, ++ const Bytef *dictionary, ++ uInt dict_length)); ++int ZLIB_INTERNAL dfltcc_deflate_get_dictionary OF((z_streamp strm, ++ Bytef *dictionary, ++ uInt* dict_length)); ++ ++#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (dfltcc_can_deflate((strm))) \ ++ return dfltcc_deflate_set_dictionary((strm), (dict), (dict_len)); \ ++ } while (0) ++#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) \ ++ do { \ ++ if (dfltcc_can_deflate((strm))) \ ++ return dfltcc_deflate_get_dictionary((strm), (dict), (dict_len)); \ ++ } while (0) ++#define DEFLATE_RESET_KEEP_HOOK(strm) \ ++ dfltcc_reset((strm), sizeof(deflate_state)) ++#define DEFLATE_PARAMS_HOOK(strm, level, strategy) \ ++ do { \ ++ int err; \ ++\ ++ err = dfltcc_deflate_params((strm), (level), (strategy)); \ ++ if (err == Z_STREAM_ERROR) \ ++ return err; \ ++ } while (0) ++#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \ ++ do { \ ++ if (dfltcc_can_deflate((strm))) \ ++ (complen) = (3 + 5 + 5 + 4 + 19 * 3 + (286 + 30) * 7 + \ ++ (source_len) * 16 + 15 + 7) >> 3; \ ++ } while (0) ++#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) (dfltcc_can_deflate((strm))) ++#define DEFLATE_HOOK dfltcc_deflate ++#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm))) ++ ++#endif +\ No newline at end of file +diff --git a/deflate.c b/deflate.c +index 1ec7614..089285a 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -61,15 +61,29 @@ const char deflate_copyright[] = + */ + + /* =========================================================================== +- * Function prototypes. ++ * Architecture-specific bits. 
+ */ +-typedef enum { +- need_more, /* block not completed, need more input or more output */ +- block_done, /* block flush performed */ +- finish_started, /* finish started, need only more output at next deflate */ +- finish_done /* finish done, accept no more input or output */ +-} block_state; ++#ifdef DFLTCC ++# include "contrib/s390/dfltcc_deflate.h" ++#else ++#define ZALLOC_STATE ZALLOC ++#define ZFREE_STATE ZFREE ++#define ZCOPY_STATE zmemcpy ++#define ZALLOC_WINDOW ZALLOC ++#define TRY_FREE_WINDOW TRY_FREE ++#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) ++#define DEFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0) ++#define DEFLATE_RESET_KEEP_HOOK(strm) do {} while (0) ++#define DEFLATE_PARAMS_HOOK(strm, level, strategy) do {} while (0) ++#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen) do {} while (0) ++#define DEFLATE_NEED_CONSERVATIVE_BOUND(strm) 0 ++#define DEFLATE_HOOK(strm, flush, bstate) 0 ++#define DEFLATE_NEED_CHECKSUM(strm) 1 ++#endif + ++/* =========================================================================== ++ * Function prototypes. ++ */ + typedef block_state (*compress_func) OF((deflate_state *s, int flush)); + /* Compression function. Returns the block state after the call. 
*/ + +@@ -85,7 +99,6 @@ local block_state deflate_rle OF((deflate_state *s, int flush)); + local block_state deflate_huff OF((deflate_state *s, int flush)); + local void lm_init OF((deflate_state *s)); + local void putShortMSB OF((deflate_state *s, uInt b)); +-local void flush_pending OF((z_streamp strm)); + local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); + #ifdef ASMV + # pragma message("Assembler code may have bugs -- use at your own risk") +@@ -301,7 +314,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ +- s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); ++ s = (deflate_state *) ZALLOC_STATE(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; +@@ -318,7 +331,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + +- s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); ++ s->window = (Bytef *) ZALLOC_WINDOW(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + +@@ -394,6 +407,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); ++ DEFLATE_SET_DICTIONARY_HOOK(strm, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ +@@ -452,6 +466,7 @@ int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength) + + if (deflateStateCheck(strm)) + return Z_STREAM_ERROR; ++ DEFLATE_GET_DICTIONARY_HOOK(strm, dictionary, 
dictLength); + s = strm->state; + len = s->strstart + s->lookahead; + if (len > s->w_size) +@@ -498,6 +513,8 @@ int ZEXPORT deflateResetKeep (strm) + + _tr_init(s); + ++ DEFLATE_RESET_KEEP_HOOK(strm); ++ + return Z_OK; + } + +@@ -584,6 +601,7 @@ int ZEXPORT deflateParams(strm, level, strategy) + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } ++ DEFLATE_PARAMS_HOOK(strm, level, strategy); + func = configuration_table[s->level].func; + + if ((strategy != s->strategy || func != configuration_table[level].func) && +@@ -659,6 +677,7 @@ uLong ZEXPORT deflateBound(strm, sourceLen) + /* conservative upper bound for compressed data */ + complen = sourceLen + + ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; ++ DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, sourceLen); + + /* if can't get parameters, return conservative bound plus zlib wrapper */ + if (deflateStateCheck(strm)) +@@ -700,7 +719,8 @@ uLong ZEXPORT deflateBound(strm, sourceLen) + } + + /* if not default parameters, return conservative bound */ +- if (s->w_bits != 15 || s->hash_bits != 8 + 7) ++ if (DEFLATE_NEED_CONSERVATIVE_BOUND(strm) || ++ s->w_bits != 15 || s->hash_bits != 8 + 7) + return complen + wraplen; + + /* default settings: return tight bound for that case */ +@@ -727,7 +747,7 @@ local void putShortMSB (s, b) + * applications may wish to modify it to avoid allocating a large + * strm->next_out buffer and copying into it. (See also read_buf()). + */ +-local void flush_pending(strm) ++void ZLIB_INTERNAL flush_pending(strm) + z_streamp strm; + { + unsigned len; +@@ -997,7 +1017,8 @@ int ZEXPORT deflate (strm, flush) + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + +- bstate = s->level == 0 ? deflate_stored(s, flush) : ++ bstate = DEFLATE_HOOK(strm, flush, &bstate) ? bstate : ++ s->level == 0 ? deflate_stored(s, flush) : + s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : + s->strategy == Z_RLE ? 
deflate_rle(s, flush) : + (*(configuration_table[s->level].func))(s, flush); +@@ -1086,9 +1107,9 @@ int ZEXPORT deflateEnd (strm) + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); +- TRY_FREE(strm, strm->state->window); ++ TRY_FREE_WINDOW(strm, strm->state->window); + +- ZFREE(strm, strm->state); ++ ZFREE_STATE(strm, strm->state); + strm->state = Z_NULL; + + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +@@ -1119,13 +1140,13 @@ int ZEXPORT deflateCopy (dest, source) + + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + +- ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); ++ ds = (deflate_state *) ZALLOC_STATE(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; +- zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); ++ ZCOPY_STATE((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); + ds->strm = dest; + +- ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); ++ ds->window = (Bytef *) ZALLOC_WINDOW(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); +@@ -1174,7 +1195,8 @@ local unsigned read_buf(strm, buf, size) + strm->avail_in -= len; + + zmemcpy(buf, strm->next_in, len); +- if (strm->state->wrap == 1) { ++ if (!DEFLATE_NEED_CHECKSUM(strm)) {} ++ else if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, buf, len); + } + #ifdef GZIP +diff --git a/deflate.h b/deflate.h +index 23ecdd3..821a4b9 100644 +--- a/deflate.h ++++ b/deflate.h +@@ -304,6 +304,7 @@ void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); + void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); + void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + ulg stored_len, int last)); ++void ZLIB_INTERNAL _tr_send_bits 
OF((deflate_state *s, int value, int length)); + + #define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +@@ -346,4 +347,15 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + flush = _tr_tally(s, distance, length) + #endif + ++typedef enum { ++ need_more, /* block not completed, need more input or more output */ ++ block_done, /* block flush performed */ ++ finish_started, /* finish started, need only more output at next deflate */ ++ finish_done /* finish done, accept no more input or output */ ++} block_state; ++ ++unsigned ZLIB_INTERNAL bi_reverse OF((unsigned code, int len)); ++void ZLIB_INTERNAL bi_windup OF((deflate_state *s)); ++void ZLIB_INTERNAL flush_pending OF((z_streamp strm)); ++ + #endif /* DEFLATE_H */ +diff --git a/gzguts.h b/gzguts.h +index 990a4d2..3218395 100644 +--- a/gzguts.h ++++ b/gzguts.h +@@ -153,7 +153,11 @@ + + /* default i/o buffer size -- double this for output when reading (this and + twice this must be able to fit in an unsigned type) */ ++#ifdef DFLTCC ++#define GZBUFSIZE 131072 ++#else + #define GZBUFSIZE 8192 ++#endif + + /* gzip modes, also provide a little integrity check on the passed structure */ + #define GZ_NONE 0 +diff --git a/inflate.c b/inflate.c +index ac333e8..f77c2ae 100644 +--- a/inflate.c ++++ b/inflate.c +@@ -85,6 +85,23 @@ + #include "inflate.h" + #include "inffast.h" + ++/* architecture-specific bits */ ++#ifdef DFLTCC ++# include "contrib/s390/dfltcc.h" ++#else ++#define ZALLOC_STATE ZALLOC ++#define ZFREE_STATE ZFREE ++#define ZCOPY_STATE zmemcpy ++#define ZALLOC_WINDOW ZALLOC ++#define ZFREE_WINDOW ZFREE ++#define INFLATE_RESET_KEEP_HOOK(strm) do {} while (0) ++#define INFLATE_PRIME_HOOK(strm, bits, value) do {} while (0) ++#define INFLATE_TYPEDO_HOOK(strm, flush) do {} while (0) ++#define INFLATE_NEED_CHECKSUM(strm) 1 ++#define INFLATE_NEED_UPDATEWINDOW(strm) 1 ++#define INFLATE_MARK_HOOK(strm) do {} while (0) ++#endif ++ + #ifdef MAKEFIXED + # ifndef 
BUILDFIXED + # define BUILDFIXED +@@ -137,6 +154,7 @@ z_streamp strm; + state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; ++ INFLATE_RESET_KEEP_HOOK(strm); + Tracev((stderr, "inflate: reset\n")); + return Z_OK; + } +@@ -182,7 +200,7 @@ int windowBits; + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { +- ZFREE(strm, state->window); ++ ZFREE_WINDOW(strm, state->window); + state->window = Z_NULL; + } + +@@ -221,7 +239,7 @@ int stream_size; + strm->zfree = zcfree; + #endif + state = (struct inflate_state FAR *) +- ZALLOC(strm, 1, sizeof(struct inflate_state)); ++ ZALLOC_STATE(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; +@@ -230,7 +248,7 @@ int stream_size; + state->mode = HEAD; /* to pass state test in inflateReset2() */ + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { +- ZFREE(strm, state); ++ ZFREE_STATE(strm, state); + strm->state = Z_NULL; + } + return ret; +@@ -252,6 +270,7 @@ int value; + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; ++ INFLATE_PRIME_HOOK(strm, bits, value); + state = (struct inflate_state FAR *)strm->state; + if (bits < 0) { + state->hold = 0; +@@ -379,6 +398,27 @@ void makefixed() + } + #endif /* MAKEFIXED */ + ++int ZLIB_INTERNAL inflate_ensure_window(state) ++ struct inflate_state *state; ++{ ++ /* if it hasn't been done already, allocate space for the window */ ++ if (state->window == Z_NULL) { ++ state->window = (unsigned char FAR *) ++ ZALLOC_WINDOW(state->strm, 1U << state->wbits, ++ sizeof(unsigned char)); ++ if (state->window == Z_NULL) return 1; ++ } ++ ++ /* if window not in use yet, initialize */ ++ if (state->wsize == 0) { ++ state->wsize = 1U << state->wbits; ++ state->wnext = 0; ++ 
state->whave = 0; ++ } ++ ++ return 0; ++} ++ + /* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called +@@ -403,20 +443,7 @@ unsigned copy; + + state = (struct inflate_state FAR *)strm->state; + +- /* if it hasn't been done already, allocate space for the window */ +- if (state->window == Z_NULL) { +- state->window = (unsigned char FAR *) +- ZALLOC(strm, 1U << state->wbits, +- sizeof(unsigned char)); +- if (state->window == Z_NULL) return 1; +- } +- +- /* if window not in use yet, initialize */ +- if (state->wsize == 0) { +- state->wsize = 1U << state->wbits; +- state->wnext = 0; +- state->whave = 0; +- } ++ if (inflate_ensure_window(state)) return 1; + + /* copy state->wsize or less output bytes into the circular window */ + if (copy >= state->wsize) { +@@ -849,6 +876,7 @@ int flush; + case TYPE: + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + case TYPEDO: ++ INFLATE_TYPEDO_HOOK(strm, flush); + if (state->last) { + BYTEBITS(); + state->mode = CHECK; +@@ -1200,7 +1228,7 @@ int flush; + out -= left; + strm->total_out += out; + state->total += out; +- if ((state->wrap & 4) && out) ++ if (INFLATE_NEED_CHECKSUM(strm) && (state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; +@@ -1252,8 +1280,9 @@ int flush; + */ + inf_leave: + RESTORE(); +- if (state->wsize || (out != strm->avail_out && state->mode < BAD && +- (state->mode < CHECK || flush != Z_FINISH))) ++ if (INFLATE_NEED_UPDATEWINDOW(strm) && ++ (state->wsize || (out != strm->avail_out && state->mode < BAD && ++ (state->mode < CHECK || flush != Z_FINISH)))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; +@@ -1263,7 +1292,7 @@ int flush; + strm->total_in += in; + strm->total_out += out; + state->total += out; +- if ((state->wrap & 4) && out) ++ if (INFLATE_NEED_CHECKSUM(strm) && 
(state->wrap & 4) && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + +@@ -1281,8 +1310,8 @@ z_streamp strm; + if (inflateStateCheck(strm)) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; +- if (state->window != Z_NULL) ZFREE(strm, state->window); +- ZFREE(strm, strm->state); ++ if (state->window != Z_NULL) ZFREE_WINDOW(strm, state->window); ++ ZFREE_STATE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +@@ -1474,21 +1503,21 @@ z_streamp source; + + /* allocate space */ + copy = (struct inflate_state FAR *) +- ZALLOC(source, 1, sizeof(struct inflate_state)); ++ ZALLOC_STATE(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) +- ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); ++ ZALLOC_WINDOW(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { +- ZFREE(source, copy); ++ ZFREE_STATE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); +- zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); ++ ZCOPY_STATE((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + copy->strm = dest; + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { +@@ -1545,6 +1574,7 @@ z_streamp strm; + + if (inflateStateCheck(strm)) + return -(1L << 16); ++ INFLATE_MARK_HOOK(strm); + state = (struct inflate_state FAR *)strm->state; + return (long)(((unsigned long)((long)state->back)) << 16) + + (state->mode == COPY ? 
state->length : +diff --git a/inflate.h b/inflate.h +index a46cce6..7b19617 100644 +--- a/inflate.h ++++ b/inflate.h +@@ -123,3 +123,5 @@ struct inflate_state { + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ + }; ++ ++int ZLIB_INTERNAL inflate_ensure_window OF((struct inflate_state *state)); +diff --git a/test/infcover.c b/test/infcover.c +index 2be0164..a34cd17 100644 +--- a/test/infcover.c ++++ b/test/infcover.c +@@ -444,7 +444,7 @@ local void cover_wrap(void) + } + + /* input and output functions for inflateBack() */ +-local unsigned pull(void *desc, unsigned char **buf) ++local unsigned pull(void *desc, z_const unsigned char **buf) + { + static unsigned int next = 0; + static unsigned char dat[] = {0x63, 0, 2, 0}; +diff --git a/test/minigzip.c b/test/minigzip.c +index e22fb08..4b5f4ef 100644 +--- a/test/minigzip.c ++++ b/test/minigzip.c +@@ -132,7 +132,11 @@ static void pwinerror (s) + #endif + #define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) + ++#ifdef DFLTCC ++#define BUFLEN 262144 ++#else + #define BUFLEN 16384 ++#endif + #define MAX_NAME_LEN 1024 + + #ifdef MAXSEG_64K +diff --git a/trees.c b/trees.c +index 50cf4b4..ad51207 100644 +--- a/trees.c ++++ b/trees.c +@@ -149,8 +149,6 @@ local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + local void compress_block OF((deflate_state *s, const ct_data *ltree, + const ct_data *dtree)); + local int detect_data_type OF((deflate_state *s)); +-local unsigned bi_reverse OF((unsigned value, int length)); +-local void bi_windup OF((deflate_state *s)); + local void bi_flush OF((deflate_state *s)); + + #ifdef GEN_TREES_H +@@ -223,6 +221,13 @@ local void send_bits(s, value, length) + } + #endif /* ZLIB_DEBUG */ + ++void ZLIB_INTERNAL _tr_send_bits(s, value, length) ++ deflate_state *s; ++ int value; ++ int length; ++{ ++ send_bits(s, value, length); ++} + + /* the arguments must not have side effects */ + +@@ -1155,7 +1160,7 @@ local int detect_data_type(s) 
+ * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +-local unsigned bi_reverse(code, len) ++unsigned ZLIB_INTERNAL bi_reverse(code, len) + unsigned code; /* the value to invert */ + int len; /* its bit length */ + { +@@ -1187,7 +1192,7 @@ local void bi_flush(s) + /* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +-local void bi_windup(s) ++void ZLIB_INTERNAL bi_windup(s) + deflate_state *s; + { + if (s->bi_valid > 8) { +-- +2.19.1 + diff --git a/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-strm-adler-fix.patch b/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-strm-adler-fix.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f70f9b24397bdcd4068ad7ae1036da48efbead7 --- /dev/null +++ b/zlib-1.2.11-IBM-Z-hw-accelrated-deflate-strm-adler-fix.patch @@ -0,0 +1,11 @@ +--- a/contrib/s390/dfltcc.c ++++ b/contrib/s390/dfltcc.c +@@ -623,7 +623,7 @@ + state->bits = param->sbb; + state->whave = param->hl; + state->wnext = (param->ho + param->hl) & ((1 << HB_BITS) - 1); +- state->check = state->flags ? ZSWAP32(param->cv) : param->cv; ++ strm->adler = state->check = state->flags ? 
ZSWAP32(param->cv) : param->cv; + if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) { + /* Report an error if stream is corrupted */ + state->mode = BAD; diff --git a/zlib-1.2.11-covscan-issues.patch b/zlib-1.2.11-covscan-issues.patch new file mode 100644 index 0000000000000000000000000000000000000000..28e8604dfdcc58fd534d150149c7c40539a8882d --- /dev/null +++ b/zlib-1.2.11-covscan-issues.patch @@ -0,0 +1,74 @@ +From f776e1609cc63bf486634ee9bc6226dac2c0d2f3 Mon Sep 17 00:00:00 2001 +From: Ondrej Dubaj +Date: Tue, 15 Oct 2019 11:27:15 +0200 +Subject: [PATCH] fixed covscan issues + +--- + crc32.c | 2 +- + deflate.c | 2 +- + test/crc32_test.c | 8 ++++---- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/crc32.c b/crc32.c +index 406d350..34132ea 100644 +--- a/crc32.c ++++ b/crc32.c +@@ -302,7 +302,7 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) + + if (!crc32_func) + crc32_func = crc32_z_ifunc(); +- return (*crc32_func)(crc, buf, len); ++ return (*crc32_func)(crc, buf, len); + } + + #endif /* defined(Z_IFUNC_ASM) || defined(Z_IFUNC_NATIVE) */ +diff --git a/deflate.c b/deflate.c +index 089285a..9b09718 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -1015,7 +1015,7 @@ int ZEXPORT deflate (strm, flush) + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { +- block_state bstate; ++ block_state bstate = 0; + + bstate = DEFLATE_HOOK(strm, flush, &bstate) ? bstate : + s->level == 0 ? 
deflate_stored(s, flush) : +diff --git a/test/crc32_test.c b/test/crc32_test.c +index 5d73128..2d2a6c7 100644 +--- a/test/crc32_test.c ++++ b/test/crc32_test.c +@@ -11,25 +11,25 @@ + # include + #endif + +-void test_crc32 OF((uLong crc, Byte* buf, z_size_t len, uLong chk, int line)); ++void test_crc32 OF((uLong crc, char* buf, z_size_t len, uLong chk, int line)); + int main OF((void)); + + typedef struct { + int line; + uLong crc; +- Byte* buf; ++ char* buf; + int len; + uLong expect; + } crc32_test; + + void test_crc32(crc, buf, len, chk, line) + uLong crc; +- Byte *buf; ++ char *buf; + z_size_t len; + uLong chk; + int line; + { +- uLong res = crc32(crc, buf, len); ++ uLong res = crc32(crc, (Bytef *) buf, len); + if (res != chk) { + fprintf(stderr, "FAIL [%d]: crc32 returned 0x%08X expected 0x%08X\n", + line, (unsigned int)res, (unsigned int)chk); +-- +2.19.1 + diff --git a/zlib-1.2.11-firefox-crash-fix.patch b/zlib-1.2.11-firefox-crash-fix.patch new file mode 100644 index 0000000000000000000000000000000000000000..27068bbfee1224c0017b23b6c715d5222e2f3576 --- /dev/null +++ b/zlib-1.2.11-firefox-crash-fix.patch @@ -0,0 +1,365 @@ +From 27a84de4a30cd35f8565937397f6d1205b912818 Mon Sep 17 00:00:00 2001 +From: Ondrej Dubaj +Date: Thu, 5 Sep 2019 09:16:35 +0200 +Subject: [PATCH 1/2] fix: power8 crc32 - return 0 with 0 ptr passed + +--- + contrib/power8-crc/vec_crc32.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c +index bb2204b..5ce9cd2 100644 +--- a/contrib/power8-crc/vec_crc32.c ++++ b/contrib/power8-crc/vec_crc32.c +@@ -74,6 +74,7 @@ unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p, + unsigned int prealign; + unsigned int tail; + ++ if (p == (const unsigned char *) 0x0) return 0; + #ifdef CRC_XOR + crc ^= 0xffffffff; + #endif +-- +2.19.1 + + +From c066ac92982a2ffe5b1e9bd36000058927437bd5 Mon Sep 17 00:00:00 2001 +From: Ondrej Dubaj +Date: Thu, 5 Sep 2019 09:36:47 +0200 
+Subject: [PATCH 2/2] Add CRC32 tests (crc32_test) + +This commit includes a CRC32 test (crc32_test). These tests are important +since some architectures may want to include CPU-dependent optimizations for +the CRC32 algorithm, such as using vector instructions, and we may want to +validate those. +--- + Makefile.in | 35 +++++--- + test/crc32_test.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 230 insertions(+), 10 deletions(-) + create mode 100644 test/crc32_test.c + +diff --git a/Makefile.in b/Makefile.in +index 40b5cfb..6070dcc 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -75,11 +75,11 @@ PIC_OBJS = $(PIC_OBJC) $(PIC_OBJA) + + all: static shared + +-static: example$(EXE) minigzip$(EXE) ++static: crc32_test$(EXE) example$(EXE) minigzip$(EXE) + +-shared: examplesh$(EXE) minigzipsh$(EXE) ++shared: crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) + +-all64: example64$(EXE) minigzip64$(EXE) ++all64: crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) + + check: test + +@@ -87,7 +87,7 @@ test: all teststatic testshared + + teststatic: static + @TMPST=tmpst_$$; \ +- if echo hello world | ./minigzip | ./minigzip -d && ./example $$TMPST ; then \ ++ if echo hello world | ./minigzip | ./minigzip -d && ./example $$TMPST && ./crc32_test; then \ + echo ' *** zlib test OK ***'; \ + else \ + echo ' *** zlib test FAILED ***'; false; \ +@@ -100,7 +100,7 @@ testshared: shared + DYLD_LIBRARY_PATH=`pwd`:$(DYLD_LIBRARY_PATH) ; export DYLD_LIBRARY_PATH; \ + SHLIB_PATH=`pwd`:$(SHLIB_PATH) ; export SHLIB_PATH; \ + TMPSH=tmpsh_$$; \ +- if echo hello world | ./minigzipsh | ./minigzipsh -d && ./examplesh $$TMPSH; then \ ++ if echo hello world | ./minigzipsh | ./minigzipsh -d && ./examplesh $$TMPSH && ./crc32_testsh; then \ + echo ' *** zlib shared test OK ***'; \ + else \ + echo ' *** zlib shared test FAILED ***'; false; \ +@@ -109,7 +109,7 @@ testshared: shared + + test64: all64 + @TMP64=tmp64_$$; \ +- if echo hello world | ./minigzip64 | ./minigzip64 -d &&
./example64 $$TMP64; then \ ++ if echo hello world | ./minigzip64 | ./minigzip64 -d && ./example64 $$TMP64 && ./crc32_test64; then \ + echo ' *** zlib 64-bit test OK ***'; \ + else \ + echo ' *** zlib 64-bit test FAILED ***'; false; \ +@@ -157,6 +157,12 @@ example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h + minigzip.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/minigzip.c + ++crc32_test.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h ++ $(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/crc32_test.c ++ ++crc32_test64.o: $(SRCDIR)test/crc32_test.c $(SRCDIR)zlib.h zconf.h ++ $(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/crc32_test.c ++ + example64.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h + $(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/example.c + +@@ -307,12 +313,21 @@ example$(EXE): example.o $(STATICLIB) + minigzip$(EXE): minigzip.o $(STATICLIB) + $(CC) $(CFLAGS) -o $@ minigzip.o $(TEST_LDFLAGS) + ++crc32_test$(EXE): crc32_test.o $(STATICLIB) ++ $(CC) $(CFLAGS) -o $@ crc32_test.o $(TEST_LDFLAGS) ++ ++crc32_testsh$(EXE): crc32_test.o $(SHAREDLIBV) ++ $(CC) $(CFLAGS) -o $@ crc32_test.o -L. $(SHAREDLIBV) ++ + examplesh$(EXE): example.o $(SHAREDLIBV) + $(CC) $(CFLAGS) -o $@ example.o -L. $(SHAREDLIBV) + + minigzipsh$(EXE): minigzip.o $(SHAREDLIBV) + $(CC) $(CFLAGS) -o $@ minigzip.o -L. 
$(SHAREDLIBV) + ++crc32_test64$(EXE): crc32_test64.o $(STATICLIB) ++ $(CC) $(CFLAGS) -o $@ crc32_test64.o $(TEST_LDFLAGS) ++ + example64$(EXE): example64.o $(STATICLIB) + $(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS) + +@@ -382,8 +397,8 @@ zconf: $(SRCDIR)zconf.h.in + mostlyclean: clean + clean: + rm -f *.o *.lo *~ \ +- example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \ +- example64$(EXE) minigzip64$(EXE) \ ++ crc32_test$(EXE) example$(EXE) minigzip$(EXE) crc32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \ ++ crc32_test64$(EXE) example64$(EXE) minigzip64$(EXE) \ + infcover \ + libz.* foo.gz so_locations \ + _match.s maketree contrib/infback9/*.o +@@ -407,7 +422,7 @@ tags: + + adler32.o zutil.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h + gzclose.o gzlib.o gzread.o gzwrite.o: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h +-compress.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h ++compress.o crc32_test.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h + crc32.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h + deflate.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h + infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h +@@ -417,7 +432,7 @@ trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)tr + + adler32.lo zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h + gzclose.lo gzlib.lo gzread.lo gzwrite.lo: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h +-compress.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h ++compress.lo crc32_test.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h + crc32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h + deflate.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h + infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h +diff --git 
a/test/crc32_test.c b/test/crc32_test.c +new file mode 100644 +index 0000000..5d73128 +--- /dev/null ++++ b/test/crc32_test.c +@@ -0,0 +1,205 @@ ++/* crc32_test.c -- unit test for crc32 in the zlib compression library ++ * Copyright (C) 1995-2006, 2010, 2011, 2016, 2019 Rogerio Alves ++ * For conditions of distribution and use, see copyright notice in zlib.h ++ */ ++ ++#include "zlib.h" ++#include <stdio.h> ++ ++#ifdef STDC ++# include <string.h> ++# include <stdlib.h> ++#endif ++ ++void test_crc32 OF((uLong crc, Byte* buf, z_size_t len, uLong chk, int line)); ++int main OF((void)); ++ ++typedef struct { ++ int line; ++ uLong crc; ++ Byte* buf; ++ int len; ++ uLong expect; ++} crc32_test; ++ ++void test_crc32(crc, buf, len, chk, line) ++ uLong crc; ++ Byte *buf; ++ z_size_t len; ++ uLong chk; ++ int line; ++{ ++ uLong res = crc32(crc, buf, len); ++ if (res != chk) { ++ fprintf(stderr, "FAIL [%d]: crc32 returned 0x%08X expected 0x%08X\n", ++ line, (unsigned int)res, (unsigned int)chk); ++ exit(1); ++ } ++} ++ ++static const crc32_test tests[] = { ++ {__LINE__, 0x0, 0x0, 0, 0x0}, ++ {__LINE__, 0xffffffff, 0x0, 0, 0x0}, ++ {__LINE__, 0x0, 0x0, 255, 0x0}, /* BZ 174799.
*/ ++ {__LINE__, 0x0, 0x0, 256, 0x0}, ++ {__LINE__, 0x0, 0x0, 257, 0x0}, ++ {__LINE__, 0x0, 0x0, 32767, 0x0}, ++ {__LINE__, 0x0, 0x0, 32768, 0x0}, ++ {__LINE__, 0x0, 0x0, 32769, 0x0}, ++ {__LINE__, 0x0, "", 0, 0x0}, ++ {__LINE__, 0xffffffff, "", 0, 0xffffffff}, ++ {__LINE__, 0x0, "abacus", 6, 0xc3d7115b}, ++ {__LINE__, 0x0, "backlog", 7, 0x269205}, ++ {__LINE__, 0x0, "campfire", 8, 0x22a515f8}, ++ {__LINE__, 0x0, "delta", 5, 0x9643fed9}, ++ {__LINE__, 0x0, "executable", 10, 0xd68eda01}, ++ {__LINE__, 0x0, "file", 4, 0x8c9f3610}, ++ {__LINE__, 0x0, "greatest", 8, 0xc1abd6cd}, ++ {__LINE__, 0x0, "hello", 5, 0x3610a686}, ++ {__LINE__, 0x0, "inverter", 8, 0xc9e962c9}, ++ {__LINE__, 0x0, "jigsaw", 6, 0xce4e3f69}, ++ {__LINE__, 0x0, "karate", 6, 0x890be0e2}, ++ {__LINE__, 0x0, "landscape", 9, 0xc4e0330b}, ++ {__LINE__, 0x0, "machine", 7, 0x1505df84}, ++ {__LINE__, 0x0, "nanometer", 9, 0xd4e19f39}, ++ {__LINE__, 0x0, "oblivion", 8, 0xdae9de77}, ++ {__LINE__, 0x0, "panama", 6, 0x66b8979c}, ++ {__LINE__, 0x0, "quest", 5, 0x4317f817}, ++ {__LINE__, 0x0, "resource", 8, 0xbc91f416}, ++ {__LINE__, 0x0, "secret", 6, 0x5ca2e8e5}, ++ {__LINE__, 0x0, "test", 4, 0xd87f7e0c}, ++ {__LINE__, 0x0, "ultimate", 8, 0x3fc79b0b}, ++ {__LINE__, 0x0, "vector", 6, 0x1b6e485b}, ++ {__LINE__, 0x0, "walrus", 6, 0xbe769b97}, ++ {__LINE__, 0x0, "xeno", 4, 0xe7a06444}, ++ {__LINE__, 0x0, "yelling", 7, 0xfe3944e5}, ++ {__LINE__, 0x0, "zlib", 4, 0x73887d3a}, ++ {__LINE__, 0x0, "4BJD7PocN1VqX0jXVpWB", 20, 0xd487a5a1}, ++ {__LINE__, 0x0, "F1rPWI7XvDs6nAIRx41l", 20, 0x61a0132e}, ++ {__LINE__, 0x0, "ldhKlsVkPFOveXgkGtC2", 20, 0xdf02f76}, ++ {__LINE__, 0x0, "5KKnGOOrs8BvJ35iKTOS", 20, 0x579b2b0a}, ++ {__LINE__, 0x0, "0l1tw7GOcem06Ddu7yn4", 20, 0xf7d16e2d}, ++ {__LINE__, 0x0, "MCr47CjPIn9R1IvE1Tm5", 20, 0x731788f5}, ++ {__LINE__, 0x0, "UcixbzPKTIv0SvILHVdO", 20, 0x7112bb11}, ++ {__LINE__, 0x0, "dGnAyAhRQDsWw0ESou24", 20, 0xf32a0dac}, ++ {__LINE__, 0x0, "di0nvmY9UYMYDh0r45XT", 20, 0x625437bb}, ++ {__LINE__, 
0x0, "2XKDwHfAhFsV0RhbqtvH", 20, 0x896930f9}, ++ {__LINE__, 0x0, "ZhrANFIiIvRnqClIVyeD", 20, 0x8579a37}, ++ {__LINE__, 0x0, "v7Q9ehzioTOVeDIZioT1", 20, 0x632aa8e0}, ++ {__LINE__, 0x0, "Yod5hEeKcYqyhfXbhxj2", 20, 0xc829af29}, ++ {__LINE__, 0x0, "GehSWY2ay4uUKhehXYb0", 20, 0x1b08b7e8}, ++ {__LINE__, 0x0, "kwytJmq6UqpflV8Y8GoE", 20, 0x4e33b192}, ++ {__LINE__, 0x0, "70684206568419061514", 20, 0x59a179f0}, ++ {__LINE__, 0x0, "42015093765128581010", 20, 0xcd1013d7}, ++ {__LINE__, 0x0, "88214814356148806939", 20, 0xab927546}, ++ {__LINE__, 0x0, "43472694284527343838", 20, 0x11f3b20c}, ++ {__LINE__, 0x0, "49769333513942933689", 20, 0xd562d4ca}, ++ {__LINE__, 0x0, "54979784887993251199", 20, 0x233395f7}, ++ {__LINE__, 0x0, "58360544869206793220", 20, 0x2d167fd5}, ++ {__LINE__, 0x0, "27347953487840714234", 20, 0x8b5108ba}, ++ {__LINE__, 0x0, "07650690295365319082", 20, 0xc46b3cd8}, ++ {__LINE__, 0x0, "42655507906821911703", 20, 0xc10b2662}, ++ {__LINE__, 0x0, "29977409200786225655", 20, 0xc9a0f9d2}, ++ {__LINE__, 0x0, "85181542907229116674", 20, 0x9341357b}, ++ {__LINE__, 0x0, "87963594337989416799", 20, 0xf0424937}, ++ {__LINE__, 0x0, "21395988329504168551", 20, 0xd7c4c31f}, ++ {__LINE__, 0x0, "51991013580943379423", 20, 0xf11edcc4}, ++ {__LINE__, 0x0, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x40795df4}, ++ {__LINE__, 0x0, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0xdd61a631}, ++ {__LINE__, 0x0, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xca907a99}, ++ {__LINE__, 0x0, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0xf652deac}, ++ {__LINE__, 0x0, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0xaf39a5a9}, ++ {__LINE__, 0x0, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x6bebb4cf}, ++ {__LINE__, 0x0, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0x76430bac}, ++ {__LINE__, 0x0, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x6c80c388}, ++ {__LINE__, 0x0, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xd54d977d}, ++ {__LINE__, 0x0, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0xe3966ad5}, ++ {__LINE__, 0x0, 
"e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xe7c71db9}, ++ {__LINE__, 0x0, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xeaa52777}, ++ {__LINE__, 0x0, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xcd472048}, ++ {__LINE__, 0x7a30360d, "abacus", 6, 0xf8655a84}, ++ {__LINE__, 0x6fd767ee, "backlog", 7, 0x1ed834b1}, ++ {__LINE__, 0xefeb7589, "campfire", 8, 0x686cfca}, ++ {__LINE__, 0x61cf7e6b, "delta", 5, 0x1554e4b1}, ++ {__LINE__, 0xdc712e2, "executable", 10, 0x761b4254}, ++ {__LINE__, 0xad23c7fd, "file", 4, 0x7abdd09b}, ++ {__LINE__, 0x85cb2317, "greatest", 8, 0x4ba91c6b}, ++ {__LINE__, 0x9eed31b0, "inverter", 8, 0xd5e78ba5}, ++ {__LINE__, 0xb94f34ca, "jigsaw", 6, 0x23649109}, ++ {__LINE__, 0xab058a2, "karate", 6, 0xc5591f41}, ++ {__LINE__, 0x5bff2b7a, "landscape", 9, 0xf10eb644}, ++ {__LINE__, 0x605c9a5f, "machine", 7, 0xbaa0a636}, ++ {__LINE__, 0x51bdeea5, "nanometer", 9, 0x6af89afb}, ++ {__LINE__, 0x85c21c79, "oblivion", 8, 0xecae222b}, ++ {__LINE__, 0x97216f56, "panama", 6, 0x47dffac4}, ++ {__LINE__, 0x18444af2, "quest", 5, 0x70c2fe36}, ++ {__LINE__, 0xbe6ce359, "resource", 8, 0x1471d925}, ++ {__LINE__, 0x843071f1, "secret", 6, 0x50c9a0db}, ++ {__LINE__, 0xf2480c60, "ultimate", 8, 0xf973daf8}, ++ {__LINE__, 0x2d2feb3d, "vector", 6, 0x344ac03d}, ++ {__LINE__, 0x7490310a, "walrus", 6, 0x6d1408ef}, ++ {__LINE__, 0x97d247d4, "xeno", 4, 0xe62670b5}, ++ {__LINE__, 0x93cf7599, "yelling", 7, 0x1b36da38}, ++ {__LINE__, 0x73c84278, "zlib", 4, 0x6432d127}, ++ {__LINE__, 0x228a87d1, "4BJD7PocN1VqX0jXVpWB", 20, 0x997107d0}, ++ {__LINE__, 0xa7a048d0, "F1rPWI7XvDs6nAIRx41l", 20, 0xdc567274}, ++ {__LINE__, 0x1f0ded40, "ldhKlsVkPFOveXgkGtC2", 20, 0xdcc63870}, ++ {__LINE__, 0xa804a62f, "5KKnGOOrs8BvJ35iKTOS", 20, 0x6926cffd}, ++ {__LINE__, 0x508fae6a, "0l1tw7GOcem06Ddu7yn4", 20, 
0xb52b38bc}, ++ {__LINE__, 0xe5adaf4f, "MCr47CjPIn9R1IvE1Tm5", 20, 0xf83b8178}, ++ {__LINE__, 0x67136a40, "UcixbzPKTIv0SvILHVdO", 20, 0xc5213070}, ++ {__LINE__, 0xb00c4a10, "dGnAyAhRQDsWw0ESou24", 20, 0xbc7648b0}, ++ {__LINE__, 0x2e0c84b5, "di0nvmY9UYMYDh0r45XT", 20, 0xd8123a72}, ++ {__LINE__, 0x81238d44, "2XKDwHfAhFsV0RhbqtvH", 20, 0xd5ac5620}, ++ {__LINE__, 0xf853aa92, "ZhrANFIiIvRnqClIVyeD", 20, 0xceae099d}, ++ {__LINE__, 0x5a692325, "v7Q9ehzioTOVeDIZioT1", 20, 0xb07d2b24}, ++ {__LINE__, 0x3275b9f, "Yod5hEeKcYqyhfXbhxj2", 20, 0x24ce91df}, ++ {__LINE__, 0x38371feb, "GehSWY2ay4uUKhehXYb0", 20, 0x707b3b30}, ++ {__LINE__, 0xafc8bf62, "kwytJmq6UqpflV8Y8GoE", 20, 0x16abc6a9}, ++ {__LINE__, 0x9b07db73, "70684206568419061514", 20, 0xae1fb7b7}, ++ {__LINE__, 0xe75b214, "42015093765128581010", 20, 0xd4eecd2d}, ++ {__LINE__, 0x72d0fe6f, "88214814356148806939", 20, 0x4660ec7}, ++ {__LINE__, 0xf857a4b1, "43472694284527343838", 20, 0xfd8afdf7}, ++ {__LINE__, 0x54b8e14, "49769333513942933689", 20, 0xc6d1b5f2}, ++ {__LINE__, 0xd6aa5616, "54979784887993251199", 20, 0x32476461}, ++ {__LINE__, 0x11e63098, "58360544869206793220", 20, 0xd917cf1a}, ++ {__LINE__, 0xbe92385, "27347953487840714234", 20, 0x4ad14a12}, ++ {__LINE__, 0x49511de0, "07650690295365319082", 20, 0xe37b5c6c}, ++ {__LINE__, 0x3db13bc1, "42655507906821911703", 20, 0x7cc497f1}, ++ {__LINE__, 0xbb899bea, "29977409200786225655", 20, 0x99781bb2}, ++ {__LINE__, 0xf6cd9436, "85181542907229116674", 20, 0x132256a1}, ++ {__LINE__, 0x9109e6c3, "87963594337989416799", 20, 0xbfdb2c83}, ++ {__LINE__, 0x75770fc, "21395988329504168551", 20, 0x8d9d1e81}, ++ {__LINE__, 0x69b1d19b, "51991013580943379423", 20, 0x7b6d4404}, ++ {__LINE__, 0xc6132975, "*]+@!);({_$;}[_},?{?;(_?,=-][@", 30, 0x8619f010}, ++ {__LINE__, 0xd58cb00c, "_@:_).&(#.[:[{[:)$++-($_;@[)}+", 30, 0x15746ac3}, ++ {__LINE__, 0xb63b8caa, "&[!,[$_==}+.]@!;*(+},[;:)$;)-@", 30, 0xaccf812f}, ++ {__LINE__, 0x8a45a2b8, "]{.[.+?+[[=;[?}_#&;[=)__$$:+=_", 30, 0x78af45de}, ++ 
{__LINE__, 0xcbe95b78, "-%.)=/[@].:.(:,()$;=%@-$?]{%+%", 30, 0x25b06b59}, ++ {__LINE__, 0x4ef8a54b, "+]#$(@&.=:,*];/.!]%/{:){:@(;)$", 30, 0x4ba0d08f}, ++ {__LINE__, 0x76ad267a, ")-._.:?[&:.=+}(*$/=!.${;(=$@!}", 30, 0xe26b6aac}, ++ {__LINE__, 0x569e613c, ":(_*&%/[[}+,?#$&*+#[([*-/#;%(]", 30, 0x7e2b0a66}, ++ {__LINE__, 0x36aa61da, "{[#-;:$/{)(+[}#]/{&!%(@)%:@-$:", 30, 0xb3430dc7}, ++ {__LINE__, 0xf67222df, "_{$*,}(&,@.)):=!/%(&(,,-?$}}}!", 30, 0x626c17a}, ++ {__LINE__, 0x74b34fd3, "e$98KNzqaV)Y:2X?]77].{gKRD4G5{mHZk,Z)SpU%L3FSgv!Wb8MLAFdi{+fp)c,@8m6v)yXg@]HBDFk?.4&}g5_udE*JHCiH=aL", 100, 0xccf98060}, ++ {__LINE__, 0x351fd770, "r*Fd}ef+5RJQ;+W=4jTR9)R*p!B;]Ed7tkrLi;88U7g@3v!5pk2X6D)vt,.@N8c]@yyEcKi[vwUu@.Ppm@C6%Mv*3Nw}Y,58_aH)", 100, 0xd8b95312}, ++ {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 100, 0xbb1c9912}, ++ {__LINE__, 0xc45aef77, "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" ++ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" ++ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" ++ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" ++ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&" ++ "h{bcmdC+a;t+Cf{6Y_dFq-{X4Yu&7uNfVDh?q&_u.UWJU],-GiH7ADzb7-V.Q%4=+v!$L9W+T=bP]$_:]Vyg}A.ygD.r;h-D]m%&", 600, 0x888AFA5B} ++}; ++ ++static const int test_size = sizeof(tests) / sizeof(tests[0]); ++ ++int main(void) ++{ ++ int i; ++ for (i = 0; i < test_size; i++) { ++ test_crc32(tests[i].crc, tests[i].buf, tests[i].len, ++ tests[i].expect, tests[i].line); ++ } ++ return 0; ++} +\ No newline at end of file +-- +2.19.1 + diff --git a/zlib-1.2.11-inflateSyncPoint-return-value-fix.patch b/zlib-1.2.11-inflateSyncPoint-return-value-fix.patch 
new file mode 100644 index 0000000000000000000000000000000000000000..f9b3756383f90be91d7d17df01908011c1ac627e --- /dev/null +++ b/zlib-1.2.11-inflateSyncPoint-return-value-fix.patch @@ -0,0 +1,45 @@ +Subject: [PATCH] Fixed inflateSyncPoint() bad return value on z15 + +--- + contrib/s390/dfltcc.h | 4 ++++ + inflate.c | 2 ++ + 2 files changed, 6 insertions(+) + +diff --git a/contrib/s390/dfltcc.h b/contrib/s390/dfltcc.h +index 574e84c..7960626 100644 +--- a/contrib/s390/dfltcc.h ++++ b/contrib/s390/dfltcc.h +@@ -51,5 +51,9 @@ int ZLIB_INTERNAL dfltcc_inflate_disable OF((z_streamp strm)); + do { \ + if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \ + } while (0) ++#define INFLATE_SYNC_POINT_HOOK(strm) \ ++ do { \ ++ if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \ ++ } while (0) + + #endif +\ No newline at end of file +diff --git a/inflate.c b/inflate.c +index f77c2ae..596034c 100644 +--- a/inflate.c ++++ b/inflate.c +@@ -100,6 +100,7 @@ + #define INFLATE_NEED_CHECKSUM(strm) 1 + #define INFLATE_NEED_UPDATEWINDOW(strm) 1 + #define INFLATE_MARK_HOOK(strm) do {} while (0) ++#define INFLATE_SYNC_POINT_HOOK(strm) do {} while (0) + #endif + + #ifdef MAKEFIXED +@@ -1483,6 +1484,7 @@ z_streamp strm; + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; ++ INFLATE_SYNC_POINT_HOOK(strm); + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; + } +-- +2.26.0 + diff --git a/zlib-1.2.11-optimized-CRC32-framework.patch b/zlib-1.2.11-optimized-CRC32-framework.patch new file mode 100644 index 0000000000000000000000000000000000000000..18255fa19543548bfde97a99326675ea825b5512 --- /dev/null +++ b/zlib-1.2.11-optimized-CRC32-framework.patch @@ -0,0 +1,2258 @@ +From d1155b9ab9a2ef643ec82285d1fb767dcfd00d16 Mon Sep 17 00:00:00 2001 +From: Ondrej Dubaj +Date: Thu, 1 Aug 2019 12:17:06 +0200 +Subject: [PATCH] Optimized CRC32 for POWER 8+ architectures. 
+ +--- + Makefile.in | 8 + + configure | 77 ++ + contrib/power8-crc/clang_workaround.h | 82 ++ + contrib/power8-crc/crc32_constants.h | 1206 +++++++++++++++++++++++++ + contrib/power8-crc/vec_crc32.c | 674 ++++++++++++++ + crc32.c | 100 +- + 6 files changed, 2135 insertions(+), 12 deletions(-) + create mode 100644 contrib/power8-crc/clang_workaround.h + create mode 100644 contrib/power8-crc/crc32_constants.h + create mode 100644 contrib/power8-crc/vec_crc32.c + +diff --git a/Makefile.in b/Makefile.in +index b7bdbf2..55f6489 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -167,6 +167,9 @@ minigzip64.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h + adler32.o: $(SRCDIR)adler32.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)adler32.c + ++crc32_power8.o: $(SRCDIR)contrib/power8-crc/vec_crc32.c ++ $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)contrib/power8-crc/vec_crc32.c ++ + crc32.o: $(SRCDIR)crc32.c + $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c + +@@ -215,6 +218,11 @@ adler32.lo: $(SRCDIR)adler32.c + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/adler32.o $(SRCDIR)adler32.c + -@mv objs/adler32.o $@ + ++crc32_power8.lo: $(SRCDIR)contrib/power8-crc/vec_crc32.c ++ -@mkdir objs 2>/dev/null || test -d objs ++ $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32_power8.o $(SRCDIR)contrib/power8-crc/vec_crc32.c ++ -@mv objs/crc32_power8.o $@ ++ + crc32.lo: $(SRCDIR)crc32.c + -@mkdir objs 2>/dev/null || test -d objs + $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c +diff --git a/configure b/configure +index cd9eeef..e93ff99 100755 +--- a/configure ++++ b/configure +@@ -839,6 +839,83 @@ else + echo "Checking for sys/sdt.h ... No." 
| tee -a configure.log + fi + ++# test to see if Power8+ implementation is compile time possible ++echo >> configure.log ++cat > $test.c < ++#include ++int main() ++{ ++ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); ++} ++#endif ++#else ++#error No Power 8 or newer architecture, may need -mcpu=power8 ++#endif ++EOF ++ ++if tryboth $CC -c $CFLAGS $test.c; then ++ OBJC="$OBJC crc32_power8.o" ++ PIC_OBJC="$PIC_OBJC crc32_power8.lo" ++ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... Yes." | tee -a configure.log ++else ++ echo "Checking for runtime cpu detection and Power 8 (or newer) Architecture support... No." | tee -a configure.log ++fi ++ ++# test to see if we can use a gnu indirection function to detect and load optimized code at runtime ++echo >> configure.log ++cat > $test.c <> configure.log ++ cat > $test.c <> configure.log + echo ALL = $ALL >> configure.log +diff --git a/contrib/power8-crc/clang_workaround.h b/contrib/power8-crc/clang_workaround.h +new file mode 100644 +index 0000000..09c411b +--- /dev/null ++++ b/contrib/power8-crc/clang_workaround.h +@@ -0,0 +1,82 @@ ++#ifndef CLANG_WORKAROUNDS_H ++#define CLANG_WORKAROUNDS_H ++ ++/* ++ * These stubs fix clang incompatibilities with GCC builtins. ++ */ ++ ++#ifndef __builtin_crypto_vpmsumw ++#define __builtin_crypto_vpmsumw __builtin_crypto_vpmsumb ++#endif ++#ifndef __builtin_crypto_vpmsumd ++#define __builtin_crypto_vpmsumd __builtin_crypto_vpmsumb ++#endif ++ ++static inline ++__vector unsigned long long __attribute__((overloadable)) ++vec_ld(int __a, const __vector unsigned long long* __b) ++{ ++ return (__vector unsigned long long)__builtin_altivec_lvx(__a, __b); ++} ++ ++/* ++ * GCC __builtin_pack_vector_int128 returns a vector __int128_t but Clang ++ * does not recognize this type. On GCC this builtin is translated to a ++ * xxpermdi instruction that only moves the registers __a, __b instead generates ++ * a load. 
++ * ++ * Clang has vec_xxpermdi intrinsics. It was implemented in 4.0.0. ++ */ ++static inline ++__vector unsigned long long __builtin_pack_vector (unsigned long __a, ++ unsigned long __b) ++{ ++ #if defined(__BIG_ENDIAN__) ++ __vector unsigned long long __v = {__a, __b}; ++ #else ++ __vector unsigned long long __v = {__b, __a}; ++ #endif ++ return __v; ++} ++ ++#ifndef vec_xxpermdi ++ ++static inline ++unsigned long __builtin_unpack_vector (__vector unsigned long long __v, ++ int __o) ++{ ++ return __v[__o]; ++} ++ ++#if defined(__BIG_ENDIAN__) ++#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 0) ++#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 1) ++#else ++#define __builtin_unpack_vector_0(a) __builtin_unpack_vector ((a), 1) ++#define __builtin_unpack_vector_1(a) __builtin_unpack_vector ((a), 0) ++#endif ++ ++#else ++ ++static inline ++unsigned long __builtin_unpack_vector_0 (__vector unsigned long long __v) ++{ ++ #if defined(__BIG_ENDIAN__) ++ return vec_xxpermdi(__v, __v, 0x0)[1]; ++ #else ++ return vec_xxpermdi(__v, __v, 0x0)[0]; ++ #endif ++} ++ ++static inline ++unsigned long __builtin_unpack_vector_1 (__vector unsigned long long __v) ++{ ++ #if defined(__BIG_ENDIAN__) ++ return vec_xxpermdi(__v, __v, 0x3)[1]; ++ #else ++ return vec_xxpermdi(__v, __v, 0x3)[0]; ++ #endif ++} ++#endif /* vec_xxpermdi */ ++ ++#endif +\ No newline at end of file +diff --git a/contrib/power8-crc/crc32_constants.h b/contrib/power8-crc/crc32_constants.h +new file mode 100644 +index 0000000..58088dc +--- /dev/null ++++ b/contrib/power8-crc/crc32_constants.h +@@ -0,0 +1,1206 @@ ++/* ++* ++* THIS FILE IS GENERATED WITH ++./crc32_constants -c -r -x 0x04C11DB7 ++ ++* This is from https://github.com/antonblanchard/crc32-vpmsum/ ++* DO NOT MODIFY IT MANUALLY! 
++* ++*/ ++ ++#define CRC 0x4c11db7 ++#define CRC_XOR ++#define REFLECT ++#define MAX_SIZE 32768 ++ ++#ifndef __ASSEMBLER__ ++#ifdef CRC_TABLE ++static const unsigned int crc_table[] = { ++ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, ++ 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, ++ 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, ++ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, ++ 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, ++ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, ++ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, ++ 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, ++ 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, ++ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, ++ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, ++ 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, ++ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, ++ 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, ++ 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, ++ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, ++ 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, ++ 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, ++ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, ++ 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, ++ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, ++ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, ++ 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, ++ 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, ++ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, ++ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, ++ 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, ++ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, ++ 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, ++ 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, ++ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, ++ 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, ++ 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, ++ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, ++ 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, ++ 0xe40ecf0b, 0x9309ff9d, 
0x0a00ae27, 0x7d079eb1, ++ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, ++ 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, ++ 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, ++ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, ++ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, ++ 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, ++ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, ++ 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, ++ 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, ++ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, ++ 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, ++ 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, ++ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, ++ 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, ++ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, ++ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, ++ 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, ++ 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, ++ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, ++ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, ++ 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, ++ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, ++ 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, ++ 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, ++ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, ++ 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, ++ 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, ++ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,}; ++ ++#endif /* CRC_TABLE */ ++#ifdef POWER8_INTRINSICS ++ ++/* Constants */ ++ ++/* Reduce 262144 kbits to 1024 bits */ ++static const __vector unsigned long long vcrc_const[255] ++ __attribute__((aligned (16))) = { ++#ifdef __LITTLE_ENDIAN__ ++ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ ++ { 0x0000000099ea94a8, 0x00000001651797d2 }, ++ /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ ++ { 0x00000000945a8420, 0x0000000021e0d56c }, ++ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ ++ { 0x0000000030762706, 
0x000000000f95ecaa }, ++ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ ++ { 0x00000001a52fc582, 0x00000001ebd224ac }, ++ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ ++ { 0x00000001a4a7167a, 0x000000000ccb97ca }, ++ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ ++ { 0x000000000c18249a, 0x00000001006ec8a8 }, ++ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ ++ { 0x00000000a924ae7c, 0x000000014f58f196 }, ++ /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ ++ { 0x00000001e12ccc12, 0x00000001a7192ca6 }, ++ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ ++ { 0x00000000a0b9d4ac, 0x000000019a64bab2 }, ++ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ ++ { 0x0000000095e8ddfe, 0x0000000014f4ed2e }, ++ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ ++ { 0x00000000233fddc4, 0x000000011092b6a2 }, ++ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ ++ { 0x00000001b4529b62, 0x00000000c8a1629c }, ++ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ ++ { 0x00000001a7fa0e64, 0x000000017bf32e8e }, ++ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ ++ { 0x00000001b5334592, 0x00000001f8cc6582 }, ++ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ ++ { 0x000000011f8ee1b4, 0x000000008631ddf0 }, ++ /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ ++ { 0x000000006252e632, 0x000000007e5a76d0 }, ++ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ ++ { 0x00000000ab973e84, 0x000000002b09b31c }, ++ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ ++ { 0x000000007734f5ec, 0x00000001b2df1f84 }, ++ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ ++ { 0x000000007c547798, 0x00000001d6f56afc }, ++ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ ++ { 0x000000007ec40210, 0x00000001b9b5e70c }, ++ /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ ++ { 0x00000001ab1695a8, 0x0000000034b626d2 }, ++ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ ++ { 0x0000000090494bba, 
0x000000014c53479a }, ++ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ ++ { 0x00000001123fb816, 0x00000001a6d179a4 }, ++ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ ++ { 0x00000001e188c74c, 0x000000015abd16b4 }, ++ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ ++ { 0x00000001c2d3451c, 0x00000000018f9852 }, ++ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ ++ { 0x00000000f55cf1ca, 0x000000001fb3084a }, ++ /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ ++ { 0x00000001a0531540, 0x00000000c53dfb04 }, ++ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ ++ { 0x0000000132cd7ebc, 0x00000000e10c9ad6 }, ++ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ ++ { 0x0000000073ab7f36, 0x0000000025aa994a }, ++ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ ++ { 0x0000000041aed1c2, 0x00000000fa3a74c4 }, ++ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ ++ { 0x0000000136c53800, 0x0000000033eb3f40 }, ++ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ ++ { 0x0000000126835a30, 0x000000017193f296 }, ++ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ ++ { 0x000000006241b502, 0x0000000043f6c86a }, ++ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ ++ { 0x00000000d5196ad4, 0x000000016b513ec6 }, ++ /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ ++ { 0x000000009cfa769a, 0x00000000c8f25b4e }, ++ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ ++ { 0x00000000920e5df4, 0x00000001a45048ec }, ++ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ ++ { 0x0000000169dc310e, 0x000000000c441004 }, ++ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ ++ { 0x0000000009fc331c, 0x000000000e17cad6 }, ++ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ ++ { 0x000000010d94a81e, 0x00000001253ae964 }, ++ /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ ++ { 0x0000000027a20ab2, 0x00000001d7c88ebc }, ++ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ ++ { 0x0000000114f87504, 
0x00000001e7ca913a }, ++ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ ++ { 0x000000004b076d96, 0x0000000033ed078a }, ++ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ ++ { 0x00000000da4d1e74, 0x00000000e1839c78 }, ++ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ ++ { 0x000000001b81f672, 0x00000001322b267e }, ++ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ ++ { 0x000000009367c988, 0x00000000638231b6 }, ++ /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ ++ { 0x00000001717214ca, 0x00000001ee7f16f4 }, ++ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ ++ { 0x000000009f47d820, 0x0000000117d9924a }, ++ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ ++ { 0x000000010d9a47d2, 0x00000000e1a9e0c4 }, ++ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ ++ { 0x00000000a696c58c, 0x00000001403731dc }, ++ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ ++ { 0x000000002aa28ec6, 0x00000001a5ea9682 }, ++ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ ++ { 0x00000001fe18fd9a, 0x0000000101c5c578 }, ++ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ ++ { 0x000000019d4fc1ae, 0x00000000dddf6494 }, ++ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ ++ { 0x00000001ba0e3dea, 0x00000000f1c3db28 }, ++ /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ ++ { 0x0000000074b59a5e, 0x000000013112fb9c }, ++ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ ++ { 0x00000000f2b5ea98, 0x00000000b680b906 }, ++ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ ++ { 0x0000000187132676, 0x000000001a282932 }, ++ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ ++ { 0x000000010a8c6ad4, 0x0000000089406e7e }, ++ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ ++ { 0x00000001e21dfe70, 0x00000001def6be8c }, ++ /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ ++ { 0x00000001da0050e4, 0x0000000075258728 }, ++ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ ++ { 0x00000000772172ae, 
0x000000019536090a }, ++ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ ++ { 0x00000000e47724aa, 0x00000000f2455bfc }, ++ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ ++ { 0x000000003cd63ac4, 0x000000018c40baf4 }, ++ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ ++ { 0x00000001bf47d352, 0x000000004cd390d4 }, ++ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ ++ { 0x000000018dc1d708, 0x00000001e4ece95a }, ++ /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ ++ { 0x000000002d4620a4, 0x000000001a3ee918 }, ++ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ ++ { 0x0000000058fd1740, 0x000000007c652fb8 }, ++ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ ++ { 0x00000000dadd9bfc, 0x000000011c67842c }, ++ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ ++ { 0x00000001ea2140be, 0x00000000254f759c }, ++ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ ++ { 0x000000009de128ba, 0x000000007ece94ca }, ++ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ ++ { 0x000000013ac3aa8e, 0x0000000038f258c2 }, ++ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ ++ { 0x0000000099980562, 0x00000001cdf17b00 }, ++ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ ++ { 0x00000001c1579c86, 0x000000011f882c16 }, ++ /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ ++ { 0x0000000068dbbf94, 0x0000000100093fc8 }, ++ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ ++ { 0x000000004509fb04, 0x00000001cd684f16 }, ++ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ ++ { 0x00000001202f6398, 0x000000004bc6a70a }, ++ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ ++ { 0x000000013aea243e, 0x000000004fc7e8e4 }, ++ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ ++ { 0x00000001b4052ae6, 0x0000000130103f1c }, ++ /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ ++ { 0x00000001cd2a0ae8, 0x0000000111b0024c }, ++ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ ++ { 0x00000001fe4aa8b4, 
0x000000010b3079da }, ++ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ ++ { 0x00000001d1559a42, 0x000000010192bcc2 }, ++ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ ++ { 0x00000001f3e05ecc, 0x0000000074838d50 }, ++ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ ++ { 0x0000000104ddd2cc, 0x000000001b20f520 }, ++ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ ++ { 0x000000015393153c, 0x0000000050c3590a }, ++ /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ ++ { 0x0000000057e942c6, 0x00000000b41cac8e }, ++ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ ++ { 0x000000012c633850, 0x000000000c72cc78 }, ++ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ ++ { 0x00000000ebcaae4c, 0x0000000030cdb032 }, ++ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ ++ { 0x000000013ee532a6, 0x000000013e09fc32 }, ++ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ ++ { 0x00000001bf0cbc7e, 0x000000001ed624d2 }, ++ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ ++ { 0x00000000d50b7a5a, 0x00000000781aee1a }, ++ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ ++ { 0x0000000002fca6e8, 0x00000001c4d8348c }, ++ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ ++ { 0x000000007af40044, 0x0000000057a40336 }, ++ /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ ++ { 0x0000000016178744, 0x0000000085544940 }, ++ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ ++ { 0x000000014c177458, 0x000000019cd21e80 }, ++ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ ++ { 0x000000011b6ddf04, 0x000000013eb95bc0 }, ++ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ ++ { 0x00000001f3e29ccc, 0x00000001dfc9fdfc }, ++ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ ++ { 0x0000000135ae7562, 0x00000000cd028bc2 }, ++ /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ ++ { 0x0000000190ef812c, 0x0000000090db8c44 }, ++ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ ++ { 0x0000000067a2c786, 
0x000000010010a4ce }, ++ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ ++ { 0x0000000048b9496c, 0x00000001c8f4c72c }, ++ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ ++ { 0x000000015a422de6, 0x000000001c26170c }, ++ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ ++ { 0x00000001ef0e3640, 0x00000000e3fccf68 }, ++ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ ++ { 0x00000001006d2d26, 0x00000000d513ed24 }, ++ /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ ++ { 0x00000001170d56d6, 0x00000000141beada }, ++ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ ++ { 0x00000000a5fb613c, 0x000000011071aea0 }, ++ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ ++ { 0x0000000040bbf7fc, 0x000000012e19080a }, ++ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ ++ { 0x000000016ac3a5b2, 0x0000000100ecf826 }, ++ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ ++ { 0x00000000abf16230, 0x0000000069b09412 }, ++ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ ++ { 0x00000001ebe23fac, 0x0000000122297bac }, ++ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ ++ { 0x000000008b6a0894, 0x00000000e9e4b068 }, ++ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ ++ { 0x00000001288ea478, 0x000000004b38651a }, ++ /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ ++ { 0x000000016619c442, 0x00000001468360e2 }, ++ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ ++ { 0x0000000086230038, 0x00000000121c2408 }, ++ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ ++ { 0x000000017746a756, 0x00000000da7e7d08 }, ++ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ ++ { 0x0000000191b8f8f8, 0x00000001058d7652 }, ++ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ ++ { 0x000000008e167708, 0x000000014a098a90 }, ++ /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ ++ { 0x0000000148b22d54, 0x0000000020dbe72e }, ++ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ ++ { 0x0000000044ba2c3c, 
0x000000011e7323e8 }, ++ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ ++ { 0x00000000b54d2b52, 0x00000000d5d4bf94 }, ++ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ ++ { 0x0000000005a4fd8a, 0x0000000199d8746c }, ++ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ ++ { 0x0000000139f9fc46, 0x00000000ce9ca8a0 }, ++ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ ++ { 0x000000015a1fa824, 0x00000000136edece }, ++ /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ ++ { 0x000000000a61ae4c, 0x000000019b92a068 }, ++ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ ++ { 0x0000000145e9113e, 0x0000000071d62206 }, ++ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ ++ { 0x000000006a348448, 0x00000000dfc50158 }, ++ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ ++ { 0x000000004d80a08c, 0x00000001517626bc }, ++ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ ++ { 0x000000014b6837a0, 0x0000000148d1e4fa }, ++ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ ++ { 0x000000016896a7fc, 0x0000000094d8266e }, ++ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ ++ { 0x000000014f187140, 0x00000000606c5e34 }, ++ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ ++ { 0x000000019581b9da, 0x000000019766beaa }, ++ /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ ++ { 0x00000001091bc984, 0x00000001d80c506c }, ++ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ ++ { 0x000000001067223c, 0x000000001e73837c }, ++ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ ++ { 0x00000001ab16ea02, 0x0000000064d587de }, ++ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ ++ { 0x000000013c4598a8, 0x00000000f4a507b0 }, ++ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ ++ { 0x00000000b3735430, 0x0000000040e342fc }, ++ /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ ++ { 0x00000001bb3fc0c0, 0x00000001d5ad9c3a }, ++ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ ++ { 0x00000001570ae19c, 
0x0000000094a691a4 }, ++ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ ++ { 0x00000001ea910712, 0x00000001271ecdfa }, ++ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ ++ { 0x0000000167127128, 0x000000009e54475a }, ++ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ ++ { 0x0000000019e790a2, 0x00000000c9c099ee }, ++ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ ++ { 0x000000003788f710, 0x000000009a2f736c }, ++ /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ ++ { 0x00000001682a160e, 0x00000000bb9f4996 }, ++ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ ++ { 0x000000007f0ebd2e, 0x00000001db688050 }, ++ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ ++ { 0x000000002b032080, 0x00000000e9b10af4 }, ++ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ ++ { 0x00000000cfd1664a, 0x000000012d4545e4 }, ++ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ ++ { 0x00000000aa1181c2, 0x000000000361139c }, ++ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ ++ { 0x00000000ddd08002, 0x00000001a5a1a3a8 }, ++ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ ++ { 0x00000000e8dd0446, 0x000000006844e0b0 }, ++ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ ++ { 0x00000001bbd94a00, 0x00000000c3762f28 }, ++ /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ ++ { 0x00000000ab6cd180, 0x00000001d26287a2 }, ++ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ ++ { 0x0000000031803ce2, 0x00000001f6f0bba8 }, ++ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ ++ { 0x0000000024f40b0c, 0x000000002ffabd62 }, ++ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ ++ { 0x00000001ba1d9834, 0x00000000fb4516b8 }, ++ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ ++ { 0x0000000104de61aa, 0x000000018cfa961c }, ++ /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ ++ { 0x0000000113e40d46, 0x000000019e588d52 }, ++ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ ++ { 0x00000001415598a0, 
0x00000001180f0bbc }, ++ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ ++ { 0x00000000bf6c8c90, 0x00000000e1d9177a }, ++ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ ++ { 0x00000001788b0504, 0x0000000105abc27c }, ++ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ ++ { 0x0000000038385d02, 0x00000000972e4a58 }, ++ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ ++ { 0x00000001b6c83844, 0x0000000183499a5e }, ++ /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ ++ { 0x0000000051061a8a, 0x00000001c96a8cca }, ++ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ ++ { 0x000000017351388a, 0x00000001a1a5b60c }, ++ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ ++ { 0x0000000132928f92, 0x00000000e4b6ac9c }, ++ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ ++ { 0x00000000e6b4f48a, 0x00000001807e7f5a }, ++ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ ++ { 0x0000000039d15e90, 0x000000017a7e3bc8 }, ++ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ ++ { 0x00000000312d6074, 0x00000000d73975da }, ++ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ ++ { 0x000000017bbb2cc4, 0x000000017375d038 }, ++ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ ++ { 0x000000016ded3e18, 0x00000000193680bc }, ++ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ ++ { 0x00000000f1638b16, 0x00000000999b06f6 }, ++ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ ++ { 0x00000001d38b9ecc, 0x00000001f685d2b8 }, ++ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ ++ { 0x000000018b8d09dc, 0x00000001f4ecbed2 }, ++ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ ++ { 0x00000000e7bc27d2, 0x00000000ba16f1a0 }, ++ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ ++ { 0x00000000275e1e96, 0x0000000115aceac4 }, ++ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ ++ { 0x00000000e2e3031e, 0x00000001aeff6292 }, ++ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ ++ { 0x00000001041c84d8, 0x000000009640124c }, ++ /* x^82944 
mod p(x)` << 1, x^83008 mod p(x)` << 1 */ ++ { 0x00000000706ce672, 0x0000000114f41f02 }, ++ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ ++ { 0x000000015d5070da, 0x000000009c5f3586 }, ++ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ ++ { 0x0000000038f9493a, 0x00000001878275fa }, ++ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ ++ { 0x00000000a3348a76, 0x00000000ddc42ce8 }, ++ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ ++ { 0x00000001ad0aab92, 0x0000000181d2c73a }, ++ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ ++ { 0x000000019e85f712, 0x0000000141c9320a }, ++ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ ++ { 0x000000005a871e76, 0x000000015235719a }, ++ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ ++ { 0x000000017249c662, 0x00000000be27d804 }, ++ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ ++ { 0x000000003a084712, 0x000000006242d45a }, ++ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ ++ { 0x00000000ed438478, 0x000000009a53638e }, ++ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ ++ { 0x00000000abac34cc, 0x00000001001ecfb6 }, ++ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ ++ { 0x000000005f35ef3e, 0x000000016d7c2d64 }, ++ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ ++ { 0x0000000047d6608c, 0x00000001d0ce46c0 }, ++ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ ++ { 0x000000002d01470e, 0x0000000124c907b4 }, ++ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ ++ { 0x0000000158bbc7b0, 0x0000000018a555ca }, ++ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ ++ { 0x00000000c0a23e8e, 0x000000006b0980bc }, ++ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ ++ { 0x00000001ebd85c88, 0x000000008bbba964 }, ++ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ ++ { 0x000000019ee20bb2, 0x00000001070a5a1e }, ++ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ ++ { 0x00000001acabf2d6, 0x000000002204322a }, ++ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ 
++ { 0x00000001b7963d56, 0x00000000a27524d0 }, ++ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ ++ { 0x000000017bffa1fe, 0x0000000020b1e4ba }, ++ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ ++ { 0x000000001f15333e, 0x0000000032cc27fc }, ++ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ ++ { 0x000000018593129e, 0x0000000044dd22b8 }, ++ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ ++ { 0x000000019cb32602, 0x00000000dffc9e0a }, ++ /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ ++ { 0x0000000142b05cc8, 0x00000001b7a0ed14 }, ++ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ ++ { 0x00000001be49e7a4, 0x00000000c7842488 }, ++ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ ++ { 0x0000000108f69d6c, 0x00000001c02a4fee }, ++ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ ++ { 0x000000006c0971f0, 0x000000003c273778 }, ++ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ ++ { 0x000000005b16467a, 0x00000001d63f8894 }, ++ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ ++ { 0x00000001551a628e, 0x000000006be557d6 }, ++ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ ++ { 0x000000019e42ea92, 0x000000006a7806ea }, ++ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ ++ { 0x000000012fa83ff2, 0x000000016155aa0c }, ++ /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ ++ { 0x000000011ca9cde0, 0x00000000908650ac }, ++ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ ++ { 0x00000000c8e5cd74, 0x00000000aa5a8084 }, ++ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ ++ { 0x0000000096c27f0c, 0x0000000191bb500a }, ++ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ ++ { 0x000000002baed926, 0x0000000064e9bed0 }, ++ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ ++ { 0x000000017c8de8d2, 0x000000009444f302 }, ++ /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ ++ { 0x00000000d43d6068, 0x000000019db07d3c }, ++ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ ++ { 0x00000000cb2c4b26, 
0x00000001359e3e6e }, ++ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ ++ { 0x0000000145b8da26, 0x00000001e4f10dd2 }, ++ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ ++ { 0x000000018fff4b08, 0x0000000124f5735e }, ++ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ ++ { 0x0000000150b58ed0, 0x0000000124760a4c }, ++ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ ++ { 0x00000001549f39bc, 0x000000000f1fc186 }, ++ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ ++ { 0x00000000ef4d2f42, 0x00000000150e4cc4 }, ++ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ ++ { 0x00000001b1468572, 0x000000002a6204e8 }, ++ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ ++ { 0x000000013d7403b2, 0x00000000beb1d432 }, ++ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ ++ { 0x00000001a4681842, 0x0000000135f3f1f0 }, ++ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ ++ { 0x0000000167714492, 0x0000000074fe2232 }, ++ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ ++ { 0x00000001e599099a, 0x000000001ac6e2ba }, ++ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ ++ { 0x00000000fe128194, 0x0000000013fca91e }, ++ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ ++ { 0x0000000077e8b990, 0x0000000183f4931e }, ++ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ ++ { 0x00000001a267f63a, 0x00000000b6d9b4e4 }, ++ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ ++ { 0x00000001945c245a, 0x00000000b5188656 }, ++ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ ++ { 0x0000000149002e76, 0x0000000027a81a84 }, ++ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ ++ { 0x00000001bb8310a4, 0x0000000125699258 }, ++ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ ++ { 0x000000019ec60bcc, 0x00000001b23de796 }, ++ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ ++ { 0x000000012d8590ae, 0x00000000fe4365dc }, ++ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ ++ { 0x0000000065b00684, 0x00000000c68f497a }, ++ /* x^23552 mod 
p(x)` << 1, x^23616 mod p(x)` << 1 */ ++ { 0x000000015e5aeadc, 0x00000000fbf521ee }, ++ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ ++ { 0x00000000b77ff2b0, 0x000000015eac3378 }, ++ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ ++ { 0x0000000188da2ff6, 0x0000000134914b90 }, ++ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ ++ { 0x0000000063da929a, 0x0000000016335cfe }, ++ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ ++ { 0x00000001389caa80, 0x000000010372d10c }, ++ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ ++ { 0x000000013db599d2, 0x000000015097b908 }, ++ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ ++ { 0x0000000122505a86, 0x00000001227a7572 }, ++ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ ++ { 0x000000016bd72746, 0x000000009a8f75c0 }, ++ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ ++ { 0x00000001c3faf1d4, 0x00000000682c77a2 }, ++ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ ++ { 0x00000001111c826c, 0x00000000231f091c }, ++ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ ++ { 0x00000000153e9fb2, 0x000000007d4439f2 }, ++ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ ++ { 0x000000002b1f7b60, 0x000000017e221efc }, ++ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ ++ { 0x00000000b1dba570, 0x0000000167457c38 }, ++ /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ ++ { 0x00000001f6397b76, 0x00000000bdf081c4 }, ++ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ ++ { 0x0000000156335214, 0x000000016286d6b0 }, ++ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ ++ { 0x00000001d70e3986, 0x00000000c84f001c }, ++ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ ++ { 0x000000003701a774, 0x0000000064efe7c0 }, ++ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ ++ { 0x00000000ac81ef72, 0x000000000ac2d904 }, ++ /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ ++ { 0x0000000133212464, 0x00000000fd226d14 }, ++ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ ++ { 
0x00000000e4e45610, 0x000000011cfd42e0 }, ++ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ ++ { 0x000000000c1bd370, 0x000000016e5a5678 }, ++ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ ++ { 0x00000001a7b9e7a6, 0x00000001d888fe22 }, ++ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ ++ { 0x000000007d657a10, 0x00000001af77fcd4 } ++#else /* __LITTLE_ENDIAN__ */ ++ /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ ++ { 0x00000001651797d2, 0x0000000099ea94a8 }, ++ /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ ++ { 0x0000000021e0d56c, 0x00000000945a8420 }, ++ /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ ++ { 0x000000000f95ecaa, 0x0000000030762706 }, ++ /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ ++ { 0x00000001ebd224ac, 0x00000001a52fc582 }, ++ /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ ++ { 0x000000000ccb97ca, 0x00000001a4a7167a }, ++ /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ ++ { 0x00000001006ec8a8, 0x000000000c18249a }, ++ /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ ++ { 0x000000014f58f196, 0x00000000a924ae7c }, ++ /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ ++ { 0x00000001a7192ca6, 0x00000001e12ccc12 }, ++ /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ ++ { 0x000000019a64bab2, 0x00000000a0b9d4ac }, ++ /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ ++ { 0x0000000014f4ed2e, 0x0000000095e8ddfe }, ++ /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ ++ { 0x000000011092b6a2, 0x00000000233fddc4 }, ++ /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ ++ { 0x00000000c8a1629c, 0x00000001b4529b62 }, ++ /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ ++ { 0x000000017bf32e8e, 0x00000001a7fa0e64 }, ++ /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ ++ { 0x00000001f8cc6582, 0x00000001b5334592 }, ++ /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ ++ { 0x000000008631ddf0, 0x000000011f8ee1b4 }, ++ /* x^245760 mod p(x)` << 1, x^245824 mod 
p(x)` << 1 */ ++ { 0x000000007e5a76d0, 0x000000006252e632 }, ++ /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ ++ { 0x000000002b09b31c, 0x00000000ab973e84 }, ++ /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ ++ { 0x00000001b2df1f84, 0x000000007734f5ec }, ++ /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ ++ { 0x00000001d6f56afc, 0x000000007c547798 }, ++ /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ ++ { 0x00000001b9b5e70c, 0x000000007ec40210 }, ++ /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ ++ { 0x0000000034b626d2, 0x00000001ab1695a8 }, ++ /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ ++ { 0x000000014c53479a, 0x0000000090494bba }, ++ /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ ++ { 0x00000001a6d179a4, 0x00000001123fb816 }, ++ /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ ++ { 0x000000015abd16b4, 0x00000001e188c74c }, ++ /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ ++ { 0x00000000018f9852, 0x00000001c2d3451c }, ++ /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ ++ { 0x000000001fb3084a, 0x00000000f55cf1ca }, ++ /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ ++ { 0x00000000c53dfb04, 0x00000001a0531540 }, ++ /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ ++ { 0x00000000e10c9ad6, 0x0000000132cd7ebc }, ++ /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ ++ { 0x0000000025aa994a, 0x0000000073ab7f36 }, ++ /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ ++ { 0x00000000fa3a74c4, 0x0000000041aed1c2 }, ++ /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ ++ { 0x0000000033eb3f40, 0x0000000136c53800 }, ++ /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ ++ { 0x000000017193f296, 0x0000000126835a30 }, ++ /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ ++ { 0x0000000043f6c86a, 0x000000006241b502 }, ++ /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ ++ { 0x000000016b513ec6, 0x00000000d5196ad4 }, ++ /* x^226304 mod p(x)` << 1, x^226368 mod 
p(x)` << 1 */ ++ { 0x00000000c8f25b4e, 0x000000009cfa769a }, ++ /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ ++ { 0x00000001a45048ec, 0x00000000920e5df4 }, ++ /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ ++ { 0x000000000c441004, 0x0000000169dc310e }, ++ /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ ++ { 0x000000000e17cad6, 0x0000000009fc331c }, ++ /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ ++ { 0x00000001253ae964, 0x000000010d94a81e }, ++ /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ ++ { 0x00000001d7c88ebc, 0x0000000027a20ab2 }, ++ /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ ++ { 0x00000001e7ca913a, 0x0000000114f87504 }, ++ /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ ++ { 0x0000000033ed078a, 0x000000004b076d96 }, ++ /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ ++ { 0x00000000e1839c78, 0x00000000da4d1e74 }, ++ /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ ++ { 0x00000001322b267e, 0x000000001b81f672 }, ++ /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ ++ { 0x00000000638231b6, 0x000000009367c988 }, ++ /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ ++ { 0x00000001ee7f16f4, 0x00000001717214ca }, ++ /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ ++ { 0x0000000117d9924a, 0x000000009f47d820 }, ++ /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ ++ { 0x00000000e1a9e0c4, 0x000000010d9a47d2 }, ++ /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ ++ { 0x00000001403731dc, 0x00000000a696c58c }, ++ /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ ++ { 0x00000001a5ea9682, 0x000000002aa28ec6 }, ++ /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ ++ { 0x0000000101c5c578, 0x00000001fe18fd9a }, ++ /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ ++ { 0x00000000dddf6494, 0x000000019d4fc1ae }, ++ /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ ++ { 0x00000000f1c3db28, 0x00000001ba0e3dea }, ++ /* x^206848 mod p(x)` << 1, x^206912 mod 
p(x)` << 1 */ ++ { 0x000000013112fb9c, 0x0000000074b59a5e }, ++ /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ ++ { 0x00000000b680b906, 0x00000000f2b5ea98 }, ++ /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ ++ { 0x000000001a282932, 0x0000000187132676 }, ++ /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ ++ { 0x0000000089406e7e, 0x000000010a8c6ad4 }, ++ /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ ++ { 0x00000001def6be8c, 0x00000001e21dfe70 }, ++ /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ ++ { 0x0000000075258728, 0x00000001da0050e4 }, ++ /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ ++ { 0x000000019536090a, 0x00000000772172ae }, ++ /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ ++ { 0x00000000f2455bfc, 0x00000000e47724aa }, ++ /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ ++ { 0x000000018c40baf4, 0x000000003cd63ac4 }, ++ /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ ++ { 0x000000004cd390d4, 0x00000001bf47d352 }, ++ /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ ++ { 0x00000001e4ece95a, 0x000000018dc1d708 }, ++ /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ ++ { 0x000000001a3ee918, 0x000000002d4620a4 }, ++ /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ ++ { 0x000000007c652fb8, 0x0000000058fd1740 }, ++ /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ ++ { 0x000000011c67842c, 0x00000000dadd9bfc }, ++ /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ ++ { 0x00000000254f759c, 0x00000001ea2140be }, ++ /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ ++ { 0x000000007ece94ca, 0x000000009de128ba }, ++ /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ ++ { 0x0000000038f258c2, 0x000000013ac3aa8e }, ++ /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ ++ { 0x00000001cdf17b00, 0x0000000099980562 }, ++ /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ ++ { 0x000000011f882c16, 0x00000001c1579c86 }, ++ /* x^187392 mod p(x)` << 1, x^187456 mod 
p(x)` << 1 */ ++ { 0x0000000100093fc8, 0x0000000068dbbf94 }, ++ /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ ++ { 0x00000001cd684f16, 0x000000004509fb04 }, ++ /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ ++ { 0x000000004bc6a70a, 0x00000001202f6398 }, ++ /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ ++ { 0x000000004fc7e8e4, 0x000000013aea243e }, ++ /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ ++ { 0x0000000130103f1c, 0x00000001b4052ae6 }, ++ /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ ++ { 0x0000000111b0024c, 0x00000001cd2a0ae8 }, ++ /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ ++ { 0x000000010b3079da, 0x00000001fe4aa8b4 }, ++ /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ ++ { 0x000000010192bcc2, 0x00000001d1559a42 }, ++ /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ ++ { 0x0000000074838d50, 0x00000001f3e05ecc }, ++ /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ ++ { 0x000000001b20f520, 0x0000000104ddd2cc }, ++ /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ ++ { 0x0000000050c3590a, 0x000000015393153c }, ++ /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ ++ { 0x00000000b41cac8e, 0x0000000057e942c6 }, ++ /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ ++ { 0x000000000c72cc78, 0x000000012c633850 }, ++ /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ ++ { 0x0000000030cdb032, 0x00000000ebcaae4c }, ++ /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ ++ { 0x000000013e09fc32, 0x000000013ee532a6 }, ++ /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ ++ { 0x000000001ed624d2, 0x00000001bf0cbc7e }, ++ /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ ++ { 0x00000000781aee1a, 0x00000000d50b7a5a }, ++ /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ ++ { 0x00000001c4d8348c, 0x0000000002fca6e8 }, ++ /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ ++ { 0x0000000057a40336, 0x000000007af40044 }, ++ /* x^167936 mod p(x)` << 1, x^168000 mod 
p(x)` << 1 */ ++ { 0x0000000085544940, 0x0000000016178744 }, ++ /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ ++ { 0x000000019cd21e80, 0x000000014c177458 }, ++ /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ ++ { 0x000000013eb95bc0, 0x000000011b6ddf04 }, ++ /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ ++ { 0x00000001dfc9fdfc, 0x00000001f3e29ccc }, ++ /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ ++ { 0x00000000cd028bc2, 0x0000000135ae7562 }, ++ /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ ++ { 0x0000000090db8c44, 0x0000000190ef812c }, ++ /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ ++ { 0x000000010010a4ce, 0x0000000067a2c786 }, ++ /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ ++ { 0x00000001c8f4c72c, 0x0000000048b9496c }, ++ /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ ++ { 0x000000001c26170c, 0x000000015a422de6 }, ++ /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ ++ { 0x00000000e3fccf68, 0x00000001ef0e3640 }, ++ /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ ++ { 0x00000000d513ed24, 0x00000001006d2d26 }, ++ /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ ++ { 0x00000000141beada, 0x00000001170d56d6 }, ++ /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ ++ { 0x000000011071aea0, 0x00000000a5fb613c }, ++ /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ ++ { 0x000000012e19080a, 0x0000000040bbf7fc }, ++ /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ ++ { 0x0000000100ecf826, 0x000000016ac3a5b2 }, ++ /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ ++ { 0x0000000069b09412, 0x00000000abf16230 }, ++ /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ ++ { 0x0000000122297bac, 0x00000001ebe23fac }, ++ /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ ++ { 0x00000000e9e4b068, 0x000000008b6a0894 }, ++ /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ ++ { 0x000000004b38651a, 0x00000001288ea478 }, ++ /* x^148480 mod p(x)` << 1, x^148544 mod 
p(x)` << 1 */ ++ { 0x00000001468360e2, 0x000000016619c442 }, ++ /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ ++ { 0x00000000121c2408, 0x0000000086230038 }, ++ /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ ++ { 0x00000000da7e7d08, 0x000000017746a756 }, ++ /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ ++ { 0x00000001058d7652, 0x0000000191b8f8f8 }, ++ /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ ++ { 0x000000014a098a90, 0x000000008e167708 }, ++ /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ ++ { 0x0000000020dbe72e, 0x0000000148b22d54 }, ++ /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ ++ { 0x000000011e7323e8, 0x0000000044ba2c3c }, ++ /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ ++ { 0x00000000d5d4bf94, 0x00000000b54d2b52 }, ++ /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ ++ { 0x0000000199d8746c, 0x0000000005a4fd8a }, ++ /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ ++ { 0x00000000ce9ca8a0, 0x0000000139f9fc46 }, ++ /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ ++ { 0x00000000136edece, 0x000000015a1fa824 }, ++ /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ ++ { 0x000000019b92a068, 0x000000000a61ae4c }, ++ /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ ++ { 0x0000000071d62206, 0x0000000145e9113e }, ++ /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ ++ { 0x00000000dfc50158, 0x000000006a348448 }, ++ /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ ++ { 0x00000001517626bc, 0x000000004d80a08c }, ++ /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ ++ { 0x0000000148d1e4fa, 0x000000014b6837a0 }, ++ /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ ++ { 0x0000000094d8266e, 0x000000016896a7fc }, ++ /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ ++ { 0x00000000606c5e34, 0x000000014f187140 }, ++ /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ ++ { 0x000000019766beaa, 0x000000019581b9da }, ++ /* x^129024 mod p(x)` << 1, x^129088 mod 
p(x)` << 1 */ ++ { 0x00000001d80c506c, 0x00000001091bc984 }, ++ /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ ++ { 0x000000001e73837c, 0x000000001067223c }, ++ /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ ++ { 0x0000000064d587de, 0x00000001ab16ea02 }, ++ /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ ++ { 0x00000000f4a507b0, 0x000000013c4598a8 }, ++ /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ ++ { 0x0000000040e342fc, 0x00000000b3735430 }, ++ /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ ++ { 0x00000001d5ad9c3a, 0x00000001bb3fc0c0 }, ++ /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ ++ { 0x0000000094a691a4, 0x00000001570ae19c }, ++ /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ ++ { 0x00000001271ecdfa, 0x00000001ea910712 }, ++ /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ ++ { 0x000000009e54475a, 0x0000000167127128 }, ++ /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ ++ { 0x00000000c9c099ee, 0x0000000019e790a2 }, ++ /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ ++ { 0x000000009a2f736c, 0x000000003788f710 }, ++ /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ ++ { 0x00000000bb9f4996, 0x00000001682a160e }, ++ /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ ++ { 0x00000001db688050, 0x000000007f0ebd2e }, ++ /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ ++ { 0x00000000e9b10af4, 0x000000002b032080 }, ++ /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ ++ { 0x000000012d4545e4, 0x00000000cfd1664a }, ++ /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ ++ { 0x000000000361139c, 0x00000000aa1181c2 }, ++ /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ ++ { 0x00000001a5a1a3a8, 0x00000000ddd08002 }, ++ /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ ++ { 0x000000006844e0b0, 0x00000000e8dd0446 }, ++ /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ ++ { 0x00000000c3762f28, 0x00000001bbd94a00 }, ++ /* x^109568 mod p(x)` << 1, x^109632 mod 
p(x)` << 1 */ ++ { 0x00000001d26287a2, 0x00000000ab6cd180 }, ++ /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ ++ { 0x00000001f6f0bba8, 0x0000000031803ce2 }, ++ /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ ++ { 0x000000002ffabd62, 0x0000000024f40b0c }, ++ /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ ++ { 0x00000000fb4516b8, 0x00000001ba1d9834 }, ++ /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ ++ { 0x000000018cfa961c, 0x0000000104de61aa }, ++ /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ ++ { 0x000000019e588d52, 0x0000000113e40d46 }, ++ /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ ++ { 0x00000001180f0bbc, 0x00000001415598a0 }, ++ /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ ++ { 0x00000000e1d9177a, 0x00000000bf6c8c90 }, ++ /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ ++ { 0x0000000105abc27c, 0x00000001788b0504 }, ++ /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ ++ { 0x00000000972e4a58, 0x0000000038385d02 }, ++ /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ ++ { 0x0000000183499a5e, 0x00000001b6c83844 }, ++ /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ ++ { 0x00000001c96a8cca, 0x0000000051061a8a }, ++ /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ ++ { 0x00000001a1a5b60c, 0x000000017351388a }, ++ /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ ++ { 0x00000000e4b6ac9c, 0x0000000132928f92 }, ++ /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ ++ { 0x00000001807e7f5a, 0x00000000e6b4f48a }, ++ /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ ++ { 0x000000017a7e3bc8, 0x0000000039d15e90 }, ++ /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ ++ { 0x00000000d73975da, 0x00000000312d6074 }, ++ /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ ++ { 0x000000017375d038, 0x000000017bbb2cc4 }, ++ /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ ++ { 0x00000000193680bc, 0x000000016ded3e18 }, ++ /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ ++ { 
0x00000000999b06f6, 0x00000000f1638b16 }, ++ /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ ++ { 0x00000001f685d2b8, 0x00000001d38b9ecc }, ++ /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ ++ { 0x00000001f4ecbed2, 0x000000018b8d09dc }, ++ /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ ++ { 0x00000000ba16f1a0, 0x00000000e7bc27d2 }, ++ /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ ++ { 0x0000000115aceac4, 0x00000000275e1e96 }, ++ /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ ++ { 0x00000001aeff6292, 0x00000000e2e3031e }, ++ /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ ++ { 0x000000009640124c, 0x00000001041c84d8 }, ++ /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ ++ { 0x0000000114f41f02, 0x00000000706ce672 }, ++ /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ ++ { 0x000000009c5f3586, 0x000000015d5070da }, ++ /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ ++ { 0x00000001878275fa, 0x0000000038f9493a }, ++ /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ ++ { 0x00000000ddc42ce8, 0x00000000a3348a76 }, ++ /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ ++ { 0x0000000181d2c73a, 0x00000001ad0aab92 }, ++ /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ ++ { 0x0000000141c9320a, 0x000000019e85f712 }, ++ /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ ++ { 0x000000015235719a, 0x000000005a871e76 }, ++ /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ ++ { 0x00000000be27d804, 0x000000017249c662 }, ++ /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ ++ { 0x000000006242d45a, 0x000000003a084712 }, ++ /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ ++ { 0x000000009a53638e, 0x00000000ed438478 }, ++ /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ ++ { 0x00000001001ecfb6, 0x00000000abac34cc }, ++ /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ ++ { 0x000000016d7c2d64, 0x000000005f35ef3e }, ++ /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ ++ { 0x00000001d0ce46c0, 0x0000000047d6608c }, 
++ /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ ++ { 0x0000000124c907b4, 0x000000002d01470e }, ++ /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ ++ { 0x0000000018a555ca, 0x0000000158bbc7b0 }, ++ /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ ++ { 0x000000006b0980bc, 0x00000000c0a23e8e }, ++ /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ ++ { 0x000000008bbba964, 0x00000001ebd85c88 }, ++ /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ ++ { 0x00000001070a5a1e, 0x000000019ee20bb2 }, ++ /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ ++ { 0x000000002204322a, 0x00000001acabf2d6 }, ++ /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ ++ { 0x00000000a27524d0, 0x00000001b7963d56 }, ++ /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ ++ { 0x0000000020b1e4ba, 0x000000017bffa1fe }, ++ /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ ++ { 0x0000000032cc27fc, 0x000000001f15333e }, ++ /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ ++ { 0x0000000044dd22b8, 0x000000018593129e }, ++ /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ ++ { 0x00000000dffc9e0a, 0x000000019cb32602 }, ++ /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ ++ { 0x00000001b7a0ed14, 0x0000000142b05cc8 }, ++ /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ ++ { 0x00000000c7842488, 0x00000001be49e7a4 }, ++ /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ ++ { 0x00000001c02a4fee, 0x0000000108f69d6c }, ++ /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ ++ { 0x000000003c273778, 0x000000006c0971f0 }, ++ /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ ++ { 0x00000001d63f8894, 0x000000005b16467a }, ++ /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ ++ { 0x000000006be557d6, 0x00000001551a628e }, ++ /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ ++ { 0x000000006a7806ea, 0x000000019e42ea92 }, ++ /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ ++ { 0x000000016155aa0c, 0x000000012fa83ff2 }, ++ /* x^50176 mod p(x)` << 1, x^50240 mod 
p(x)` << 1 */ ++ { 0x00000000908650ac, 0x000000011ca9cde0 }, ++ /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ ++ { 0x00000000aa5a8084, 0x00000000c8e5cd74 }, ++ /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ ++ { 0x0000000191bb500a, 0x0000000096c27f0c }, ++ /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ ++ { 0x0000000064e9bed0, 0x000000002baed926 }, ++ /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ ++ { 0x000000009444f302, 0x000000017c8de8d2 }, ++ /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ ++ { 0x000000019db07d3c, 0x00000000d43d6068 }, ++ /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ ++ { 0x00000001359e3e6e, 0x00000000cb2c4b26 }, ++ /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ ++ { 0x00000001e4f10dd2, 0x0000000145b8da26 }, ++ /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ ++ { 0x0000000124f5735e, 0x000000018fff4b08 }, ++ /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ ++ { 0x0000000124760a4c, 0x0000000150b58ed0 }, ++ /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ ++ { 0x000000000f1fc186, 0x00000001549f39bc }, ++ /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ ++ { 0x00000000150e4cc4, 0x00000000ef4d2f42 }, ++ /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ ++ { 0x000000002a6204e8, 0x00000001b1468572 }, ++ /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ ++ { 0x00000000beb1d432, 0x000000013d7403b2 }, ++ /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ ++ { 0x0000000135f3f1f0, 0x00000001a4681842 }, ++ /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ ++ { 0x0000000074fe2232, 0x0000000167714492 }, ++ /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ ++ { 0x000000001ac6e2ba, 0x00000001e599099a }, ++ /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ ++ { 0x0000000013fca91e, 0x00000000fe128194 }, ++ /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ ++ { 0x0000000183f4931e, 0x0000000077e8b990 }, ++ /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ ++ { 0x00000000b6d9b4e4, 
0x00000001a267f63a }, ++ /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ ++ { 0x00000000b5188656, 0x00000001945c245a }, ++ /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ ++ { 0x0000000027a81a84, 0x0000000149002e76 }, ++ /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ ++ { 0x0000000125699258, 0x00000001bb8310a4 }, ++ /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ ++ { 0x00000001b23de796, 0x000000019ec60bcc }, ++ /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ ++ { 0x00000000fe4365dc, 0x000000012d8590ae }, ++ /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ ++ { 0x00000000c68f497a, 0x0000000065b00684 }, ++ /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ ++ { 0x00000000fbf521ee, 0x000000015e5aeadc }, ++ /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ ++ { 0x000000015eac3378, 0x00000000b77ff2b0 }, ++ /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ ++ { 0x0000000134914b90, 0x0000000188da2ff6 }, ++ /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ ++ { 0x0000000016335cfe, 0x0000000063da929a }, ++ /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ ++ { 0x000000010372d10c, 0x00000001389caa80 }, ++ /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ ++ { 0x000000015097b908, 0x000000013db599d2 }, ++ /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ ++ { 0x00000001227a7572, 0x0000000122505a86 }, ++ /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ ++ { 0x000000009a8f75c0, 0x000000016bd72746 }, ++ /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ ++ { 0x00000000682c77a2, 0x00000001c3faf1d4 }, ++ /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ ++ { 0x00000000231f091c, 0x00000001111c826c }, ++ /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ ++ { 0x000000007d4439f2, 0x00000000153e9fb2 }, ++ /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ ++ { 0x000000017e221efc, 0x000000002b1f7b60 }, ++ /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ ++ { 0x0000000167457c38, 0x00000000b1dba570 }, ++ /* x^10240 mod 
p(x)` << 1, x^10304 mod p(x)` << 1 */ ++ { 0x00000000bdf081c4, 0x00000001f6397b76 }, ++ /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ ++ { 0x000000016286d6b0, 0x0000000156335214 }, ++ /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ ++ { 0x00000000c84f001c, 0x00000001d70e3986 }, ++ /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ ++ { 0x0000000064efe7c0, 0x000000003701a774 }, ++ /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ ++ { 0x000000000ac2d904, 0x00000000ac81ef72 }, ++ /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ ++ { 0x00000000fd226d14, 0x0000000133212464 }, ++ /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ ++ { 0x000000011cfd42e0, 0x00000000e4e45610 }, ++ /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ ++ { 0x000000016e5a5678, 0x000000000c1bd370 }, ++ /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ ++ { 0x00000001d888fe22, 0x00000001a7b9e7a6 }, ++ /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ ++ { 0x00000001af77fcd4, 0x000000007d657a10 } ++#endif /* __LITTLE_ENDIAN__ */ ++ }; ++ ++/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ ++ ++static const __vector unsigned long long vcrc_short_const[16] ++ __attribute__((aligned (16))) = { ++#ifdef __LITTLE_ENDIAN__ ++ /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ ++ { 0x99168a18ec447f11, 0xed837b2613e8221e }, ++ /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ ++ { 0xe23e954e8fd2cd3c, 0xc8acdd8147b9ce5a }, ++ /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ ++ { 0x92f8befe6b1d2b53, 0xd9ad6d87d4277e25 }, ++ /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ ++ { 0xf38a3556291ea462, 0xc10ec5e033fbca3b }, ++ /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ ++ { 0x974ac56262b6ca4b, 0xc0b55b0e82e02e2f }, ++ /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ ++ { 
0x855712b3784d2a56, 0x71aa1df0e172334d }, ++ /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ ++ { 0xa5abe9f80eaee722, 0xfee3053e3969324d }, ++ /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ ++ { 0x1fa0943ddb54814c, 0xf44779b93eb2bd08 }, ++ /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ ++ { 0xa53ff440d7bbfe6a, 0xf5449b3f00cc3374 }, ++ /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ ++ { 0xebe7e3566325605c, 0x6f8346e1d777606e }, ++ /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ ++ { 0xc65a272ce5b592b8, 0xe3ab4f2ac0b95347 }, ++ /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ ++ { 0x5705a9ca4721589f, 0xaa2215ea329ecc11 }, ++ /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ ++ { 0xe3720acb88d14467, 0x1ed8f66ed95efd26 }, ++ /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ ++ { 0xba1aca0315141c31, 0x78ed02d5a700e96a }, ++ /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ ++ { 0xad2a31b3ed627dae, 0xba8ccbe832b39da3 }, ++ /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ ++ { 0x6655004fa06a2517, 0xedb88320b1e6b092 } ++#else /* __LITTLE_ENDIAN__ */ ++ /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ ++ { 0xed837b2613e8221e, 0x99168a18ec447f11 }, ++ /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ ++ { 0xc8acdd8147b9ce5a, 0xe23e954e8fd2cd3c }, ++ /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ ++ { 0xd9ad6d87d4277e25, 0x92f8befe6b1d2b53 }, ++ /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ ++ { 0xc10ec5e033fbca3b, 0xf38a3556291ea462 }, ++ /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ ++ { 0xc0b55b0e82e02e2f, 0x974ac56262b6ca4b }, ++ /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod 
p(x) */ ++ { 0x71aa1df0e172334d, 0x855712b3784d2a56 }, ++ /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ ++ { 0xfee3053e3969324d, 0xa5abe9f80eaee722 }, ++ /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ ++ { 0xf44779b93eb2bd08, 0x1fa0943ddb54814c }, ++ /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ ++ { 0xf5449b3f00cc3374, 0xa53ff440d7bbfe6a }, ++ /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ ++ { 0x6f8346e1d777606e, 0xebe7e3566325605c }, ++ /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ ++ { 0xe3ab4f2ac0b95347, 0xc65a272ce5b592b8 }, ++ /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ ++ { 0xaa2215ea329ecc11, 0x5705a9ca4721589f }, ++ /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ ++ { 0x1ed8f66ed95efd26, 0xe3720acb88d14467 }, ++ /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ ++ { 0x78ed02d5a700e96a, 0xba1aca0315141c31 }, ++ /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ ++ { 0xba8ccbe832b39da3, 0xad2a31b3ed627dae }, ++ /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ ++ { 0xedb88320b1e6b092, 0x6655004fa06a2517 } ++#endif /* __LITTLE_ENDIAN__ */ ++ }; ++ ++/* Barrett constants */ ++/* 33 bit reflected Barrett constant m - (4^32)/n */ ++ ++static const __vector unsigned long long v_Barrett_const[2] ++ __attribute__((aligned (16))) = { ++ /* x^64 div p(x) */ ++#ifdef __LITTLE_ENDIAN__ ++ { 0x00000001f7011641, 0x0000000000000000 }, ++ { 0x00000001db710641, 0x0000000000000000 } ++#else /* __LITTLE_ENDIAN__ */ ++ { 0x0000000000000000, 0x00000001f7011641 }, ++ { 0x0000000000000000, 0x00000001db710641 } ++#endif /* __LITTLE_ENDIAN__ */ ++ }; ++#endif /* POWER8_INTRINSICS */ ++ ++#endif /* __ASSEMBLER__ */ +diff --git a/contrib/power8-crc/vec_crc32.c b/contrib/power8-crc/vec_crc32.c +new file mode 100644 +index 
0000000..bb2204b +--- /dev/null ++++ b/contrib/power8-crc/vec_crc32.c +@@ -0,0 +1,674 @@ ++/* ++ * Calculate the checksum of data that is 16 byte aligned and a multiple of ++ * 16 bytes. ++ * ++ * The first step is to reduce it to 1024 bits. We do this in 8 parallel ++ * chunks in order to mask the latency of the vpmsum instructions. If we ++ * have more than 32 kB of data to checksum we repeat this step multiple ++ * times, passing in the previous 1024 bits. ++ * ++ * The next step is to reduce the 1024 bits to 64 bits. This step adds ++ * 32 bits of 0s to the end - this matches what a CRC does. We just ++ * calculate constants that land the data in these 32 bits. ++ * ++ * We then use fixed point Barrett reduction to compute a mod n over GF(2) ++ * for n = CRC using POWER8 instructions. We use x = 32. ++ * ++ * http://en.wikipedia.org/wiki/Barrett_reduction ++ * ++ * This code uses gcc vector builtins instead of using assembly directly. ++ * ++ * Copyright (C) 2017 Rogerio Alves , IBM ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of either: ++ * ++ * a) the GNU General Public License as published by the Free Software ++ * Foundation; either version 2 of the License, or (at your option) ++ * any later version, or ++ * b) the Apache License, Version 2.0 ++ */ ++ ++#include ++ ++#define POWER8_INTRINSICS ++#define CRC_TABLE ++ ++#ifdef CRC32_CONSTANTS_HEADER ++#include CRC32_CONSTANTS_HEADER ++#else ++#include "crc32_constants.h" ++#endif ++ ++#define VMX_ALIGN 16 ++#define VMX_ALIGN_MASK (VMX_ALIGN-1) ++ ++#ifdef REFLECT ++static unsigned int crc32_align(unsigned int crc, const unsigned char *p, ++ unsigned long len) ++{ ++ while (len--) ++ crc = crc_table[(crc ^ *p++) & 0xff] ^ (crc >> 8); ++ return crc; ++} ++#else ++static unsigned int crc32_align(unsigned int crc, const unsigned char *p, ++ unsigned long len) ++{ ++ while (len--) ++ crc = crc_table[((crc >> 24) ^ *p++) & 0xff] ^ (crc << 8); ++ return crc; ++}
++#endif ++ ++static unsigned int __attribute__ ((aligned (32))) ++__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len); ++ ++#ifndef CRC32_FUNCTION ++#define CRC32_FUNCTION crc32_vpmsum ++#endif ++ ++unsigned int CRC32_FUNCTION(unsigned int crc, const unsigned char *p, ++ unsigned long len) ++{ ++ unsigned int prealign; ++ unsigned int tail; ++ ++#ifdef CRC_XOR ++ crc ^= 0xffffffff; ++#endif ++ ++ if (len < VMX_ALIGN + VMX_ALIGN_MASK) { ++ crc = crc32_align(crc, p, len); ++ goto out; ++ } ++ ++ if ((unsigned long)p & VMX_ALIGN_MASK) { ++ prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); ++ crc = crc32_align(crc, p, prealign); ++ len -= prealign; ++ p += prealign; ++ } ++ ++ crc = __crc32_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); ++ ++ tail = len & VMX_ALIGN_MASK; ++ if (tail) { ++ p += len & ~VMX_ALIGN_MASK; ++ crc = crc32_align(crc, p, tail); ++ } ++ ++out: ++#ifdef CRC_XOR ++ crc ^= 0xffffffff; ++#endif ++ ++ return crc; ++} ++ ++#if defined (__clang__) ++#include "clang_workaround.h" ++#else ++#define __builtin_pack_vector(a, b) __builtin_pack_vector_int128 ((a), (b)) ++#define __builtin_unpack_vector_0(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 0) ++#define __builtin_unpack_vector_1(a) __builtin_unpack_vector_int128 ((vector __int128_t)(a), 1) ++#endif ++ ++/* When we have a load-store in a single-dispatch group and address overlap ++ * such that forwarding is not allowed (load-hit-store) the group must be flushed. ++ * A group ending NOP prevents the flush. ++ */ ++#define GROUP_ENDING_NOP asm("ori 2,2,0" ::: "memory") ++ ++#if defined(__BIG_ENDIAN__) && defined (REFLECT) ++#define BYTESWAP_DATA ++#elif defined(__LITTLE_ENDIAN__) && !defined(REFLECT) ++#define BYTESWAP_DATA ++#endif ++ ++#ifdef BYTESWAP_DATA ++#define VEC_PERM(vr, va, vb, vc) vr = vec_perm(va, vb,\ ++ (__vector unsigned char) vc) ++#if defined(__LITTLE_ENDIAN__) ++/* Byte reverse permute constant LE.
*/ ++static const __vector unsigned long long vperm_const ++ __attribute__ ((aligned(16))) = { 0x08090A0B0C0D0E0FUL, ++ 0x0001020304050607UL }; ++#else ++static const __vector unsigned long long vperm_const ++ __attribute__ ((aligned(16))) = { 0x0F0E0D0C0B0A0908UL, ++ 0X0706050403020100UL }; ++#endif ++#else ++#define VEC_PERM(vr, va, vb, vc) ++#endif ++ ++static unsigned int __attribute__ ((aligned (32))) ++__crc32_vpmsum(unsigned int crc, const void* p, unsigned long len) { ++ ++ const __vector unsigned long long vzero = {0,0}; ++ const __vector unsigned long long vones = {0xffffffffffffffffUL, ++ 0xffffffffffffffffUL}; ++ ++#ifdef REFLECT ++ const __vector unsigned long long vmask_32bit = ++ (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, ++ (__vector unsigned char)vones, 4); ++#endif ++ ++ const __vector unsigned long long vmask_64bit = ++ (__vector unsigned long long)vec_sld((__vector unsigned char)vzero, ++ (__vector unsigned char)vones, 8); ++ ++ __vector unsigned long long vcrc; ++ ++ __vector unsigned long long vconst1, vconst2; ++ ++ /* vdata0-vdata7 will contain our data (p). */ ++ __vector unsigned long long vdata0, vdata1, vdata2, vdata3, vdata4, ++ vdata5, vdata6, vdata7; ++ ++ /* v0-v7 will contain our checksums */ ++ __vector unsigned long long v0 = {0,0}; ++ __vector unsigned long long v1 = {0,0}; ++ __vector unsigned long long v2 = {0,0}; ++ __vector unsigned long long v3 = {0,0}; ++ __vector unsigned long long v4 = {0,0}; ++ __vector unsigned long long v5 = {0,0}; ++ __vector unsigned long long v6 = {0,0}; ++ __vector unsigned long long v7 = {0,0}; ++ ++ ++ /* Vector auxiliary variables. */ ++ __vector unsigned long long va0, va1, va2, va3, va4, va5, va6, va7; ++ ++ unsigned int result = 0; ++ unsigned int offset; /* Constant table offset. */ ++ ++ unsigned long i; /* Counter. */ ++ unsigned long chunks; ++ ++ unsigned long block_size; ++ int next_block = 0; ++ ++ /* Align by 128 bits. 
The last 128 bit block will be processed at end. */ ++ unsigned long length = len & 0xFFFFFFFFFFFFFF80UL; ++ ++#ifdef REFLECT ++ vcrc = (__vector unsigned long long)__builtin_pack_vector(0UL, crc); ++#else ++ vcrc = (__vector unsigned long long)__builtin_pack_vector(crc, 0UL); ++ ++ /* Shift into top 32 bits */ ++ vcrc = (__vector unsigned long long)vec_sld((__vector unsigned char)vcrc, ++ (__vector unsigned char)vzero, 4); ++#endif ++ ++ /* Short version. */ ++ if (len < 256) { ++ /* Calculate where in the constant table we need to start. */ ++ offset = 256 - len; ++ ++ vconst1 = vec_ld(offset, vcrc_short_const); ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vconst1, vperm_const); ++ ++ /* xor initial value*/ ++ vdata0 = vec_xor(vdata0, vcrc); ++ ++ vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw ++ ((__vector unsigned int)vdata0, (__vector unsigned int)vconst1); ++ v0 = vec_xor(v0, vdata0); ++ ++ for (i = 16; i < len; i += 16) { ++ vconst1 = vec_ld(offset + i, vcrc_short_const); ++ vdata0 = vec_ld(i, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vconst1, vperm_const); ++ vdata0 = (__vector unsigned long long) __builtin_crypto_vpmsumw ++ ((__vector unsigned int)vdata0, (__vector unsigned int)vconst1); ++ v0 = vec_xor(v0, vdata0); ++ } ++ } else { ++ ++ /* Load initial values. 
*/ ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ vdata1 = vec_ld(16, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ VEC_PERM(vdata1, vdata1, vdata1, vperm_const); ++ ++ vdata2 = vec_ld(32, (__vector unsigned long long*) p); ++ vdata3 = vec_ld(48, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata2, vdata2, vdata2, vperm_const); ++ VEC_PERM(vdata3, vdata3, vdata3, vperm_const); ++ ++ vdata4 = vec_ld(64, (__vector unsigned long long*) p); ++ vdata5 = vec_ld(80, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata4, vdata4, vdata4, vperm_const); ++ VEC_PERM(vdata5, vdata5, vdata5, vperm_const); ++ ++ vdata6 = vec_ld(96, (__vector unsigned long long*) p); ++ vdata7 = vec_ld(112, (__vector unsigned long long*) p); ++ ++ VEC_PERM(vdata6, vdata6, vdata6, vperm_const); ++ VEC_PERM(vdata7, vdata7, vdata7, vperm_const); ++ ++ /* xor in initial value */ ++ vdata0 = vec_xor(vdata0, vcrc); ++ ++ p = (char *)p + 128; ++ ++ do { ++ /* Checksum in blocks of MAX_SIZE. */ ++ block_size = length; ++ if (block_size > MAX_SIZE) { ++ block_size = MAX_SIZE; ++ } ++ ++ length = length - block_size; ++ ++ /* ++ * Work out the offset into the constants table to start at. 
Each ++ * constant is 16 bytes, and it is used against 128 bytes of input ++ * data - 128 / 16 = 8 ++ */ ++ offset = (MAX_SIZE/8) - (block_size/8); ++ /* We reduce our final 128 bytes in a separate step */ ++ chunks = (block_size/128)-1; ++ ++ vconst1 = vec_ld(offset, vcrc_const); ++ ++ va0 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata0, ++ (__vector unsigned long long)vconst1); ++ va1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata1, ++ (__vector unsigned long long)vconst1); ++ va2 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata2, ++ (__vector unsigned long long)vconst1); ++ va3 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata3, ++ (__vector unsigned long long)vconst1); ++ va4 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata4, ++ (__vector unsigned long long)vconst1); ++ va5 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata5, ++ (__vector unsigned long long)vconst1); ++ va6 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata6, ++ (__vector unsigned long long)vconst1); ++ va7 = __builtin_crypto_vpmsumd ((__vector unsigned long long)vdata7, ++ (__vector unsigned long long)vconst1); ++ ++ if (chunks > 1) { ++ offset += 16; ++ vconst2 = vec_ld(offset, vcrc_const); ++ GROUP_ENDING_NOP; ++ ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ ++ vdata1 = vec_ld(16, (__vector unsigned long long*) p); ++ VEC_PERM(vdata1, vdata1, vdata1, vperm_const); ++ ++ vdata2 = vec_ld(32, (__vector unsigned long long*) p); ++ VEC_PERM(vdata2, vdata2, vdata2, vperm_const); ++ ++ vdata3 = vec_ld(48, (__vector unsigned long long*) p); ++ VEC_PERM(vdata3, vdata3, vdata3, vperm_const); ++ ++ vdata4 = vec_ld(64, (__vector unsigned long long*) p); ++ VEC_PERM(vdata4, vdata4, vdata4, vperm_const); ++ ++ vdata5 = vec_ld(80, (__vector unsigned long long*) p); ++ VEC_PERM(vdata5, vdata5, vdata5, vperm_const); ++ ++ vdata6 = vec_ld(96, 
(__vector unsigned long long*) p); ++ VEC_PERM(vdata6, vdata6, vdata6, vperm_const); ++ ++ vdata7 = vec_ld(112, (__vector unsigned long long*) p); ++ VEC_PERM(vdata7, vdata7, vdata7, vperm_const); ++ ++ p = (char *)p + 128; ++ ++ /* ++ * main loop. We modulo schedule it such that it takes three ++ * iterations to complete - first iteration load, second ++ * iteration vpmsum, third iteration xor. ++ */ ++ for (i = 0; i < chunks-2; i++) { ++ vconst1 = vec_ld(offset, vcrc_const); ++ offset += 16; ++ GROUP_ENDING_NOP; ++ ++ v0 = vec_xor(v0, va0); ++ va0 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata0, (__vector unsigned long long)vconst2); ++ vdata0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v1 = vec_xor(v1, va1); ++ va1 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata1, (__vector unsigned long long)vconst2); ++ vdata1 = vec_ld(16, (__vector unsigned long long*) p); ++ VEC_PERM(vdata1, vdata1, vdata1, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v2 = vec_xor(v2, va2); ++ va2 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata2, (__vector unsigned long long)vconst2); ++ vdata2 = vec_ld(32, (__vector unsigned long long*) p); ++ VEC_PERM(vdata2, vdata2, vdata2, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v3 = vec_xor(v3, va3); ++ va3 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata3, (__vector unsigned long long)vconst2); ++ vdata3 = vec_ld(48, (__vector unsigned long long*) p); ++ VEC_PERM(vdata3, vdata3, vdata3, vperm_const); ++ ++ vconst2 = vec_ld(offset, vcrc_const); ++ GROUP_ENDING_NOP; ++ ++ v4 = vec_xor(v4, va4); ++ va4 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata4, (__vector unsigned long long)vconst1); ++ vdata4 = vec_ld(64, (__vector unsigned long long*) p); ++ VEC_PERM(vdata4, vdata4, vdata4, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v5 = vec_xor(v5, va5); ++ va5 = __builtin_crypto_vpmsumd ((__vector 
unsigned long ++ long)vdata5, (__vector unsigned long long)vconst1); ++ vdata5 = vec_ld(80, (__vector unsigned long long*) p); ++ VEC_PERM(vdata5, vdata5, vdata5, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v6 = vec_xor(v6, va6); ++ va6 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata6, (__vector unsigned long long)vconst1); ++ vdata6 = vec_ld(96, (__vector unsigned long long*) p); ++ VEC_PERM(vdata6, vdata6, vdata6, vperm_const); ++ GROUP_ENDING_NOP; ++ ++ v7 = vec_xor(v7, va7); ++ va7 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata7, (__vector unsigned long long)vconst1); ++ vdata7 = vec_ld(112, (__vector unsigned long long*) p); ++ VEC_PERM(vdata7, vdata7, vdata7, vperm_const); ++ ++ p = (char *)p + 128; ++ } ++ ++ /* First cool down*/ ++ vconst1 = vec_ld(offset, vcrc_const); ++ offset += 16; ++ ++ v0 = vec_xor(v0, va0); ++ va0 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata0, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v1 = vec_xor(v1, va1); ++ va1 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata1, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v2 = vec_xor(v2, va2); ++ va2 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata2, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v3 = vec_xor(v3, va3); ++ va3 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata3, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v4 = vec_xor(v4, va4); ++ va4 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata4, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v5 = vec_xor(v5, va5); ++ va5 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata5, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v6 = vec_xor(v6, va6); ++ va6 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata6, (__vector unsigned long long)vconst1); ++ GROUP_ENDING_NOP; ++ ++ v7 = 
vec_xor(v7, va7); ++ va7 = __builtin_crypto_vpmsumd ((__vector unsigned long ++ long)vdata7, (__vector unsigned long long)vconst1); ++ }/* else */ ++ ++ /* Second cool down. */ ++ v0 = vec_xor(v0, va0); ++ v1 = vec_xor(v1, va1); ++ v2 = vec_xor(v2, va2); ++ v3 = vec_xor(v3, va3); ++ v4 = vec_xor(v4, va4); ++ v5 = vec_xor(v5, va5); ++ v6 = vec_xor(v6, va6); ++ v7 = vec_xor(v7, va7); ++ ++#ifdef REFLECT ++ /* ++ * vpmsumd produces a 96 bit result in the least significant bits ++ * of the register. Since we are bit reflected we have to shift it ++ * left 32 bits so it occupies the least significant bits in the ++ * bit reflected domain. ++ */ ++ v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, ++ (__vector unsigned char)vzero, 4); ++ v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v1, ++ (__vector unsigned char)vzero, 4); ++ v2 = (__vector unsigned long long)vec_sld((__vector unsigned char)v2, ++ (__vector unsigned char)vzero, 4); ++ v3 = (__vector unsigned long long)vec_sld((__vector unsigned char)v3, ++ (__vector unsigned char)vzero, 4); ++ v4 = (__vector unsigned long long)vec_sld((__vector unsigned char)v4, ++ (__vector unsigned char)vzero, 4); ++ v5 = (__vector unsigned long long)vec_sld((__vector unsigned char)v5, ++ (__vector unsigned char)vzero, 4); ++ v6 = (__vector unsigned long long)vec_sld((__vector unsigned char)v6, ++ (__vector unsigned char)vzero, 4); ++ v7 = (__vector unsigned long long)vec_sld((__vector unsigned char)v7, ++ (__vector unsigned char)vzero, 4); ++#endif ++ ++ /* xor with the last 1024 bits. 
*/ ++ va0 = vec_ld(0, (__vector unsigned long long*) p); ++ VEC_PERM(va0, va0, va0, vperm_const); ++ ++ va1 = vec_ld(16, (__vector unsigned long long*) p); ++ VEC_PERM(va1, va1, va1, vperm_const); ++ ++ va2 = vec_ld(32, (__vector unsigned long long*) p); ++ VEC_PERM(va2, va2, va2, vperm_const); ++ ++ va3 = vec_ld(48, (__vector unsigned long long*) p); ++ VEC_PERM(va3, va3, va3, vperm_const); ++ ++ va4 = vec_ld(64, (__vector unsigned long long*) p); ++ VEC_PERM(va4, va4, va4, vperm_const); ++ ++ va5 = vec_ld(80, (__vector unsigned long long*) p); ++ VEC_PERM(va5, va5, va5, vperm_const); ++ ++ va6 = vec_ld(96, (__vector unsigned long long*) p); ++ VEC_PERM(va6, va6, va6, vperm_const); ++ ++ va7 = vec_ld(112, (__vector unsigned long long*) p); ++ VEC_PERM(va7, va7, va7, vperm_const); ++ ++ p = (char *)p + 128; ++ ++ vdata0 = vec_xor(v0, va0); ++ vdata1 = vec_xor(v1, va1); ++ vdata2 = vec_xor(v2, va2); ++ vdata3 = vec_xor(v3, va3); ++ vdata4 = vec_xor(v4, va4); ++ vdata5 = vec_xor(v5, va5); ++ vdata6 = vec_xor(v6, va6); ++ vdata7 = vec_xor(v7, va7); ++ ++ /* Check if we have more blocks to process */ ++ next_block = 0; ++ if (length != 0) { ++ next_block = 1; ++ ++ /* zero v0-v7 */ ++ v0 = vec_xor(v0, v0); ++ v1 = vec_xor(v1, v1); ++ v2 = vec_xor(v2, v2); ++ v3 = vec_xor(v3, v3); ++ v4 = vec_xor(v4, v4); ++ v5 = vec_xor(v5, v5); ++ v6 = vec_xor(v6, v6); ++ v7 = vec_xor(v7, v7); ++ } ++ length = length + 128; ++ ++ } while (next_block); ++ ++ /* Calculate how many bytes we have left. */ ++ length = (len & 127); ++ ++ /* Calculate where in (short) constant table we need to start. 
*/ ++ offset = 128 - length; ++ ++ v0 = vec_ld(offset, vcrc_short_const); ++ v1 = vec_ld(offset + 16, vcrc_short_const); ++ v2 = vec_ld(offset + 32, vcrc_short_const); ++ v3 = vec_ld(offset + 48, vcrc_short_const); ++ v4 = vec_ld(offset + 64, vcrc_short_const); ++ v5 = vec_ld(offset + 80, vcrc_short_const); ++ v6 = vec_ld(offset + 96, vcrc_short_const); ++ v7 = vec_ld(offset + 112, vcrc_short_const); ++ ++ offset += 128; ++ ++ v0 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata0,(__vector unsigned int)v0); ++ v1 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata1,(__vector unsigned int)v1); ++ v2 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata2,(__vector unsigned int)v2); ++ v3 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata3,(__vector unsigned int)v3); ++ v4 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata4,(__vector unsigned int)v4); ++ v5 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata5,(__vector unsigned int)v5); ++ v6 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata6,(__vector unsigned int)v6); ++ v7 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata7,(__vector unsigned int)v7); ++ ++ /* Now reduce the tail (0-112 bytes). */ ++ for (i = 0; i < length; i+=16) { ++ vdata0 = vec_ld(i,(__vector unsigned long long*)p); ++ VEC_PERM(vdata0, vdata0, vdata0, vperm_const); ++ va0 = vec_ld(offset + i,vcrc_short_const); ++ va0 = (__vector unsigned long long)__builtin_crypto_vpmsumw ( ++ (__vector unsigned int)vdata0,(__vector unsigned int)va0); ++ v0 = vec_xor(v0, va0); ++ } ++ ++ /* xor all parallel chunks together. 
*/ ++ v0 = vec_xor(v0, v1); ++ v2 = vec_xor(v2, v3); ++ v4 = vec_xor(v4, v5); ++ v6 = vec_xor(v6, v7); ++ ++ v0 = vec_xor(v0, v2); ++ v4 = vec_xor(v4, v6); ++ ++ v0 = vec_xor(v0, v4); ++ } ++ ++ /* Barrett Reduction */ ++ vconst1 = vec_ld(0, v_Barrett_const); ++ vconst2 = vec_ld(16, v_Barrett_const); ++ ++ v1 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, ++ (__vector unsigned char)v0, 8); ++ v0 = vec_xor(v1,v0); ++ ++#ifdef REFLECT ++ /* shift left one bit */ ++ __vector unsigned char vsht_splat = vec_splat_u8 (1); ++ v0 = (__vector unsigned long long)vec_sll ((__vector unsigned char)v0, ++ vsht_splat); ++#endif ++ ++ v0 = vec_and(v0, vmask_64bit); ++ ++#ifndef REFLECT ++ ++ /* ++ * Now for the actual algorithm. The idea is to calculate q, ++ * the multiple of our polynomial that we need to subtract. By ++ * doing the computation 2x bits higher (ie 64 bits) and shifting the ++ * result back down 2x bits, we round down to the nearest multiple. ++ */ ++ ++ /* ma */ ++ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v0, ++ (__vector unsigned long long)vconst1); ++ /* q = floor(ma/(2^64)) */ ++ v1 = (__vector unsigned long long)vec_sld ((__vector unsigned char)vzero, ++ (__vector unsigned char)v1, 8); ++ /* qn */ ++ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, ++ (__vector unsigned long long)vconst2); ++ /* a - qn, subtraction is xor in GF(2) */ ++ v0 = vec_xor (v0, v1); ++ /* ++ * Get the result into r3. We need to shift it left 8 bytes: ++ * V0 [ 0 1 2 X ] ++ * V0 [ 0 X 2 3 ] ++ */ ++ result = __builtin_unpack_vector_1 (v0); ++#else ++ ++ /* ++ * The reflected version of Barrett reduction. Instead of bit ++ * reflecting our data (which is expensive to do), we bit reflect our ++ * constants and our algorithm, which means the intermediate data in ++ * our vector registers goes from 0-63 instead of 63-0. We can reflect ++ * the algorithm because we don't carry in mod 2 arithmetic. 
++ */ ++ ++ /* bottom 32 bits of a */ ++ v1 = vec_and(v0, vmask_32bit); ++ ++ /* ma */ ++ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, ++ (__vector unsigned long long)vconst1); ++ ++ /* bottom 32bits of ma */ ++ v1 = vec_and(v1, vmask_32bit); ++ /* qn */ ++ v1 = __builtin_crypto_vpmsumd ((__vector unsigned long long)v1, ++ (__vector unsigned long long)vconst2); ++ /* a - qn, subtraction is xor in GF(2) */ ++ v0 = vec_xor (v0, v1); ++ ++ /* ++ * Since we are bit reflected, the result (ie the low 32 bits) is in ++ * the high 32 bits. We just need to shift it left 4 bytes ++ * V0 [ 0 1 X 3 ] ++ * V0 [ 0 X 2 3 ] ++ */ ++ ++ /* shift result into top 64 bits of v0 */ ++ v0 = (__vector unsigned long long)vec_sld((__vector unsigned char)v0, ++ (__vector unsigned char)vzero, 4); ++ ++ result = __builtin_unpack_vector_0 (v0); ++#endif ++ ++ return result; ++} +diff --git a/crc32.c b/crc32.c +index 9580440..406d350 100644 +--- a/crc32.c ++++ b/crc32.c +@@ -199,13 +199,78 @@ const z_crc_t FAR * ZEXPORT get_crc_table() + #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 + + /* ========================================================================= */ +-unsigned long ZEXPORT crc32_z(crc, buf, len) ++local ++unsigned long ZEXPORT crc32_table_lookup(crc, buf, len) + unsigned long crc; + const unsigned char FAR *buf; + z_size_t len; + { + if (buf == Z_NULL) return 0UL; + ++ crc = crc ^ 0xffffffffUL; ++ while (len >= 8) { ++ DO8; ++ len -= 8; ++ } ++ if (len) do { ++ DO1; ++ } while (--len); ++ return crc ^ 0xffffffffUL; ++} ++ ++/* Small helper function to compare optfun against the reference table lookup ++ * return test_ref_comparision_##optfn in crc32_z_ifunc ++#include <assert.h> ++#define TEST_COMPARE(optfn) \ ++ static unsigned long test_ref_comparision_ ## optfn(unsigned long crc, const unsigned char FAR *p, z_size_t len) \ ++ { \ ++ unsigned long crc_tbl_lookup = crc32_table_lookup(crc, p, len); \ ++ unsigned long optcrc = optfn(crc, p, len); \ ++ assert( optcrc == 
crc_tbl_lookup ); \ ++ return optcrc; \ ++ } ++*/ ++ ++#ifdef Z_IFUNC_ASM ++unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t) ++ __asm__ ("crc32_z"); ++__asm__(".type crc32_z, %gnu_indirect_function"); ++#elif defined(Z_IFUNC_NATIVE) ++unsigned long ZEXPORT crc32_z( ++ unsigned long crc, ++ const unsigned char FAR *buf, ++ z_size_t len) ++ __attribute__ ((ifunc ("crc32_z_ifunc"))); ++#endif ++ ++#if _ARCH_PWR8==1 ++unsigned long crc32_vpmsum(unsigned long, const unsigned char FAR *, z_size_t); ++/* for testing TEST_COMPARE(crc32_vpmsum) */ ++#ifndef __BUILTIN_CPU_SUPPORTS__ ++#include <sys/auxv.h> ++#include <asm/cputable.h> ++#endif ++#endif ++ ++/* due to a quirk of gnu_indirect_function - "local" (aka static) is applied to ++ * crc32_z which is not desired. crc32_z_ifunc is implicitly "local" */ ++#ifndef Z_IFUNC_ASM ++local ++#endif ++unsigned long (*(crc32_z_ifunc(void)))(unsigned long, const unsigned char FAR *, z_size_t) ++{ ++#if _ARCH_PWR8==1 ++#if defined(__BUILTIN_CPU_SUPPORTS__) ++ if (__builtin_cpu_supports("arch_2_07")) ++ return crc32_vpmsum; ++#else ++ if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) ++ return crc32_vpmsum; ++#endif ++#endif /* _ARCH_PWR8 */ ++ ++/* return a function pointer for optimized arches here */ ++ + #ifdef DYNAMIC_CRC_TABLE + if (crc_table_empty) + make_crc_table(); +@@ -217,22 +282,31 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) + + endian = 1; + if (*((unsigned char *)(&endian))) +- return crc32_little(crc, buf, len); ++ return crc32_little; + else +- return crc32_big(crc, buf, len); ++ return crc32_big; + } + #endif /* BYFOUR */ +- crc = crc ^ 0xffffffffUL; +- while (len >= 8) { +- DO8; +- len -= 8; +- } +- if (len) do { +- DO1; +- } while (--len); +- return crc ^ 0xffffffffUL; ++ ++ return crc32_table_lookup; + } + ++#if !defined(Z_IFUNC_ASM) && !defined(Z_IFUNC_NATIVE) ++ ++unsigned long ZEXPORT crc32_z(crc, buf, len) ++ unsigned long crc; ++ const unsigned char FAR *buf; ++ z_size_t len; ++{ ++ static 
unsigned long ZEXPORT (*crc32_func)(unsigned long, const unsigned char FAR *, z_size_t) = NULL; ++ ++ if (!crc32_func) ++ crc32_func = crc32_z_ifunc(); ++ return (*crc32_func)(crc, buf, len); ++} ++ ++#endif /* !defined(Z_IFUNC_ASM) && !defined(Z_IFUNC_NATIVE) */ ++ + /* ========================================================================= */ + unsigned long ZEXPORT crc32(crc, buf, len) + unsigned long crc; +@@ -271,6 +345,7 @@ local unsigned long crc32_little(crc, buf, len) + register z_crc_t c; + register const z_crc_t FAR *buf4; + ++ if (buf == Z_NULL) return 0UL; + c = (z_crc_t)crc; + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { +@@ -311,6 +386,7 @@ local unsigned long crc32_big(crc, buf, len) + register z_crc_t c; + register const z_crc_t FAR *buf4; + ++ if (buf == Z_NULL) return 0UL; + c = ZSWAP32((z_crc_t)crc); + c = ~c; + while (len && ((ptrdiff_t)buf & 3)) { +-- +2.19.1 + diff --git a/zlib-1.2.11-optimized-s390.patch b/zlib-1.2.11-optimized-s390.patch new file mode 100644 index 0000000000000000000000000000000000000000..50a51510ab236ca6372f9ab8544cb23a35ea0139 --- /dev/null +++ b/zlib-1.2.11-optimized-s390.patch @@ -0,0 +1,41 @@ +diff --git a/deflate.c b/deflate.c +index 1ec7614..b724c8d 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -1233,15 +1233,16 @@ local void lm_init (s) + /* For 80x86 and 680x0, an optimized version will be provided in match.asm or + * match.S. The code will be functionally equivalent. 
+ */ +-local uInt longest_match(s, cur_match) ++local uInt longest_match(s, pcur_match) + deflate_state *s; +- IPos cur_match; /* current match */ ++ IPos pcur_match; /* current match */ + { ++ ptrdiff_t cur_match = pcur_match; /* extend to pointer width */ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ +- int best_len = (int)s->prev_length; /* best match length so far */ ++ ptrdiff_t best_len = s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; +@@ -1256,12 +1257,12 @@ local uInt longest_match(s, cur_match) + * Try with and without -DUNALIGNED_OK to check. + */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; +- register ush scan_start = *(ushf*)scan; +- register ush scan_end = *(ushf*)(scan+best_len-1); ++ register uInt scan_start = *(ushf*)scan; ++ register uInt scan_end = *(ushf*)(scan+best_len-1); + #else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; +- register Byte scan_end1 = scan[best_len-1]; +- register Byte scan_end = scan[best_len]; ++ register uInt scan_end1 = scan[best_len-1]; ++ register uInt scan_end = scan[best_len]; + #endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. 
diff --git a/zlib-1.2.11-permit-deflateParams-change.patch b/zlib-1.2.11-permit-deflateParams-change.patch index 3a8393020a31fd42e162191de605c1a930e168c9..4f7b0effef71774ff95c22cc21704c089967cd67 100644 --- a/zlib-1.2.11-permit-deflateParams-change.patch +++ b/zlib-1.2.11-permit-deflateParams-change.patch @@ -1,10 +1,9 @@ -From f9694097dd69354b03cb8af959094c7f260db0a1 Mon Sep 17 00:00:00 2001 -From: Mark Adler -Date: Mon, 16 Jan 2017 09:49:35 -0800 -Subject: [PATCH] Permit a deflateParams() parameter change as soon as - possible. +From d09bb1ab8ef9bb91457c0ead09589e8807489260 Mon Sep 17 00:00:00 2001 +From: Ondrej Dubaj +Date: Thu, 6 Aug 2020 08:09:53 +0200 +Subject: [PATCH] Permit a deflateParams() parameter change. -This commit allows a parameter change even if the input data has +This change allows a parameter change even if the input data has not all been compressed and copied to the application output buffer, so long as all of the input data has been compressed to the internal pending output buffer. This also allows an immediate @@ -16,10 +15,10 @@ since initialization or reset. 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/deflate.c b/deflate.c -index b63311a..20bda4f 100644 +index 9705c1c..f3c9924 100644 --- a/deflate.c +++ b/deflate.c -@@ -494,7 +494,7 @@ int ZEXPORT deflateResetKeep (strm) +@@ -509,7 +509,7 @@ int ZEXPORT deflateResetKeep (strm) s->wrap == 2 ? 
crc32(0L, Z_NULL, 0) : #endif adler32(0L, Z_NULL, 0); @@ -28,14 +27,15 @@ index b63311a..20bda4f 100644 _tr_init(s); -@@ -587,12 +587,12 @@ int ZEXPORT deflateParams(strm, level, strategy) +@@ -606,13 +606,13 @@ int ZEXPORT deflateParams(strm, level, strategy) func = configuration_table[s->level].func; - if ((strategy != s->strategy || func != configuration_table[level].func) && -- s->high_water) { -+ s->last_flush != -2) { + if ((strategy != s->strategy || func != configuration_table[level].func || +- hook_flush != Z_NO_FLUSH) && s->high_water) { ++ hook_flush != Z_NO_FLUSH) && s->last_flush != -2) { /* Flush the last buffer: */ - int err = deflate(strm, Z_BLOCK); + int err = deflate(strm, RANK(hook_flush) > RANK(Z_BLOCK) ? + hook_flush : Z_BLOCK); if (err == Z_STREAM_ERROR) return err; - if (strm->avail_out == 0) @@ -44,7 +44,7 @@ index b63311a..20bda4f 100644 } if (s->level != level) { diff --git a/zlib.h b/zlib.h -index 5daf4f2..577d81e 100644 +index f09cdaf..001624e 100644 --- a/zlib.h +++ b/zlib.h @@ -712,11 +712,12 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm, @@ -66,5 +66,5 @@ index 5daf4f2..577d81e 100644 If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does not have enough output space to complete, then the parameter change will not -- -2.29.2 +2.26.0 diff --git a/zlib.spec b/zlib.spec index ab4cafa7af1af49dd146745a987b41a1f0313388..acf66c0b1d45035fffad5bca1b9fc98a13068570 100644 --- a/zlib.spec +++ b/zlib.spec @@ -1,10 +1,9 @@ -%define anolis_release .0.1 # disabled, per rhbz#1609830 and rhbz#1602742 %bcond_with minizip Name: zlib Version: 1.2.11 -Release: 19%{anolis_release}%{?dist} +Release: 21%{?dist} Summary: The compression and decompression library # /contrib/dotzlib/ have Boost license License: zlib and Boost @@ -13,24 +12,35 @@ URL: http://www.zlib.net/ Source: http://www.zlib.net/zlib-%{version}.tar.xz # https://github.com/madler/zlib/pull/210 Patch0: zlib-1.2.5-minizip-fixuncrypt.patch +# resolves: #805113 +Patch1: 
zlib-1.2.11-optimized-s390.patch +# IBM Z optimizations +Patch2: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-s390x.patch +# IBM CRC32 optimization for POWER archs +Patch3: zlib-1.2.11-optimized-CRC32-framework.patch +# fixed firefox crash + added test case +Patch4: zlib-1.2.11-firefox-crash-fix.patch +# fixed covscan issues +Patch5: zlib-1.2.11-covscan-issues.patch +# fix for IBM Z optimizations +Patch6: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-fix.patch # permit a deflateParams() parameter change -Patch1: zlib-1.2.11-permit-deflateParams-change.patch -Patch2: zlib-1.2.11-CVE-2018-25032.patch - -# Patches for CVE-2022-37434 -Patch3: zlib-1.2.11-cve-2022-37434.patch -Patch4: zlib-1.2.11-cve-2022-37434_2.patch - -# general aarch64 optimizations -Patch1001: 1001-zlib-anolis-Neon-Optimized-hash-chain-rebase.patch -Patch1002: 1002-zlib-anolis-Porting-optimized-longest_match.patch -Patch1003: 1003-zlib-anolis-arm64-specific-build-patch.patch -# optimized CRC32 function in armv8 -Patch1004: 1004-zlib-anolis-compute-crc32-using-armv8-specific-instruction.patch -# ARM optimized insert_string -Patch1005: 1005-zlib-anolis-ARM-optimized-insert_string.patch -# x86_64 optimized slide_hash -Patch1006: 1006-zlib-anolis-Optimize-slide_hash.patch +Patch7: zlib-1.2.11-permit-deflateParams-change.patch +# fixed DFLTCC compression level switching issues +# enabled HW compression for compression levels 1 through 6 +Patch8: zlib-1.2.11-IBM-DFLTCC-compression-level-switching-issues.patch +# fixed inflateSyncPoint() bad return value on z15 +Patch9: zlib-1.2.11-inflateSyncPoint-return-value-fix.patch +Patch10: zlib-1.2.11-CVE-2018-25032.patch +# Fix the compressBound() on z15 +Patch11: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-compressBound-fix.patch + +# Fix CVE-2022-37434 +Patch12: zlib-1.2.11-cve-2022-37434.patch +Patch13: zlib-1.2.11-cve-2022-37434_2.patch + +# Fix setting strm.adler on z15 +Patch14: zlib-1.2.11-IBM-Z-hw-accelrated-deflate-strm-adler-fix.patch BuildRequires: automake, 
autoconf, libtool @@ -82,22 +92,23 @@ developing applications which use minizip. %prep %setup -q %patch0 -p1 -b .fixuncrypt +%ifarch s390 s390x %patch1 -p1 -b .optimized-deflate +%endif %patch2 -p1 %patch3 -p1 %patch4 -p1 +%patch5 -p1 +%patch6 -p1 +%patch7 -p1 +%patch8 -p1 +%patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 +%patch14 -p1 -%ifarch aarch64 -%patch1001 -p1 -%patch1002 -p1 -%patch1003 -p1 -%patch1004 -p1 -%patch1005 -p1 -%endif - -%ifarch x86_64 -%patch1006 -p1 -%endif iconv -f iso-8859-2 -t utf-8 < ChangeLog > ChangeLog.tmp mv ChangeLog.tmp ChangeLog @@ -105,15 +116,17 @@ mv ChangeLog.tmp ChangeLog %build export CFLAGS="$RPM_OPT_FLAGS" - -%ifarch aarch64 -CFLAGS+=" -DARM_NEON -O3" -CFLAGS+=" -march=armv8-a+crc" +%ifarch ppc64 +CFLAGS+=" -O3" %endif export LDFLAGS="$LDFLAGS -Wl,-z,relro -Wl,-z,now" # no-autotools, %%configure is not compatible +%ifarch s390 s390x +./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} --dfltcc +%else ./configure --libdir=%{_libdir} --includedir=%{_includedir} --prefix=%{_prefix} +%endif %make_build %if %{with minizip} @@ -175,14 +188,18 @@ find $RPM_BUILD_ROOT -name '*.la' -delete %changelog -* Mon Oct 31 2022 Chunmei Xu - 1.2.11-19.0.1 -- remove optimised patches for s390 and IBM Z -- add optimised patches for aarch64 and x86_64 +* Wed Oct 12 2022 Ilya Leoshkevich - 1.2.11-21 +- Fix for IBM strm.adler rhbz#2134074 -* Tue Aug 09 2022 Matej Mužila - 1.2.11.19 +* Tue Aug 09 2022 Matej Mužila - 1.2.11-20 - Fix heap-based buffer over-read or buffer overflow in inflate in inflate.c - Resolves: CVE-2022-37434 +* Mon May 16 2022 Lukas Javorsky - 1.2.11-19 +- Apply IBM patch for compressBound() function +- Source from https://github.com/madler/zlib/issues/410#issuecomment-947212824 +- Resolves: #2056900 + * Tue Mar 29 2022 Matej Mužila - 1.2.11-18 - Resolves: CVE-2018-25032