diff --git a/1007-zlib-anolis-Neon-Optimized-adler32.patch b/1007-zlib-anolis-Neon-Optimized-adler32.patch
deleted file mode 100644
index c2f53076e179ad90e4627b9aefe7916d2b4e507b..0000000000000000000000000000000000000000
--- a/1007-zlib-anolis-Neon-Optimized-adler32.patch
+++ /dev/null
@@ -1,244 +0,0 @@
-diff -uNr zlib-1.2.11/adler32.c ../zlib-1.2.11/adler32.c
---- zlib-1.2.11/adler32.c	2017-01-01 15:37:10.000000000 +0800
-+++ ../zlib-1.2.11/adler32.c	2022-08-24 11:39:57.877129854 +0800
-@@ -59,6 +59,9 @@
- #  define MOD63(a) a %= BASE
- #endif
- 
-+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-+extern uLong adler32_neon(uLong adler, const Bytef *buf, z_size_t len);
-+#endif
- /* ========================================================================= */
- uLong ZEXPORT adler32_z(adler, buf, len)
-     uLong adler;
-@@ -68,6 +71,9 @@
-     unsigned long sum2;
-     unsigned n;
- 
-+#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-+    return adler32_neon(adler, buf, len);
-+#endif
-     /* split Adler-32 into component sums */
-     sum2 = (adler >> 16) & 0xffff;
-     adler &= 0xffff;
-diff -uNr zlib-1.2.11/contrib/arm/adler32_neon.S ../zlib-1.2.11/contrib/arm/adler32_neon.S
---- zlib-1.2.11/contrib/arm/adler32_neon.S	1970-01-01 08:00:00.000000000 +0800
-+++ ../zlib-1.2.11/contrib/arm/adler32_neon.S	2022-08-23 18:10:10.682185859 +0800
-@@ -0,0 +1,178 @@
-+/**********************************************************************
-+  Copyright(c) 2019 Arm Corporation All rights reserved.
-+
-+  Redistribution and use in source and binary forms, with or without
-+  modification, are permitted provided that the following conditions
-+  are met:
-+    * Redistributions of source code must retain the above copyright
-+      notice, this list of conditions and the following disclaimer.
-+    * Redistributions in binary form must reproduce the above copyright
-+      notice, this list of conditions and the following disclaimer in
-+      the documentation and/or other materials provided with the
-+      distribution.
-+    * Neither the name of Arm Corporation nor the names of its
-+      contributors may be used to endorse or promote products derived
-+      from this software without specific prior written permission.
-+
-+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+**********************************************************************/
-+        .arch armv8-a+crypto
-+        .text
-+        .align 3
-+
-+/*
-+Macros
-+*/
-+
-+.macro declare_var_vector_reg name:req,reg:req
-+        \name\()_q .req q\reg
-+        \name\()_v .req v\reg
-+        \name\()_s .req s\reg
-+        \name\()_d .req d\reg
-+.endm
-+
-+.macro mod_adler dest:req,tmp:req
-+        umull \tmp\()_x,\dest,const_div1
-+        lsr \tmp\()_x,\tmp\()_x,47
-+        msub \dest,\tmp,const_div2,\dest
-+.endm
-+
-+/*
-+        uint32_t adler32_neon(uint32_t adler32, uint8_t * start, uint32_t length);
-+*/
-+/*
-+Arguments list
-+*/
-+        adler32 .req w0
-+        start .req x1
-+        length .req x2
-+        .global adler32_neon
-+        .type adler32_neon, %function
-+adler32_neon:
-+/*
-+local variables
-+*/
-+        declare_var_vector_reg factor0 , 6
-+        declare_var_vector_reg factor1 , 7
-+        declare_var_vector_reg d0 , 4
-+        declare_var_vector_reg d1 , 5
-+        declare_var_vector_reg adacc , 2
-+        declare_var_vector_reg s2acc , 3
-+        declare_var_vector_reg zero , 16
-+        declare_var_vector_reg adler , 17
-+        declare_var_vector_reg back_d0 , 18
-+        declare_var_vector_reg back_d1 , 19
-+        declare_var_vector_reg sum2 , 20
-+        declare_var_vector_reg tmp2 , 20
-+
-+        adler0 .req w4
-+        adler1 .req w5
-+        adler0_x .req x4
-+        adler1_x .req x5
-+        end .req x0
-+        tmp .req w8
-+        tmp_x .req x8
-+        tmp1_x .req x9
-+        loop_cnt .req x10
-+        loop_const .req x11
-+        const_div1 .req w6
-+        const_div2 .req w7
-+        mov const_div1, 32881
-+        movk const_div1, 0x8007, lsl 16
-+        mov const_div2, 65521
-+        and adler0, adler32, 0xffff
-+        lsr adler1, adler32, 16
-+
-+        lsr loop_cnt,length,5
-+        adrp x3,factors
-+        add x3,x3,:lo12:factors
-+        ld1 {factor0_v.16b-factor1_v.16b},[x3]
-+
-+        add end,start,length
-+        cbz loop_cnt,final_accum32
-+        ld1 {back_d0_v.16b-back_d1_v.16b},[start]
-+        mov loop_const,173
-+
-+        movi v16.4s,0
-+
-+
-+
-+
-+great_than_32:
-+        cmp loop_cnt,173
-+        csel loop_const,loop_cnt,loop_const,le
-+        mov adacc_v.16b,zero_v.16b
-+        mov s2acc_v.16b,zero_v.16b
-+        ins adacc_v.s[0],adler0
-+        ins s2acc_v.s[0],adler1
-+        add tmp_x,start,loop_const,lsl 5
-+
-+accum32_neon:
-+        add start,start,32
-+        mov d0_v.16b,back_d0_v.16b
-+        mov d1_v.16b,back_d1_v.16b
-+        ld1 {back_d0_v.16b-back_d1_v.16b},[start]
-+
-+        shl tmp2_v.4s,adacc_v.4s,5
-+        add s2acc_v.4s,s2acc_v.4s,tmp2_v.4s
-+
-+        uaddlp adler_v.8h,d0_v.16b
-+        uadalp adler_v.8h,d1_v.16b
-+        uadalp adacc_v.4s,adler_v.8h
-+
-+        umull sum2_v.8h,factor0_v.8b ,d0_v.8b
-+        umlal2 sum2_v.8h,factor0_v.16b,d0_v.16b
-+        umlal sum2_v.8h,factor1_v.8b ,d1_v.8b
-+        umlal2 sum2_v.8h,factor1_v.16b,d1_v.16b
-+        uadalp s2acc_v.4s,sum2_v.8h
-+
-+        cmp start,tmp_x
-+        bne accum32_neon
-+
-+        uaddlv adacc_d,adacc_v.4s
-+        uaddlv s2acc_d,s2acc_v.4s
-+        fmov adler0_x,adacc_d
-+        fmov adler1_x,s2acc_d
-+
-+        mod_adler adler0,tmp
-+        mod_adler adler1,tmp
-+        sub loop_cnt,loop_cnt,loop_const
-+        cbnz loop_cnt,great_than_32
-+
-+final_accum32:
-+        and length,length,31
-+        cbz length,end_func
-+
-+accum32_body:
-+        cmp start,end
-+        beq end_func
-+        ldrb tmp,[start],1
-+        add adler0,adler0,tmp
-+        add adler1,adler1,adler0
-+        b accum32_body
-+
-+end_func:
-+        mod_adler adler0,tmp
-+        mod_adler adler1,tmp
-+        orr w0,adler0,adler1,lsl 16
-+        ret
-+
-+        .size adler32_neon, .-adler32_neon
-+        .section .rodata.cst16,"aM",@progbits,16
-+        .align 4
-+factors:
-+        .quad 0x191a1b1c1d1e1f20
-+        .quad 0x1112131415161718
-+        .quad 0x090a0b0c0d0e0f10
-+        .quad 0x0102030405060708
-+
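For reference, the mod_adler macro above is a reciprocal-multiply reduction rather than a division: const_div1 is 0x80078071 = ceil(2^47/65521), so umull plus lsr #47 yields the exact quotient for any 32-bit input and msub produces the remainder mod BASE (65521). The factors table supplies the byte weights 32..1 for the sum2 term, the shl #5 step adds the 32*adler contribution of each 32-byte chunk, and the cap of 173 outer iterations appears to keep the unreduced 32-bit lane sums from overflowing before mod_adler runs. A minimal C sketch of the same reduction (mod_base is an illustrative name, not part of the patch):

#include <stdint.h>

/* x mod 65521 via multiply-high, mirroring mod_adler above:
 * 0x80078071 == ceil(2^47 / 65521), so q is the exact quotient
 * for any 32-bit x, and the subtraction is the msub step. */
static inline uint32_t mod_base(uint32_t x)
{
    uint32_t q = (uint32_t)(((uint64_t)x * 0x80078071u) >> 47);
    return x - q * 65521u;
}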
-diff -uNr zlib-1.2.11/Makefile.in ../zlib-1.2.11/Makefile.in
---- zlib-1.2.11/Makefile.in	2022-08-24 11:42:41.123603868 +0800
-+++ ../zlib-1.2.11/Makefile.in	2022-08-24 11:50:51.087023722 +0800
-@@ -57,11 +57,11 @@
- ZINC=
- ZINCOUT=-I.
- 
---OBJZ = adler32.o crc32_acle.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o
-+OBJZ = adler32.o adler32_neon.o crc32_acle.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o
- OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o
- OBJC = $(OBJZ) $(OBJG)
- 
---PIC_OBJZ = adler32.lo crc32_acle.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo
-+PIC_OBJZ = adler32.lo adler32_neon.lo crc32_acle.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo
- PIC_OBJG = compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo
- PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG)
- 
-@@ -158,6 +158,9 @@
- adler32.o: $(SRCDIR)adler32.c
- 	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)adler32.c
- 
-+adler32_neon.o: $(SRCDIR)contrib/arm/adler32_neon.S
-+	$(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -c -o $@ $(SRCDIR)contrib/arm/adler32_neon.S
-+
- crc32_acle.o: $(SRCDIR)contrib/arm/crc32_acle.c
- 	$(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -c -o $@ $(SRCDIR)contrib/arm/crc32_acle.c
- 
-@@ -208,6 +211,11 @@
- 	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/adler32.o $(SRCDIR)adler32.c
- 	-@mv objs/adler32.o $@
- 
-+adler32_neon.lo: $(SRCDIR)contrib/arm/adler32_neon.S
-+	-@mkdir objs 2>/dev/null || test -d objs
-+	$(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -DPIC -c -o objs/adler32_neon.o $(SRCDIR)contrib/arm/adler32_neon.S
-+	-@mv objs/adler32_neon.o $@
-+
- crc32_acle.lo: $(SRCDIR)contrib/arm/crc32_acle.c
- 	-@mkdir objs 2>/dev/null || test -d objs
- 	$(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -DPIC -c -o objs/crc32_acle.o $(SRCDIR)contrib/arm/crc32_acle.c
diff --git a/1008-zlib-anolis-Optimized-crc32-pmul-mix.patch b/1008-zlib-anolis-Optimized-crc32-pmul-mix.patch
deleted file mode 100644
index dbfecf54c199ce31f1e3970b853b01f9fd07a1a8..0000000000000000000000000000000000000000
--- a/1008-zlib-anolis-Optimized-crc32-pmul-mix.patch
+++ /dev/null
@@ -1,580 +0,0 @@
-diff -uNr zlib-1.2.11/contrib/arm/crc32_common_mix.S ../zlib-1.2.11/contrib/arm/crc32_common_mix.S
---- zlib-1.2.11/contrib/arm/crc32_common_mix.S	1970-01-01 08:00:00.000000000 +0800
-+++ ../zlib-1.2.11/contrib/arm/crc32_common_mix.S	2022-08-25 19:26:40.154179151 +0800
-@@ -0,0 +1,437 @@
-+/**********************************************************************
-+  Copyright(c) 2020 Arm Corporation All rights reserved.
-+
-+  Redistribution and use in source and binary forms, with or without
-+  modification, are permitted provided that the following conditions
-+  are met:
-+    * Redistributions of source code must retain the above copyright
-+      notice, this list of conditions and the following disclaimer.
-+    * Redistributions in binary form must reproduce the above copyright
-+      notice, this list of conditions and the following disclaimer in
-+      the documentation and/or other materials provided with the
-+      distribution.
-+    * Neither the name of Arm Corporation nor the names of its
-+      contributors may be used to endorse or promote products derived
-+      from this software without specific prior written permission.
-+
-+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-+  A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT -+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+**********************************************************************/ -+ -+ -+.macro declare_var_vector_reg name:req,reg:req -+ \name\()_q .req q\reg -+ \name\()_v .req v\reg -+ \name\()_s .req s\reg -+ \name\()_d .req d\reg -+.endm -+ declare_var_vector_reg k1k2,20 -+ declare_var_vector_reg k3k4,21 -+ declare_var_vector_reg poly,22 -+ declare_var_vector_reg k5k0,23 -+ declare_var_vector_reg mask,24 -+ declare_var_vector_reg fold_poly,25 -+ -+ declare_var_vector_reg tmp0,0 -+ declare_var_vector_reg tmp1,1 -+ declare_var_vector_reg tmp2,2 -+ declare_var_vector_reg tmp3,3 -+ declare_var_vector_reg tmp4,4 -+ declare_var_vector_reg tmp5,5 -+ declare_var_vector_reg tmp6,6 -+ declare_var_vector_reg tmp7,7 -+ declare_var_vector_reg pmull_data0,16 -+ declare_var_vector_reg pmull_data1,17 -+ declare_var_vector_reg pmull_data2,18 -+ declare_var_vector_reg pmull_data3,19 -+ -+ vzr .req v26 -+ -+ const_addr .req x3 -+ crc_blk_ptr .req x4 -+ pmull_blk_ptr .req x5 -+ crc_data0 .req x6 -+ crc_data1 .req x7 -+ crc_data2 .req x9 -+ crc_data3 .req x10 -+ wPmull .req w11 -+ xPmull .req x11 -+ -+ data0 .req x4 -+ data1 .req x5 -+ data2 .req x6 -+ data3 .req x7 -+ wdata .req w4 -+ -+.macro pmull_fold -+ -+ pmull2 tmp4_v.1q, tmp0_v.2d, k1k2_v.2d -+ pmull2 tmp5_v.1q, tmp1_v.2d, k1k2_v.2d -+ pmull2 tmp6_v.1q, tmp2_v.2d, k1k2_v.2d -+ pmull2 tmp7_v.1q, tmp3_v.2d, k1k2_v.2d -+ -+ pmull tmp0_v.1q, tmp0_v.1d, k1k2_v.1d -+ pmull tmp1_v.1q, tmp1_v.1d, k1k2_v.1d -+ pmull tmp2_v.1q, tmp2_v.1d, k1k2_v.1d -+ pmull tmp3_v.1q, tmp3_v.1d, k1k2_v.1d -+ ld1 {pmull_data0_v.16b-pmull_data3_v.16b},[pmull_blk_ptr],#64 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ -+ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b -+ eor tmp1_v.16b, tmp1_v.16b, tmp5_v.16b -+ eor tmp2_v.16b, tmp2_v.16b, tmp6_v.16b -+ eor tmp3_v.16b, tmp3_v.16b, tmp7_v.16b -+ -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ eor tmp0_v.16b, tmp0_v.16b, v16.16b -+ eor tmp1_v.16b, tmp1_v.16b, v17.16b -+ eor tmp2_v.16b, tmp2_v.16b, v18.16b -+ eor tmp3_v.16b, tmp3_v.16b, v19.16b -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+.endm -+ -+ -+ -+.macro crc32_common_mix poly_type -+ .set MIX_BLK_SIZE,2048 -+ -+.ifc \poly_type,crc32 -+ mvn wCRC,wCRC -+.endif -+ cmp LEN,MIX_BLK_SIZE-1 -+ adr const_addr, .Lconstants -+ bls start_final -+ ld1 {k1k2_v.16b,k3k4_v.16b,poly_v.16b},[const_addr],#48 -+ movi vzr.16b, #0 -+ ld1 {k5k0_v.8b,mask_v.8b,fold_poly_v.8b},[const_addr] -+ -+loop_2048: -+ ld1 {tmp0_v.16b-tmp3_v.16b}, [BUF] -+ add 
pmull_blk_ptr,BUF,0x40 -+ add crc_blk_ptr, BUF,512 -+ mov tmp4_v.16b,vzr.16b -+ fmov tmp4_s, wCRC -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ eor tmp0_v.16b,tmp0_v.16b,tmp4_v.16b -+ mov wCRC, 0 -+ sub LEN,LEN,MIX_BLK_SIZE -+ cmp LEN,MIX_BLK_SIZE -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ -+ pmull_fold -+ pmull_fold -+ pmull_fold -+ pmull_fold -+ pmull_fold -+ pmull_fold -+ pmull_fold -+ -+ /* Folding cache line into 128bit */ -+ pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ eor tmp0_v.16b, tmp0_v.16b, tmp2_v.16b -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ pmull2 tmp4_v.1q, tmp0_v.2d, k3k4_v.2d -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ pmull tmp0_v.1q, tmp0_v.1d, k3k4_v.1d -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ eor tmp0_v.16b, tmp0_v.16b, tmp4_v.16b -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 
wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ eor tmp0_v.16b, tmp0_v.16b, tmp3_v.16b -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ -+ -+ /** -+ * perform the last 64 bit fold, also -+ * adds 32 zeroes to the input stream -+ */ -+ ext tmp1_v.16b, tmp0_v.16b, tmp0_v.16b, #8 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ pmull2 tmp1_v.1q, tmp1_v.2d, k3k4_v.2d -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ ext tmp0_v.16b, tmp0_v.16b, vzr.16b, #8 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ -+ /* final 32-bit fold */ -+ ext tmp1_v.16b, tmp0_v.16b, vzr.16b, #4 -+ and tmp0_v.16b, tmp0_v.16b, mask_v.16b -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ pmull tmp0_v.1q, tmp0_v.1d, k5k0_v.1d -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b -+ -+ /** -+ * Finish up with the bit-reversed barrett -+ * reduction 64 ==> 32 bits -+ */ -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ and tmp1_v.16b, tmp0_v.16b, mask_v.16b -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ ext tmp1_v.16b, vzr.16b, tmp1_v.16b, #8 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ pmull2 tmp1_v.1q, tmp1_v.2d, poly_v.2d -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ and tmp1_v.16b, tmp1_v.16b, mask_v.16b -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ pmull tmp1_v.1q, tmp1_v.1d, poly_v.1d -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ eor tmp0_v.16b, tmp0_v.16b, tmp1_v.16b -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ mov tmp4_v.16b,vzr.16b -+ mov tmp4_v.s[0], tmp0_v.s[1] -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 
wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ ldp crc_data0,crc_data1,[crc_blk_ptr],16 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ ldp crc_data2,crc_data3,[crc_blk_ptr],16 -+ -+ crc32_u64 wCRC,wCRC,crc_data0 -+ crc32_u64 wCRC,wCRC,crc_data1 -+ crc32_u64 wCRC,wCRC,crc_data2 -+ crc32_u64 wCRC,wCRC,crc_data3 -+ -+ pmull tmp4_v.1q, tmp4_v.1d, fold_poly_v.1d -+ add BUF,BUF,MIX_BLK_SIZE -+ fmov xPmull, tmp4_d -+ crc32_u64 wPmull, wzr, xPmull -+ eor wCRC, wPmull, wCRC -+ bge loop_2048 -+start_final: -+ cmp LEN, 63 -+ bls .loop_16B -+.p2align 6 -+.loop_64B: -+ ldp data0, data1, [BUF],#16 -+ sub LEN,LEN,#64 -+ ldp data2, data3, [BUF],#16 -+ crc32_u64 wCRC, wCRC, data0 -+ crc32_u64 wCRC, wCRC, data1 -+ ldp data0, data1, [BUF],#16 -+ crc32_u64 wCRC, wCRC, data2 -+ crc32_u64 wCRC, wCRC, data3 -+ ldp data2, data3, [BUF],#16 -+ crc32_u64 wCRC, wCRC, data0 -+ crc32_u64 wCRC, wCRC, data1 -+ crc32_u64 wCRC, wCRC, data2 -+ crc32_u64 wCRC, wCRC, data3 -+ cmp LEN,#64 -+ bge .loop_64B -+ cbz LEN, .finish_exit -+ -+.p2align 6 -+.loop_16B: -+ cmp LEN, 15 -+ bls .less_16B -+ ldp data0, data1, [BUF],#16 -+ sub LEN,LEN,#16 -+ crc32_u64 wCRC, wCRC, data0 -+ crc32_u64 wCRC, wCRC, data1 -+ cmp LEN,15 -+ bls .less_16B -+ ldp data0, data1, [BUF],#16 -+ sub LEN,LEN,#16 -+ crc32_u64 wCRC, wCRC, data0 -+ crc32_u64 wCRC, wCRC, data1 -+ cmp LEN,15 -+ bls .less_16B -+ ldp data0, data1, [BUF],#16 -+ sub LEN,LEN,#16 //MUST less than 16B -+ crc32_u64 wCRC, wCRC, data0 -+ crc32_u64 wCRC, wCRC, data1 -+.less_16B: -+ cbz LEN, .finish_exit -+ cmp LEN, 7 -+ bls .less_8B -+ ldr data0, [BUF], 8 -+ sub LEN, LEN, #8 -+ crc32_u64 wCRC, wCRC, data0 -+.less_8B: -+ cbz LEN, .finish_exit -+ cmp LEN, 3 -+ bls .less_4B -+ ldr wdata, [BUF], 4 -+ sub LEN, LEN, #4 -+ crc32_u32 wCRC, wCRC, wdata -+.less_4B: -+ cbz LEN, .finish_exit -+ cmp LEN, 1 -+ bls .less_2B -+ ldrh wdata, [BUF], 2 -+ sub LEN, LEN, #2 -+ crc32_u16 wCRC, wCRC, wdata -+.less_2B: -+ cbz LEN, .finish_exit -+ ldrb wdata, [BUF] -+ crc32_u8 wCRC, wCRC, wdata -+.finish_exit: -+.ifc \poly_type,crc32 -+ mvn w0, wCRC -+.else -+ mov w0, wCRC -+.endif -+ ret -+.endm -diff -uNr zlib-1.2.11/contrib/arm/crc32_mix.S ../zlib-1.2.11/contrib/arm/crc32_mix.S ---- zlib-1.2.11/contrib/arm/crc32_mix.S 1970-01-01 08:00:00.000000000 +0800 -+++ ../zlib-1.2.11/contrib/arm/crc32_mix.S 2022-08-25 19:44:42.946181615 +0800 -@@ -0,0 +1,71 @@ -+/********************************************************************** -+ Copyright(c) 2020 Arm Corporation All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in -+ the documentation and/or other materials provided with the -+ distribution. -+ * Neither the name of Arm Corporation nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. 
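The remainder handling above (.loop_16B down to .less_2B) is a descending-width ladder over the ARMv8 CRC32 instructions. The same ladder, sketched with the ACLE intrinsics (crc32_tail is an illustrative helper compiled for armv8-a+crc, and the crc32_common_mix macro itself also applies the mvn pre-/post-inversion for the crc32 polynomial type):

#include <arm_acle.h>
#include <string.h>

/* Consume the tail in 8-, 4-, 2- and 1-byte steps, as the
 * .less_* labels above do with crc32x/crc32w/crc32h/crc32b. */
static uint32_t crc32_tail(uint32_t crc, const unsigned char *p, size_t len)
{
    uint64_t d; uint32_t w; uint16_t h;
    while (len >= 8) { memcpy(&d, p, 8); crc = __crc32d(crc, d); p += 8; len -= 8; }
    if (len >= 4)    { memcpy(&w, p, 4); crc = __crc32w(crc, w); p += 4; len -= 4; }
    if (len >= 2)    { memcpy(&h, p, 2); crc = __crc32h(crc, h); p += 2; len -= 2; }
    if (len)         { crc = __crc32b(crc, *p); }
    return crc;
}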
-+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+**********************************************************************/ -+#if defined(__ARM_FEATURE_CRC32) && defined(ARM_NEON) -+ -+ .text -+ .align 6 -+ .arch armv8-a+crypto+crc -+ -+#include "crc32_common_mix.S" -+.Lconstants: -+ .octa 0x00000001c6e415960000000154442bd4 -+ .octa 0x00000000ccaa009e00000001751997d0 -+ .octa 0x00000001F701164100000001DB710641 -+ .quad 0x0000000163cd6124 -+ .quad 0x00000000FFFFFFFF -+ .quad 0x000000001753ab84 -+.macro crc32_u64 dst,src,data -+ crc32x \dst,\src,\data -+.endm -+.macro crc32_u32 dst,src,data -+ crc32w \dst,\src,\data -+.endm -+.macro crc32_u16 dst,src,data -+ crc32h \dst,\src,\data -+.endm -+.macro crc32_u8 dst,src,data -+ crc32b \dst,\src,\data -+.endm -+ -+ -+/** -+ * uint32_t ptg_crc32(uint32_t crc, const unsigned char *data, size_t len); -+ * -+ */ -+ BUF .req x1 -+ LEN .req x2 -+ CRC .req x0 -+ wCRC .req w0 -+ .align 6 -+ .global ptg_crc32 -+ .type ptg_crc32, %function -+ptg_crc32: -+ crc32_common_mix crc32 -+ .size ptg_crc32, .-ptg_crc32 -+#endif -diff -uNr zlib-1.2.11/crc32.c ../zlib-1.2.11/crc32.c ---- zlib-1.2.11/crc32.c 2022-08-25 19:18:06.943264265 +0800 -+++ ../zlib-1.2.11/crc32.c 2022-08-25 19:30:51.765812383 +0800 -@@ -206,6 +206,10 @@ - extern uint32_t crc32_acle(uint32_t, const unsigned char *, uint64_t); - #endif - -+#if defined(__ARM_FEATURE_CRC32) && defined(ARM_NEON) -+extern uint32_t ptg_crc32(uint32_t, const unsigned char *, uint64_t); -+#endif -+ - /* ========================================================================= */ - unsigned long ZEXPORT crc32_z(crc, buf, len) - unsigned long crc; -@@ -225,7 +229,9 @@ - - endian = 1; - if (*((unsigned char *)(&endian))) --#if defined(__ARM_FEATURE_CRC32) -+#if defined(__ARM_FEATURE_CRC32) && defined(ARM_NEON) -+ return ptg_crc32(crc, buf, len); -+#elif defined(__ARM_FEATURE_CRC32) - return crc32_acle(crc, buf, len); - #else - return crc32_little(crc, buf, len); -diff -uNr zlib-1.2.11/Makefile.in ../zlib-1.2.11/Makefile.in ---- zlib-1.2.11/Makefile.in 2022-08-25 19:18:23.206722650 +0800 -+++ ../zlib-1.2.11/Makefile.in 2022-08-25 19:35:00.657538475 +0800 -@@ -57,11 +57,11 @@ - ZINC= - ZINCOUT=-I. 
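ptg_crc32 above implements the "mix" scheme: within each 2048-byte block, the first 512 bytes are folded with PMULL (the k1k2/k3k4 constants) while the remaining 1536 bytes run through the scalar crc32x pipeline from a zero seed, keeping both execution units busy; the final multiply by fold_poly shifts the PMULL partial CRC across the scalar-covered span before the eor merges the two halves. Conceptually this is the same shift-and-combine that zlib's public crc32_combine() performs on finished CRC-32 values; a rough model (illustrative only, since the assembly works on raw, uninverted state and bakes the length shift into fold_poly):

#include <zlib.h>

/* Conceptual per-block merge: shift the PMULL half over the
 * 1536 bytes the scalar half covered, then combine. */
static uLong combine_block(uLong crc_pmull_part, uLong crc_scalar_part)
{
    return crc32_combine(crc_pmull_part, crc_scalar_part, 1536);
}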
- --OBJZ = adler32.o adler32_neon.o crc32_acle.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o -+OBJZ = adler32.o adler32_neon.o crc32_acle.o crc32_mix.o crc32.o deflate.o infback.o inffast.o inffast.o inflate.o inftrees.o trees.o zutil.o - OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o - OBJC = $(OBJZ) $(OBJG) - --PIC_OBJZ = adler32.lo adler32_neon.lo crc32_acle.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo -+PIC_OBJZ = adler32.lo adler32_neon.lo crc32_acle.lo crc32_mix.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo - PIC_OBJG = compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo - PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG) - -@@ -164,6 +164,9 @@ - crc32_acle.o: $(SRCDIR)contrib/arm/crc32_acle.c - $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -c -o $@ $(SRCDIR)contrib/arm/crc32_acle.c - -+crc32_mix.o: $(SRCDIR)contrib/arm/crc32_mix.S -+ $(CC) $(CFLAGS) $(ZINC) -I$(SRCDIR) -c -o $@ $(SRCDIR)contrib/arm/crc32_mix.S -+ - crc32.o: $(SRCDIR)crc32.c - $(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c - -@@ -221,6 +224,11 @@ - $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -DPIC -c -o objs/crc32_acle.o $(SRCDIR)contrib/arm/crc32_acle.c - -@mv objs/crc32_acle.o $@ - -+crc32_mix.lo: $(SRCDIR)contrib/arm/crc32_mix.S -+ -@mkdir objs 2>/dev/null || test -d objs -+ $(CC) $(SFLAGS) $(ZINC) -I$(SRCDIR) -DPIC -c -o objs/crc32_mix.o $(SRCDIR)contrib/arm/crc32_mix.S -+ -@mv objs/crc32_mix.o $@ -+ - crc32.lo: $(SRCDIR)crc32.c - -@mkdir objs 2>/dev/null || test -d objs - $(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c diff --git a/1009-zlib-anolis-Neon-Optimized-chunkcopy_neon.patch b/1009-zlib-anolis-Neon-Optimized-chunkcopy_neon.patch deleted file mode 100644 index fd4051e8b051c5dac9ae9d9805faaad5f6b29456..0000000000000000000000000000000000000000 --- a/1009-zlib-anolis-Neon-Optimized-chunkcopy_neon.patch +++ /dev/null @@ -1,615 +0,0 @@ -diff -Nru zlib-1.2.11/contrib/arm/arm_chunk_copy_neon.h ../zlib-1.2.11/contrib/arm/arm_chunk_copy_neon.h ---- zlib-1.2.11/contrib/arm/arm_chunk_copy_neon.h 1970-01-01 08:00:00.000000000 +0800 -+++ ../zlib-1.2.11/contrib/arm/arm_chunk_copy_neon.h 2022-08-30 17:29:33.032693593 +0800 -@@ -0,0 +1,311 @@ -+#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) -+ -+#define ENABLE_ARM_CHUNK_NEON -+#define INFLATE_FAST_MIN_HAVE 6 -+#define INFLATE_FAST_MIN_LEFT 258 -+ -+#include -+#include -+ -+typedef uint8x16_t chunk_t; -+ -+#define CHUNK_SIZE 16 -+ -+#define HAVE_CHUNKMEMSET_1 -+#define HAVE_CHUNKMEMSET_2 -+#define HAVE_CHUNKMEMSET_4 -+#define HAVE_CHUNKMEMSET_8 -+ -+#define zmemcpy_2(dest, src) memcpy(dest, src, 2) -+#define zmemcmp_2(str1, str2) memcmp(str1, str2, 2) -+#define zmemcpy_4(dest, src) memcpy(dest, src, 4) -+#define zmemcmp_4(str1, str2) memcmp(str1, str2, 4) -+#define zmemcpy_8(dest, src) memcpy(dest, src, 8) -+#define zmemcmp_8(str1, str2) memcmp(str1, str2, 8) -+#define MIN(a, b) ((a) > (b) ? 
(b) : (a))
-+#define Z_INTERNAL
-+
-+static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
-+    *chunk = vld1q_dup_u8(from);
-+}
-+
-+static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
-+    uint16_t tmp;
-+    zmemcpy_2(&tmp, from);
-+    *chunk = vreinterpretq_u8_u16(vdupq_n_u16(tmp));
-+}
-+
-+static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
-+    uint32_t tmp;
-+    zmemcpy_4(&tmp, from);
-+    *chunk = vreinterpretq_u8_u32(vdupq_n_u32(tmp));
-+}
-+
-+static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
-+    uint64_t tmp;
-+    zmemcpy_8(&tmp, from);
-+    *chunk = vreinterpretq_u8_u64(vdupq_n_u64(tmp));
-+}
-+
-+#define CHUNKSIZE chunksize_neon
-+#define CHUNKCOPY chunkcopy_neon
-+#define CHUNKCOPY_SAFE chunkcopy_safe_neon
-+#define CHUNKUNROLL chunkunroll_neon
-+#define CHUNKMEMSET chunkmemset_neon
-+#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
-+
-+static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
-+    *chunk = vld1q_u8(s);
-+}
-+
-+static inline void storechunk(uint8_t *out, chunk_t *chunk) {
-+    vst1q_u8(out, *chunk);
-+}
-+
-+/* Behave like chunkcopy, but avoid writing beyond the legal output. */
-+static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, size_t len, uint8_t *safe) {
-+    uint32_t safelen = (uint32_t)((safe - out) + 1);
-+    len = MIN(len, safelen);
-+    int32_t olap_src = from >= out && from < out + len;
-+    int32_t olap_dst = out >= from && out < from + len;
-+    size_t tocopy;
-+
-+    /* For all cases without overlap, memcpy is ideal */
-+    if (!(olap_src || olap_dst)) {
-+        memcpy(out, from, len);
-+        return out + len;
-+    }
-+
-+    /* We are emulating a self-modifying copy loop here. To do this in a way that doesn't produce undefined behavior,
-+     * we have to get a bit clever. First if the overlap is such that src falls between dst and dst+len, we can do the
-+     * initial bulk memcpy of the nonoverlapping region. Then, we can leverage the size of this to determine the safest
-+     * atomic memcpy size we can pick such that we have non-overlapping regions. This effectively becomes a safe look
-+     * behind or lookahead distance */
-+    size_t non_olap_size = ((from > out) ? from - out : out - from);
-+
-+    memcpy(out, from, non_olap_size);
-+    out += non_olap_size;
-+    from += non_olap_size;
-+    len -= non_olap_size;
-+
-+    /* So this doesn't give us a worst case scenario of function calls in a loop,
-+     * we want to instead break this down into copy blocks of fixed lengths */
-+    while (len) {
-+        tocopy = MIN(non_olap_size, len);
-+        len -= tocopy;
-+
-+        while (tocopy >= 32) {
-+            memcpy(out, from, 32);
-+            out += 32;
-+            from += 32;
-+            tocopy -= 32;
-+        }
-+
-+        if (tocopy >= 16) {
-+            memcpy(out, from, 16);
-+            out += 16;
-+            from += 16;
-+            tocopy -= 16;
-+        }
-+
-+        if (tocopy >= 8) {
-+            zmemcpy_8(out, from);
-+            out += 8;
-+            from += 8;
-+            tocopy -= 8;
-+        }
-+
-+        if (tocopy >= 4) {
-+            zmemcpy_4(out, from);
-+            out += 4;
-+            from += 4;
-+            tocopy -= 4;
-+        }
-+
-+        if (tocopy >= 2) {
-+            zmemcpy_2(out, from);
-+            out += 2;
-+            from += 2;
-+            tocopy -= 2;
-+        }
-+
-+        if (tocopy) {
-+            *out++ = *from++;
-+        }
-+    }
-+
-+    return out;
-+}
-+
-+/* Returns the chunk size */
-+ZLIB_INTERNAL uint32_t CHUNKSIZE(void) {
-+    return sizeof(chunk_t);
-+}
-+
-+/* Behave like memcpy, but assume that it's OK to overwrite at least
-+   chunk_t bytes of output even if the length is shorter than this,
-+   that the length is non-zero, and that `from` lags `out` by at least
-+   sizeof chunk_t bytes (or that they don't overlap at all or simply that
-+   the distance is less than the length of the copy).
-+
-+   Aside from better memory bus utilisation, this means that short copies
-+   (chunk_t bytes or fewer) will fall straight through the loop
-+   without iteration, which will hopefully make the branch prediction more
-+   reliable. */
-+ZLIB_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
-+    Assert(len > 0, "chunkcopy should never have a length 0");
-+    chunk_t chunk;
-+    int32_t align = ((len - 1) % sizeof(chunk_t)) + 1;
-+    loadchunk(from, &chunk);
-+    storechunk(out, &chunk);
-+    out += align;
-+    from += align;
-+    len -= align;
-+    while (len > 0) {
-+        loadchunk(from, &chunk);
-+        storechunk(out, &chunk);
-+        out += sizeof(chunk_t);
-+        from += sizeof(chunk_t);
-+        len -= sizeof(chunk_t);
-+    }
-+    return out;
-+}
-+
-+/* Perform short copies until distance can be rewritten as being at least
-+   sizeof chunk_t.
-+
-+   This assumes that it's OK to overwrite at least the first
-+   2*sizeof(chunk_t) bytes of output even if the copy is shorter than this.
-+   This assumption holds because inflate_fast() starts every iteration with at
-+   least 258 bytes of output space available (258 being the maximum length
-+   output from a single token; see inflate_fast()'s assumptions below). */
-+ZLIB_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) {
-+    unsigned char const *from = out - *dist;
-+    chunk_t chunk;
-+    while (*dist < *len && *dist < sizeof(chunk_t)) {
-+        loadchunk(from, &chunk);
-+        storechunk(out, &chunk);
-+        out += *dist;
-+        *len -= *dist;
-+        *dist += *dist;
-+    }
-+    return out;
-+}
-+
-+/* Copy DIST bytes from OUT - DIST into OUT + DIST * k, for 0 <= k < LEN/DIST.
-+   Return OUT + LEN.
*/ -+ZLIB_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) { -+ /* Debug performance related issues when len < sizeof(uint64_t): -+ Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */ -+ Assert(dist > 0, "chunkmemset cannot have a distance 0"); -+ -+ uint8_t *from = out - dist; -+ -+ if (dist == 1) { -+ memset(out, *from, len); -+ return out + len; -+ } else if (dist > sizeof(chunk_t)) { -+ return CHUNKCOPY(out, out - dist, len); -+ } -+ -+ chunk_t chunk_load; -+ uint32_t chunk_mod = 0; -+ /* TODO: possibly build up a permutation table for this if not an even modulus */ -+#ifdef HAVE_CHUNKMEMSET_2 -+ if (dist == 2) { -+ chunkmemset_2(from, &chunk_load); -+ } else -+#endif -+#ifdef HAVE_CHUNKMEMSET_4 -+ if (dist == 4) { -+ chunkmemset_4(from, &chunk_load); -+ } else -+#endif -+#ifdef HAVE_CHUNKMEMSET_8 -+ if (dist == 8) { -+ chunkmemset_8(from, &chunk_load); -+ } else if (dist == sizeof(chunk_t)) { -+ loadchunk(from, &chunk_load); -+ } else -+#endif -+ { -+ /* This code takes string of length dist from "from" and repeats -+ * it for as many times as can fit in a chunk_t (vector register) */ -+ uint32_t cpy_dist; -+ uint32_t bytes_remaining = sizeof(chunk_t); -+ uint8_t *cur_chunk = (uint8_t *)&chunk_load; -+ while (bytes_remaining) { -+ cpy_dist = MIN(dist, bytes_remaining); -+ memcpy(cur_chunk, from, cpy_dist); -+ bytes_remaining -= cpy_dist; -+ cur_chunk += cpy_dist; -+ /* This allows us to bypass an expensive integer division since we're effectively -+ * counting in this loop, anyway. However, we may have to derive a similarly -+ * sensible solution for if we use a permutation table that allows us to construct -+ * this vector in one load and one permute instruction */ -+ chunk_mod = cpy_dist; -+ } -+ } -+ -+ /* If we're lucky enough and dist happens to be an even modulus of our vector length, -+ * we can do two stores per loop iteration, which for most ISAs, especially x86, is beneficial */ -+ if (chunk_mod == 0) { -+ while (len >= (2 * sizeof(chunk_t))) { -+ storechunk(out, &chunk_load); -+ storechunk(out + sizeof(chunk_t), &chunk_load); -+ out += 2 * sizeof(chunk_t); -+ len -= 2 * sizeof(chunk_t); -+ } -+ } -+ -+ /* If we don't have a "dist" length that divides evenly into a vector -+ * register, we can write the whole vector register but we need only -+ * advance by the amount of the whole string that fits in our chunk_t. 
-+ * If we do divide evenly into the vector length, adv_amount = chunk_t size*/ -+ uint32_t adv_amount = sizeof(chunk_t) - chunk_mod; -+ while (len >= sizeof(chunk_t)) { -+ storechunk(out, &chunk_load); -+ len -= adv_amount; -+ out += adv_amount; -+ } -+ -+ if (len) { -+ memcpy(out, &chunk_load, len); -+ out += len; -+ } -+ -+ return out; -+} -+ -+ZLIB_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, unsigned dist, unsigned len, unsigned left) { -+#if !defined(UNALIGNED64_OK) -+#if !defined(UNALIGNED_OK) -+ static const uint32_t align_mask = 7; -+#else -+ static const uint32_t align_mask = 3; -+#endif -+#endif -+ -+ len = MIN(len, left); -+ uint8_t *from = out - dist; -+#if !defined(UNALIGNED64_OK) -+ while (((uintptr_t)out & align_mask) && (len > 0)) { -+ *out++ = *from++; -+ --len; -+ --left; -+ } -+#endif -+ if (left < (unsigned)(3 * sizeof(chunk_t))) { -+ while (len > 0) { -+ *out++ = *from++; -+ --len; -+ } -+ return out; -+ } -+ if (len) -+ return CHUNKMEMSET(out, dist, len); -+ -+ return out; -+} -+ -+#endif -diff -Nru zlib-1.2.11/inffast.c ../zlib-1.2.11/inffast.c ---- zlib-1.2.11/inffast.c 2017-01-16 01:29:40.000000000 +0800 -+++ ../zlib-1.2.11/inffast.c 2022-08-30 17:46:16.887320481 +0800 -@@ -7,7 +7,7 @@ - #include "inftrees.h" - #include "inflate.h" - #include "inffast.h" -- -+#include "contrib/arm/arm_chunk_copy_neon.h" - #ifdef ASMINF - # pragma message("Assembler code may have bugs -- use at your own risk") - #else -@@ -47,10 +47,268 @@ - requires strm->avail_out >= 258 for each loop to avoid checking for - output space. - */ -+#ifdef ENABLE_ARM_CHUNK_NEON -+void ZLIB_INTERNAL arm_inflate_fast(strm, start) -+z_streamp strm; -+unsigned start; /* inflate()'s starting value for strm->avail_out */ -+{ -+ struct inflate_state FAR *state; -+ z_const unsigned char FAR *in; /* local strm->next_in */ -+ z_const unsigned char FAR *last; /* have enough input while in < last */ -+ unsigned char FAR *out; /* local strm->next_out */ -+ unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ -+ unsigned char FAR *end; /* while out < end, enough space available */ -+ unsigned char *safe; /* can use chunkcopy provided out < safe */ -+#ifdef INFLATE_STRICT -+ unsigned dmax; /* maximum distance from zlib header */ -+#endif -+ unsigned wsize; /* window size or zero if not using window */ -+ unsigned whave; /* valid bytes in the window */ -+ unsigned wnext; /* window write index */ -+ unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ -+ unsigned long hold; /* local strm->hold */ -+ unsigned bits; /* local strm->bits */ -+ code const FAR *lcode; /* local strm->lencode */ -+ code const FAR *dcode; /* local strm->distcode */ -+ unsigned lmask; /* mask for first level of length codes */ -+ unsigned dmask; /* mask for first level of distance codes */ -+ code here; /* retrieved table entry */ -+ unsigned op; /* code bits, operation, extra bits, or */ -+ /* window position, window bytes to copy */ -+ unsigned len; /* match length, unused bytes */ -+ unsigned dist; /* match distance */ -+ unsigned char FAR *from; /* where to copy match from */ -+ unsigned extra_safe; /* copy chunks safely in all cases */ -+ uint32_t chunksize = chunksize_neon(); -+ /* copy state to local variables */ -+ state = (struct inflate_state FAR *)strm->state; -+ in = strm->next_in; -+ last = in + (strm->avail_in - 5); -+ out = strm->next_out; -+ beg = out - (start - strm->avail_out); -+ end = out + (strm->avail_out - 257); -+ safe = out + strm->avail_out; -+#ifdef INFLATE_STRICT -+ dmax = 
state->dmax; -+#endif -+ wsize = state->wsize; -+ whave = state->whave; -+ wnext = state->wnext; -+ window = state->window; -+ hold = state->hold; -+ bits = state->bits; -+ lcode = state->lencode; -+ dcode = state->distcode; -+ lmask = (1U << state->lenbits) - 1; -+ dmask = (1U << state->distbits) - 1; -+ extra_safe = (wsize != 0 && out >= window && out + INFLATE_FAST_MIN_LEFT <= window + wsize); -+ /* decode literals and length/distances until end-of-block or not enough -+ input data or output space */ -+ do { -+ if (bits < 15) { -+ hold += (unsigned long)(*in++) << bits; -+ bits += 8; -+ hold += (unsigned long)(*in++) << bits; -+ bits += 8; -+ } -+ here = lcode[hold & lmask]; -+ dolen: -+ op = (unsigned)(here.bits); -+ hold >>= op; -+ bits -= op; -+ op = (unsigned)(here.op); -+ if (op == 0) { /* literal */ -+ Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? -+ "inflate: literal '%c'\n" : -+ "inflate: literal 0x%02x\n", here.val)); -+ *out++ = (unsigned char)(here.val); -+ } -+ else if (op & 16) { /* length base */ -+ len = (unsigned)(here.val); -+ op &= 15; /* number of extra bits */ -+ if (op) { -+ if (bits < op) { -+ hold += (unsigned long)(*in++) << bits; -+ bits += 8; -+ } -+ len += (unsigned)hold & ((1U << op) - 1); -+ hold >>= op; -+ bits -= op; -+ } -+ Tracevv((stderr, "inflate: length %u\n", len)); -+ if (bits < 15) { -+ hold += (unsigned long)(*in++) << bits; -+ bits += 8; -+ hold += (unsigned long)(*in++) << bits; -+ bits += 8; -+ } -+ here = dcode[hold & dmask]; -+ dodist: -+ op = (unsigned)(here.bits); -+ hold >>= op; -+ bits -= op; -+ op = (unsigned)(here.op); -+ if (op & 16) { /* distance base */ -+ dist = (unsigned)(here.val); -+ op &= 15; /* number of extra bits */ -+ if (bits < op) { -+ hold += (unsigned long)(*in++) << bits; -+ bits += 8; -+ if (bits < op) { -+ hold += (unsigned long)(*in++) << bits; -+ bits += 8; -+ } -+ } -+ dist += (unsigned)hold & ((1U << op) - 1); -+#ifdef INFLATE_STRICT -+ if (dist > dmax) { -+ strm->msg = (char *)"invalid distance too far back"; -+ state->mode = BAD; -+ break; -+ } -+#endif -+ hold >>= op; -+ bits -= op; -+ Tracevv((stderr, "inflate: distance %u\n", dist)); -+ op = (unsigned)(out - beg); /* max distance in output */ -+ if (dist > op) { /* see if copy from window */ -+ op = dist - op; /* distance back in window */ -+ if (op > whave) { -+ if (state->sane) { -+ strm->msg = -+ (char *)"invalid distance too far back"; -+ state->mode = BAD; -+ break; -+ } -+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR -+ if (len <= op - whave) { -+ do { -+ *out++ = 0; -+ } while (--len); -+ continue; -+ } -+ len -= op - whave; -+ do { -+ *out++ = 0; -+ } while (--op > whave); -+ if (op == 0) { -+ from = out - dist; -+ do { -+ *out++ = *from++; -+ } while (--len); -+ continue; -+ } -+#endif -+ } -+ from = window; -+ if (wnext == 0) { /* very common case */ -+ from += wsize - op; -+ } else if (wnext >= op) { /* contiguous in window */ -+ from += wnext - op; -+ } else { /* wrap around window */ -+ op -= wnext; -+ from += wsize - op; -+ if (op < len) { /* some from end of window */ -+ len -= op; -+ out = chunkcopy_safe(out, from, op, safe); -+ from = window; /* more from start of window */ -+ op = wnext; -+ /* This (rare) case can create a situation where -+ the first chunkcopy below must be checked. 
-+ */ -+ } -+ } -+ if (op < len) { /* still need some from output */ -+ len -= op; -+ out = chunkcopy_safe(out, from, op, safe); -+ out = chunkunroll_neon(out, &dist, &len); -+ out = chunkcopy_safe(out, out - dist, len, safe); -+ } else { -+ out = chunkcopy_safe(out, from, len, safe); -+ } -+ } else if (extra_safe) { -+ /* Whole reference is in range of current output. */ -+ if (dist >= len || dist >= chunksize) -+ out = chunkcopy_safe(out, out - dist, len, safe); -+ else -+ out = chunkmemset_safe_neon(out, dist, len, (unsigned)((safe - out) + 1)); -+ } else { -+ /* Whole reference is in range of current output. No range checks are -+ necessary because we start with room for at least 258 bytes of output, -+ so unroll and roundoff operations can write beyond `out+len` so long -+ as they stay within 258 bytes of `out`. -+ */ -+ if (dist >= len || dist >= chunksize) -+ out = chunkcopy_neon(out, out - dist, len); -+ else -+ out = chunkmemset_neon(out, dist, len); -+ } -+ } else if ((op & 64) == 0) { /* 2nd level distance code */ -+ here = dcode[here.val + (hold & ((1U << op) - 1))]; -+ goto dodist; -+ } -+ else { -+ strm->msg = (char *)"invalid distance code"; -+ state->mode = BAD; -+ break; -+ } -+ } -+ else if ((op & 64) == 0) { /* 2nd level length code */ -+ here = lcode[here.val + (hold & ((1U << op) - 1))]; -+ goto dolen; -+ } -+ else if (op & 32) { /* end-of-block */ -+ Tracevv((stderr, "inflate: end of block\n")); -+ state->mode = TYPE; -+ break; -+ } -+ else { -+ strm->msg = (char *)"invalid literal/length code"; -+ state->mode = BAD; -+ break; -+ } -+ } while (in < last && out < end); -+ -+ /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ -+ len = bits >> 3; -+ in -= len; -+ bits -= len << 3; -+ hold &= (1U << bits) - 1; -+ -+ /* update state and return */ -+ strm->next_in = in; -+ strm->next_out = out; -+ strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); -+ strm->avail_out = (unsigned)(out < end ? 
-+ 257 + (end - out) : 257 - (out - end)); -+ state->hold = hold; -+ state->bits = bits; -+ return; -+} -+ -+/* -+ inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): -+ - Using bit fields for code structure -+ - Different op definition to avoid & for extra bits (do & for table bits) -+ - Three separate decoding do-loops for direct, window, and wnext == 0 -+ - Special case for distance > 1 copies to do overlapped load and store copy -+ - Explicit branch predictions (based on measured branch probabilities) -+ - Deferring match copy and interspersed it with decoding subsequent codes -+ - Swapping literal/length else -+ - Swapping window/direct else -+ - Larger unrolled copy loops (three is about right) -+ - Moving len -= 3 statement into middle of loop -+ */ -+#endif -+ - void ZLIB_INTERNAL inflate_fast(strm, start) - z_streamp strm; - unsigned start; /* inflate()'s starting value for strm->avail_out */ - { -+#ifdef ENABLE_ARM_CHUNK_NEON -+ return arm_inflate_fast(strm, start); -+#endif -+ - struct inflate_state FAR *state; - z_const unsigned char FAR *in; /* local strm->next_in */ - z_const unsigned char FAR *last; /* have enough input while in < last */ -@@ -306,18 +564,4 @@ - return; - } - --/* -- inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): -- - Using bit fields for code structure -- - Different op definition to avoid & for extra bits (do & for table bits) -- - Three separate decoding do-loops for direct, window, and wnext == 0 -- - Special case for distance > 1 copies to do overlapped load and store copy -- - Explicit branch predictions (based on measured branch probabilities) -- - Deferring match copy and interspersed it with decoding subsequent codes -- - Swapping literal/length else -- - Swapping window/direct else -- - Larger unrolled copy loops (three is about right) -- - Moving len -= 3 statement into middle of loop -- */ -- - #endif /* !ASMINF */ diff --git a/dist b/dist new file mode 100644 index 0000000000000000000000000000000000000000..0ee7539a2292be885eb3a6caf1a4ee4666e5e99f --- /dev/null +++ b/dist @@ -0,0 +1 @@ +an8_6 diff --git a/download b/download new file mode 100644 index 0000000000000000000000000000000000000000..3215315d3d8330850574c0aabee2c712dfdd6707 --- /dev/null +++ b/download @@ -0,0 +1 @@ +85adef240c5f370b308da8c938951a68 zlib-1.2.11.tar.xz diff --git a/zlib-1.2.11-CVE-2018-25032.patch b/zlib-1.2.11-CVE-2018-25032.patch new file mode 100644 index 0000000000000000000000000000000000000000..9b4debdf2cc356b4d2791cd868f107bf0866e25b --- /dev/null +++ b/zlib-1.2.11-CVE-2018-25032.patch @@ -0,0 +1,346 @@ +From 5c44459c3b28a9bd3283aaceab7c615f8020c531 Mon Sep 17 00:00:00 2001 +From: Mark Adler +Date: Tue, 17 Apr 2018 22:09:22 -0700 +Subject: [PATCH] Fix a bug that can crash deflate on some input when using + Z_FIXED. + +This bug was reported by Danilo Ramos of Eideticom, Inc. It has +lain in wait 13 years before being found! The bug was introduced +in zlib 1.2.2.2, with the addition of the Z_FIXED option. That +option forces the use of fixed Huffman codes. For rare inputs with +a large number of distant matches, the pending buffer into which +the compressed data is written can overwrite the distance symbol +table which it overlays. That results in corrupted output due to +invalid distances, and can result in out-of-bound accesses, +crashing the application. + +The fix here combines the distance buffer and literal/length +buffers into a single symbol buffer. 
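The single buffer stores each symbol as a fixed three-byte record, as the deflate.h and trees.c hunks further below show. A minimal sketch of that record format (tally_record is an illustrative helper, not part of the patch):

/* 3-byte sym_buf record; dist == 0 marks a literal. */
static void tally_record(unsigned char *sym_buf, unsigned *sym_next,
                         unsigned dist, unsigned len_or_lit)
{
    sym_buf[(*sym_next)++] = (unsigned char)dist;         /* distance, low byte  */
    sym_buf[(*sym_next)++] = (unsigned char)(dist >> 8);  /* distance, high byte */
    sym_buf[(*sym_next)++] = (unsigned char)len_or_lit;   /* length or literal   */
}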
Now three bytes of pending +buffer space are opened up for each literal or length/distance +pair consumed, instead of the previous two bytes. This assures +that the pending buffer cannot overwrite the symbol table, since +the maximum fixed code compressed length/distance is 31 bits, and +since there are four bytes of pending space for every three bytes +of symbol space. +--- + deflate.c | 74 ++++++++++++++++++++++++++++++++++++++++--------------- + deflate.h | 25 +++++++++---------- + trees.c | 50 +++++++++++-------------------------- + 3 files changed, 79 insertions(+), 70 deletions(-) + +diff --git a/deflate.c b/deflate.c +index 425babc..19cba87 100644 +--- a/deflate.c ++++ b/deflate.c +@@ -255,11 +255,6 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + +- ushf *overlay; +- /* We overlay pending_buf and d_buf+l_buf. This works since the average +- * output size for (length,distance) codes is <= 24 bits. +- */ +- + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; +@@ -329,9 +324,47 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + +- overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); +- s->pending_buf = (uchf *) overlay; +- s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); ++ /* We overlay pending_buf and sym_buf. This works since the average size ++ * for length/distance pairs over any compressed block is assured to be 31 ++ * bits or less. ++ * ++ * Analysis: The longest fixed codes are a length code of 8 bits plus 5 ++ * extra bits, for lengths 131 to 257. The longest fixed distance codes are ++ * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest ++ * possible fixed-codes length/distance pair is then 31 bits total. ++ * ++ * sym_buf starts one-fourth of the way into pending_buf. So there are ++ * three bytes in sym_buf for every four bytes in pending_buf. Each symbol ++ * in sym_buf is three bytes -- two for the distance and one for the ++ * literal/length. As each symbol is consumed, the pointer to the next ++ * sym_buf value to read moves forward three bytes. From that symbol, up to ++ * 31 bits are written to pending_buf. The closest the written pending_buf ++ * bits gets to the next sym_buf symbol to read is just before the last ++ * code is written. At that time, 31*(n-2) bits have been written, just ++ * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at ++ * 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1 ++ * symbols are written.) The closest the writing gets to what is unread is ++ * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and ++ * can range from 128 to 32768. ++ * ++ * Therefore, at a minimum, there are 142 bits of space between what is ++ * written and what is read in the overlain buffers, so the symbols cannot ++ * be overwritten by the compressed data. That space is actually 139 bits, ++ * due to the three-bit fixed-code block header. ++ * ++ * That covers the case where either Z_FIXED is specified, forcing fixed ++ * codes, or when the use of fixed codes is chosen, because that choice ++ * results in a smaller compressed block than dynamic codes. That latter ++ * condition then assures that the above analysis also covers all dynamic ++ * blocks. 
A dynamic-code block will only be chosen to be emitted if it has ++ * fewer bits than a fixed-code block would for the same set of symbols. ++ * Therefore its average symbol length is assured to be less than 31. So ++ * the compressed data for a dynamic block also cannot overwrite the ++ * symbols from which it is being constructed. ++ */ ++ ++ s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4); ++ s->pending_buf_size = (ulg)s->lit_bufsize * 4; + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { +@@ -340,8 +373,12 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, + deflateEnd (strm); + return Z_MEM_ERROR; + } +- s->d_buf = overlay + s->lit_bufsize/sizeof(ush); +- s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; ++ s->sym_buf = s->pending_buf + s->lit_bufsize; ++ s->sym_end = (s->lit_bufsize - 1) * 3; ++ /* We avoid equality with lit_bufsize*3 because of wraparound at 64K ++ * on 16 bit machines and because stored blocks are restricted to ++ * 64K-1 bytes. ++ */ + + s->level = level; + s->strategy = strategy; +@@ -552,7 +589,7 @@ int ZEXPORT deflatePrime (strm, bits, value) + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; +- if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) ++ if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; + do { + put = Buf_size - s->bi_valid; +@@ -1113,7 +1150,6 @@ int ZEXPORT deflateCopy (dest, source) + #else + deflate_state *ds; + deflate_state *ss; +- ushf *overlay; + + + if (deflateStateCheck(source) || dest == Z_NULL) { +@@ -1133,8 +1169,7 @@ int ZEXPORT deflateCopy (dest, source) + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); +- overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); +- ds->pending_buf = (uchf *) overlay; ++ ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4); + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { +@@ -1148,8 +1183,7 @@ int ZEXPORT deflateCopy (dest, source) + zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); +- ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); +- ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; ++ ds->sym_buf = ds->pending_buf + ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; +@@ -1925,7 +1959,7 @@ local block_state deflate_fast(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -2056,7 +2090,7 @@ local block_state deflate_slow(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -2131,7 +2165,7 @@ local block_state deflate_rle(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +@@ -2170,7 +2204,7 @@ local block_state deflate_huff(s, flush) + FLUSH_BLOCK(s, 1); + return finish_done; + } +- if (s->last_lit) ++ if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; + } +diff --git a/deflate.h b/deflate.h +index 23ecdd3..d4cf1a9 100644 +--- a/deflate.h ++++ b/deflate.h +@@ -217,7 +217,7 @@ 
typedef struct internal_state { + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + +- uchf *l_buf; /* buffer for literals or lengths */ ++ uchf *sym_buf; /* buffer for distances and literals/lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for +@@ -239,13 +239,8 @@ typedef struct internal_state { + * - I can't count above 4 + */ + +- uInt last_lit; /* running index in l_buf */ +- +- ushf *d_buf; +- /* Buffer for distances. To simplify the code, d_buf and l_buf have +- * the same number of elements. To use different lengths, an extra flag +- * array would be necessary. +- */ ++ uInt sym_next; /* running index in sym_buf */ ++ uInt sym_end; /* symbol table full when sym_next reaches this */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ +@@ -325,20 +320,22 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, + + # define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ +- s->d_buf[s->last_lit] = 0; \ +- s->l_buf[s->last_lit++] = cc; \ ++ s->sym_buf[s->sym_next++] = 0; \ ++ s->sym_buf[s->sym_next++] = 0; \ ++ s->sym_buf[s->sym_next++] = cc; \ + s->dyn_ltree[cc].Freq++; \ +- flush = (s->last_lit == s->lit_bufsize-1); \ ++ flush = (s->sym_next == s->sym_end); \ + } + # define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ +- s->d_buf[s->last_lit] = dist; \ +- s->l_buf[s->last_lit++] = len; \ ++ s->sym_buf[s->sym_next++] = dist; \ ++ s->sym_buf[s->sym_next++] = dist >> 8; \ ++ s->sym_buf[s->sym_next++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ +- flush = (s->last_lit == s->lit_bufsize-1); \ ++ flush = (s->sym_next == s->sym_end); \ + } + #else + # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +diff --git a/trees.c b/trees.c +index 4f4a650..decaeb7 100644 +--- a/trees.c ++++ b/trees.c +@@ -416,7 +416,7 @@ local void init_block(s) + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; +- s->last_lit = s->matches = 0; ++ s->sym_next = s->matches = 0; + } + + #define SMALLEST 1 +@@ -948,7 +948,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, +- s->last_lit)); ++ s->sym_next / 3)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + +@@ -1017,8 +1017,9 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ + { +- s->d_buf[s->last_lit] = (ush)dist; +- s->l_buf[s->last_lit++] = (uch)lc; ++ s->sym_buf[s->sym_next++] = dist; ++ s->sym_buf[s->sym_next++] = dist >> 8; ++ s->sym_buf[s->sym_next++] = lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; +@@ -1033,30 +1034,7 @@ int ZLIB_INTERNAL _tr_tally (s, dist, lc) + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } +- +-#ifdef TRUNCATE_BLOCK +- /* Try to guess if it is profitable to stop the current block here */ +- if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { +- /* Compute an upper bound for the compressed length */ +- ulg out_length = (ulg)s->last_lit*8L; +- ulg in_length = (ulg)((long)s->strstart - s->block_start); +- int dcode; +- for (dcode = 0; 
dcode < D_CODES; dcode++) { +- out_length += (ulg)s->dyn_dtree[dcode].Freq * +- (5L+extra_dbits[dcode]); +- } +- out_length >>= 3; +- Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", +- s->last_lit, in_length, out_length, +- 100L - out_length*100L/in_length)); +- if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; +- } +-#endif +- return (s->last_lit == s->lit_bufsize-1); +- /* We avoid equality with lit_bufsize because of wraparound at 64K +- * on 16 bit machines and because stored blocks are restricted to +- * 64K-1 bytes. +- */ ++ return (s->sym_next == s->sym_end); + } + + /* =========================================================================== +@@ -1069,13 +1047,14 @@ local void compress_block(s, ltree, dtree) + { + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ +- unsigned lx = 0; /* running index in l_buf */ ++ unsigned sx = 0; /* running index in sym_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + +- if (s->last_lit != 0) do { +- dist = s->d_buf[lx]; +- lc = s->l_buf[lx++]; ++ if (s->sym_next != 0) do { ++ dist = s->sym_buf[sx++] & 0xff; ++ dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; ++ lc = s->sym_buf[sx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); +@@ -1100,11 +1079,10 @@ local void compress_block(s, ltree, dtree) + } + } /* literal or match pair ? */ + +- /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ +- Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, +- "pendingBuf overflow"); ++ /* Check that the overlay between pending_buf and sym_buf is ok: */ ++ Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); + +- } while (lx < s->last_lit); ++ } while (sx < s->sym_next); + + send_code(s, END_BLOCK, ltree); + } +-- +2.34.1 + diff --git a/zlib-1.2.11-cve-2022-37434.patch b/zlib-1.2.11-cve-2022-37434.patch new file mode 100644 index 0000000000000000000000000000000000000000..1f3aa8d1608b826ec75142bbec571bbb948de985 --- /dev/null +++ b/zlib-1.2.11-cve-2022-37434.patch @@ -0,0 +1,35 @@ +From eff308af425b67093bab25f80f1ae950166bece1 Mon Sep 17 00:00:00 2001 +From: Mark Adler +Date: Sat, 30 Jul 2022 15:51:11 -0700 +Subject: [PATCH] Fix a bug when getting a gzip header extra field with + inflate(). + +If the extra field was larger than the space the user provided with +inflateGetHeader(), and if multiple calls of inflate() delivered +the extra header data, then there could be a buffer overflow of the +provided space. This commit assures that provided space is not +exceeded. +--- + inflate.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/inflate.c b/inflate.c +index 7be8c63..7a72897 100644 +--- a/inflate.c ++++ b/inflate.c +@@ -763,9 +763,10 @@ int flush; + copy = state->length; + if (copy > have) copy = have; + if (copy) { ++ len = state->head->extra_len - state->length; + if (state->head != Z_NULL && +- state->head->extra != Z_NULL) { +- len = state->head->extra_len - state->length; ++ state->head->extra != Z_NULL && ++ len < state->head->extra_max) { + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? 
+ state->head->extra_max - len : copy); +-- +2.35.3 + diff --git a/zlib-1.2.11-cve-2022-37434_2.patch b/zlib-1.2.11-cve-2022-37434_2.patch new file mode 100644 index 0000000000000000000000000000000000000000..d0e9d1b016045df1d3076f196cdc9daf0f59110d --- /dev/null +++ b/zlib-1.2.11-cve-2022-37434_2.patch @@ -0,0 +1,32 @@ +From 1eb7682f845ac9e9bf9ae35bbfb3bad5dacbd91d Mon Sep 17 00:00:00 2001 +From: Mark Adler +Date: Mon, 8 Aug 2022 10:50:09 -0700 +Subject: [PATCH] Fix extra field processing bug that dereferences NULL + state->head. + +The recent commit to fix a gzip header extra field processing bug +introduced the new bug fixed here. +--- + inflate.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/inflate.c b/inflate.c +index 7a72897..2a3c4fe 100644 +--- a/inflate.c ++++ b/inflate.c +@@ -763,10 +763,10 @@ int flush; + copy = state->length; + if (copy > have) copy = have; + if (copy) { +- len = state->head->extra_len - state->length; + if (state->head != Z_NULL && + state->head->extra != Z_NULL && +- len < state->head->extra_max) { ++ (len = state->head->extra_len - state->length) < ++ state->head->extra_max) { + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? + state->head->extra_max - len : copy); +-- +2.35.3 + diff --git a/zlib-1.2.11.tar.xz b/zlib-1.2.11.tar.xz deleted file mode 100644 index 305b7a058f2b18b5ff15b0c5258ab7d489c21973..0000000000000000000000000000000000000000 Binary files a/zlib-1.2.11.tar.xz and /dev/null differ diff --git a/zlib.spec b/zlib.spec index 3d9582bd757bc44c09ed2dd6be62aa580396ae07..ab4cafa7af1af49dd146745a987b41a1f0313388 100644 --- a/zlib.spec +++ b/zlib.spec @@ -1,10 +1,10 @@ -%define anolis_release .0.4 +%define anolis_release .0.1 # disabled, per rhbz#1609830 and rhbz#1602742 %bcond_with minizip Name: zlib Version: 1.2.11 -Release: 17%{anolis_release}%{?dist} +Release: 19%{anolis_release}%{?dist} Summary: The compression and decompression library # /contrib/dotzlib/ have Boost license License: zlib and Boost @@ -15,6 +15,11 @@ Source: http://www.zlib.net/zlib-%{version}.tar.xz Patch0: zlib-1.2.5-minizip-fixuncrypt.patch # permit a deflateParams() parameter change Patch1: zlib-1.2.11-permit-deflateParams-change.patch +Patch2: zlib-1.2.11-CVE-2018-25032.patch + +# Patches for CVE-2022-37434 +Patch3: zlib-1.2.11-cve-2022-37434.patch +Patch4: zlib-1.2.11-cve-2022-37434_2.patch # general aarch64 optimizations Patch1001: 1001-zlib-anolis-Neon-Optimized-hash-chain-rebase.patch @@ -26,12 +31,6 @@ Patch1004: 1004-zlib-anolis-compute-crc32-using-armv8-specific-instruction.patch Patch1005: 1005-zlib-anolis-ARM-optimized-insert_string.patch # x86_64 optimized slide_hash Patch1006: 1006-zlib-anolis-Optimize-slide_hash.patch -# optimized adler32 function in armv8 -Patch1007: 1007-zlib-anolis-Neon-Optimized-adler32.patch -# optimized crc32 function with crc32 + pmul instruction in armv8 -Patch1008: 1008-zlib-anolis-Optimized-crc32-pmul-mix.patch -# optimized chunk copy by neon in armv8 -Patch1009: 1009-zlib-anolis-Neon-Optimized-chunkcopy_neon.patch BuildRequires: automake, autoconf, libtool @@ -83,7 +82,10 @@ developing applications which use minizip. %prep %setup -q %patch0 -p1 -b .fixuncrypt -%patch1 -p1 +%patch1 -p1 -b .optimized-deflate +%patch2 -p1 +%patch3 -p1 +%patch4 -p1 %ifarch aarch64 %patch1001 -p1 @@ -91,9 +93,6 @@ developing applications which use minizip. 
 %patch1003 -p1
 %patch1004 -p1
 %patch1005 -p1
-%patch1007 -p1
-%patch1008 -p1
-%patch1009 -p1
 %endif
 
 %ifarch x86_64
@@ -176,19 +175,17 @@ find $RPM_BUILD_ROOT -name '*.la' -delete
 
 %changelog
-* Tue Aug 30 2022 binbin Xu - 1.2.11-17.0.4
-- add optimized chunk copy by neon patch for aarch64
-
-* Thu Aug 25 2022 binbin Xu - 1.2.11-17.0.3
-- add optimized crc32 with pmul mix crc patch for aarch64
-
-* Wed Aug 24 2022 binbin Xu - 1.2.11-17.0.2
-- add optimized adler32_neon patch for aarch64
-
-* Tue Nov 23 2021 Chunmei Xu - 1.2.11-17.0.1
+* Mon Oct 31 2022 Chunmei Xu - 1.2.11-19.0.1
 - remove optimised patches for s390 and IBM Z
 - add optimised patches for aarch64 and x86_64
 
+* Tue Aug 09 2022 Matej Mužila - 1.2.11-19
+- Fix heap-based buffer over-read or buffer overflow in inflate in inflate.c
+- Resolves: CVE-2022-37434
+
+* Tue Mar 29 2022 Matej Mužila - 1.2.11-18
+- Resolves: CVE-2018-25032
+
 * Mon Jun 15 2020 Ondrej Dubaj - 1.2.11-17
 - Fixed DFLTCC compression level switching issues (#1875492)
 - Enabled HW compression for compression levels 1 through 6 (#1847438)
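
For reference, the CVE-2018-25032 fix carried above replaces the overlaid d_buf/l_buf pair with a single sym_buf holding three bytes per symbol: two bytes of distance, one byte of literal/length. The sketch below is a minimal standalone model of that encoding under the default lit_bufsize of 16384; the demo_* names and the stripped-down state are invented here for illustration and are not zlib source.

#include <stdio.h>

#define LIT_BUFSIZE 16384   /* default: 1 << (memLevel + 6) with memLevel 8 */

/* pending_buf overlays sym_buf exactly as in the patch: the symbol area
 * starts one lit_bufsize into the 4 * lit_bufsize allocation. */
static unsigned char pending_buf[4 * LIT_BUFSIZE];
static unsigned char *sym_buf = pending_buf + LIT_BUFSIZE;
static unsigned sym_next = 0;
static const unsigned sym_end = (LIT_BUFSIZE - 1) * 3;

/* Mirrors the patched _tr_tally: dist == 0 marks a plain literal,
 * otherwise lc is the match length minus MIN_MATCH.  Returns nonzero
 * when the symbol table is full and the block must be flushed. */
static int demo_tally(unsigned dist, unsigned lc)
{
    sym_buf[sym_next++] = (unsigned char)dist;        /* distance, low byte  */
    sym_buf[sym_next++] = (unsigned char)(dist >> 8); /* distance, high byte */
    sym_buf[sym_next++] = (unsigned char)lc;          /* literal or length   */
    return sym_next == sym_end;
}

/* Mirrors the read side in the patched compress_block. */
static void demo_read(unsigned sx, unsigned *dist, int *lc)
{
    *dist = sym_buf[sx] & 0xff;
    *dist += (unsigned)(sym_buf[sx + 1] & 0xff) << 8;
    *lc = sym_buf[sx + 2];
}

int main(void)
{
    unsigned dist;
    int lc;

    demo_tally(0, 'A');       /* a literal byte */
    demo_tally(32768, 255);   /* a match: maximum distance, maximum length code */
    demo_read(3, &dist, &lc); /* decode the second symbol */
    printf("dist=%u lc=%d\n", dist, lc);  /* prints dist=32768 lc=255 */
    return 0;
}

Three symbol bytes are consumed for every at-most-31 bits of output, while pending_buf provides four bytes per symbol, which is the 4:3 ratio the analysis comment in the patch relies on to show the compressed output can never catch up with the unread symbols.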
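Similarly, the two CVE-2022-37434 patches above converge on a single guarded copy in inflate()'s extra-field handling. The sketch below models only that final, fully patched logic; hdr_t and demo_copy_extra are invented names, memcpy stands in for zmemcpy, and the real state machine around this copy is omitted.

#include <stdio.h>
#include <string.h>

typedef struct {                /* invented stand-in for gz_header */
    unsigned char *extra;       /* user buffer from inflateGetHeader()    */
    unsigned extra_len;         /* total extra-field length in the stream */
    unsigned extra_max;         /* capacity of the user buffer            */
} hdr_t;

/* Mirrors the fully patched copy.  length plays the role of
 * state->length, the count of extra-field bytes still unprocessed, so
 * len is the write offset of the current chunk into head->extra. */
void demo_copy_extra(hdr_t *head, unsigned length,
                     const unsigned char *next, unsigned copy)
{
    unsigned len;
    if (head != NULL && head->extra != NULL &&
        (len = head->extra_len - length) < head->extra_max) {
        memcpy(head->extra + len, next,
               len + copy > head->extra_max ? head->extra_max - len : copy);
    }
    /* Without the len < head->extra_max test, a chunk arriving after the
     * user buffer is already full gives len >= extra_max; the unsigned
     * expression extra_max - len then wraps to a huge count and the copy
     * runs far past the buffer -- the CVE-2022-37434 overflow. */
}

int main(void)
{
    unsigned char user_buf[4];
    unsigned char stream[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    hdr_t head = { user_buf, 8, sizeof(user_buf) };

    demo_copy_extra(&head, 8, stream, 6);      /* 1st chunk: copies 4 of 6 bytes */
    demo_copy_extra(&head, 2, stream + 6, 2);  /* 2nd chunk: len=6 >= max, skipped */
    printf("%d %d %d %d\n", user_buf[0], user_buf[1], user_buf[2], user_buf[3]);
    return 0;
}

The first patch added the len < extra_max test; the second merely moved the len assignment inside the guard so that state->head is never dereferenced when it is Z_NULL.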