From 067ff14139c39922b5aa3fb4c9e29839760b7ddf Mon Sep 17 00:00:00 2001
From: Yarovoy Danil WX1195294
Date: Tue, 2 May 2023 13:18:45 +0300
Subject: [PATCH 1/2] CRC32 pattern matching.

Under -fcrypto-accel, recognize in combine the table-driven CRC32 byte step

  dest = crc32_table[(src1 ^ src2) & 0xff] ^ (src1 >> 8);

and replace the four insns that compute it with the crc32b instruction
provided by the new TARGET_GEN_CRC32B hook (implemented for AArch64).

---
 gcc/combine.c                              | 344 ++++++++++++++++++++-
 gcc/common.opt                             |   4 +
 gcc/config/aarch64/aarch64.c               |   5 +
 gcc/doc/tm.texi                            |   4 +
 gcc/doc/tm.texi.in                         |   2 +
 gcc/target.def                             |   6 +
 gcc/testsuite/gcc.target/aarch64/crc32-1.c | 110 ++++++++
 gcc/testsuite/gcc.target/aarch64/crc32-2.c |  97 +++++++
 8 files changed, 571 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/crc32-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/crc32-2.c

diff --git a/gcc/combine.c b/gcc/combine.c
index 497e53289ca..afa4787292d 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -1,5 +1,5 @@
 /* Optimize by combining instructions for GNU compiler.
-   Copyright (C) 1987-2020 Free Software Foundation, Inc.
+   Copyright (C) 1987-2023 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -2651,6 +2651,341 @@ count_auto_inc (rtx, rtx, rtx, rtx, rtx, void *arg)
   return 0;
 }
 
+/* Operands of one table-driven CRC32 byte step
+     dest = crc32_table[(src1 ^ src2) & 0xff] ^ (src1 >> 8);
+   as recorded by crc32_pattern.  */
+
+struct crc32_combine_info
+{
+  rtx dest;
+  rtx src1;
+  rtx src2;
+};
+
+/* Return entry C of the byte-indexed lookup table for the reflected CRC-32
+   polynomial 0xedb88320, computed bit by bit.  */
+
+static unsigned int
+calc_crc32 (unsigned char c)
+{
+  int i;
+  unsigned int POLY = 0xedb88320;
+  unsigned int crc = c;
+  for (i = 0; i < 8; i++)
+    crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1;
+  return crc;
+}
+
+/* Emit after LOC a crc32b instruction computing DEST from SRC1 and SRC2.
+   Return the insn following the new one, or null (with nothing emitted)
+   if the target does not recognize the generated pattern.  */
+
+static rtx_insn *
+replace_32bit_case (rtx dest, rtx src1, rtx src2, rtx_insn *loc)
+{
+  gcc_assert (targetm.gen_crc32b);
+
+  rtx pattern = targetm.gen_crc32b (dest, src1, src2);
+
+  start_sequence ();
+  rtx_insn *crc32 = emit_insn (pattern);
+
+  if (recog_memoized (crc32) < 0)
+    {
+      end_sequence ();
+      return 0;
+    }
+
+  rtx_insn *seq = get_insns ();
+  end_sequence ();
+  emit_insn_after (seq, loc);
+
+  return NEXT_INSN (crc32);
+}
+
+/* We try to replace
+     dest = crc32_table[(src1 ^ src2) & 0xff] ^ (src1 >> 8);
+   with one crc32 instruction.  If src1 has 64 bit size we cannot
+   just replace with the instruction because crc32 has 32 bit input
+   and 32 bit output.  To preserve the behavior of the user code when
+   src1 > 0xffffffff, the bits of (src1 >> 8) that come from the upper
+   half of src1 are XORed into the zero-extended crc32 result.
+   Return the insn following the emitted sequence, or null (with nothing
+   emitted) if any generated pattern is not recognized.  */
+
+static rtx_insn *
+replace_64bit_case (rtx dest, rtx src1, rtx src2, rtx_insn *loc)
+{
+  gcc_assert (targetm.gen_crc32b);
+
+  rtx reg1 = gen_reg_rtx (DImode);
+  rtx reg2 = gen_reg_rtx (DImode);
+  rtx reg3 = gen_reg_rtx (DImode);
+  rtx crc = gen_reg_rtx (SImode);
+
+  auto_vec<rtx> patterns;
+
+  /* reg1 = src1 >> 8, computed in DImode.  */
+  const unsigned int crc32_shift_imm = 8;
+  rtx imm = GEN_INT (crc32_shift_imm);
+  patterns.safe_push (gen_rtx_SET (reg1,
+		      gen_rtx_LSHIFTRT (DImode,
+					gen_lowpart (DImode, src1),
+					imm)));
+
+  /* reg2 = reg1 & mask keeps bits 24..55, i.e. the part of src1 >> 8 that
+     comes from bits 32..63 of src1.  */
+  rtx mask = gen_int_mode (0xffffffff000000, DImode);
+  patterns.safe_push (gen_rtx_SET (reg2,
+		      gen_rtx_AND (DImode, reg1, mask)));
+
+  /* Compute the CRC into an SImode pseudo and zero-extend it explicitly.
+     Storing through a lowpart SUBREG of a DImode pseudo would leave the
+     upper 32 bits undefined in RTL terms, and they feed the XOR below.  */
+  patterns.safe_push (targetm.gen_crc32b (crc, src1, src2));
+  patterns.safe_push (gen_rtx_SET (reg3,
+		      gen_rtx_ZERO_EXTEND (DImode, crc)));
+
+  patterns.safe_push (gen_rtx_SET (gen_lowpart (DImode, dest),
+		      gen_rtx_XOR (DImode, reg3, reg2)));
+
+  rtx_insn *insn;
+  rtx pattern;
+  int i;
+
+  start_sequence ();
+  FOR_EACH_VEC_ELT (patterns, i, pattern)
+    {
+      insn = emit_insn (pattern);
+      if (recog_memoized (insn) < 0)
+	{
+	  end_sequence ();
+	  return 0;
+	}
+    }
+
+  rtx_insn *seq = get_insns ();
+  end_sequence ();
+  emit_insn_after (seq, loc);
+
+  return NEXT_INSN (insn);
+}
+
+/* Return true if MEM loads element IDX of a read-only array whose
+   initializer is the 256-entry CRC-32 lookup table, i.e. MEM is
+   crc32_table[IDX] with an address of the form
+   base_reg + IDX * element_size.  */
+
+static bool
+crc32_table_ref (rtx mem, rtx idx)
+{
+  rtx addr = XEXP (mem, 0);
+  if (GET_CODE (addr) != PLUS)
+    return false;
+
+  /* The table elements are integers.  Rejecting other modes also guarantees
+     that the GET_MODE_SIZE below is a compile-time constant.  */
+  if (!SCALAR_INT_MODE_P (GET_MODE (mem)))
+    return false;
+
+  struct address_info info;
+  decompose_mem_address (&info, mem);
+
+  /* The base and index fields are null when the address lacks that part.  */
+  if (!info.base || !info.index)
+    return false;
+
+  rtx base = *info.base;
+  rtx offset = *info.index;
+
+  /* Check if base is reg which holds anchor address.  */
+  if (!REG_P (base))
+    return false;
+
+  /* Check if offset = idx * sizeof (crc_table[0]).  */
+  int size = GET_MODE_SIZE (GET_MODE (mem)).to_constant ();
+  if (GET_CODE (offset) != MULT
+      || !REG_P (XEXP (offset, 0))
+      || !reg_overlap_mentioned_p (idx, offset)
+      || GET_MODE (XEXP (offset, 0)) != GET_MODE (idx)
+      || !CONST_INT_P (XEXP (offset, 1))
+      || INTVAL (XEXP (offset, 1)) != size)
+    return false;
+
+  tree expr = MEM_EXPR (mem);
+  if (!expr || TREE_CODE (expr) != ARRAY_REF)
+    return false;
+
+  tree decl = TREE_OPERAND (expr, 0);
+  if (!decl || !DECL_P (decl) || !TREE_READONLY (decl))
+    return false;
+
+  /* DECL_INITIAL need not be a CONSTRUCTOR, so check the tree code before
+     using the CONSTRUCTOR accessors.  */
+  const unsigned int crc_table_nelts = 256;
+  tree ctor = DECL_INITIAL (decl);
+  if (!ctor
+      || TREE_CODE (ctor) != CONSTRUCTOR
+      || CONSTRUCTOR_NELTS (ctor) != crc_table_nelts)
+    return false;
+
+  unsigned int ix;
+  tree elt_index, val;
+  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (ctor), ix, elt_index, val)
+    {
+      /* Element IX must initialize array slot IX; reject designated or
+	 ranged initializers that place it elsewhere.  */
+      if (elt_index
+	  && (!tree_fits_uhwi_p (elt_index)
+	      || tree_to_uhwi (elt_index) != ix))
+	return false;
+
+      if (TREE_CODE (val) != INTEGER_CST)
+	return false;
+
+      unsigned HOST_WIDE_INT ival = TREE_INT_CST_LOW (val);
+      unsigned HOST_WIDE_INT crc = calc_crc32 (ix);
+      if (ival != crc)
+	return false;
+    }
+  return true;
+}
+
+/* Check pattern:
+     dest = crc32_table[(src1 ^ src2) & 0xff] ^ (src1 >> 8);
+   I3, I2, I1 and I0 are the SET patterns of the insns being combined.
+   On a match, record the operands in INFO and return true.  */
+
+static bool
+crc32_pattern (rtx i3, rtx i2, rtx i1, rtx i0, struct crc32_combine_info *info)
+{
+  /* Check that i0: { i0dest = op1 ^ op2 }  */
+  rtx i0src = SET_SRC (i0);
+  rtx i0dest = SET_DEST (i0);
+  if (GET_CODE (i0src) != XOR || !REG_P (i0dest))
+    return false;
+
+  rtx i0op1 = XEXP (i0src, 0);
+  rtx i0op2 = XEXP (i0src, 1);
+  if ((!REG_P (i0op1) && !SUBREG_P (i0op1))
+      || (!REG_P (i0op2) && !SUBREG_P (i0op2)))
+    return false;
+
+  /* Check that i1: { i1dest = zero_extend ((unsigned char)i0dest) }  */
+  rtx i1src = SET_SRC (i1);
+  rtx i1dest = SET_DEST (i1);
+  if (!REG_P (i1dest)
+      || GET_CODE (i1src) != ZERO_EXTEND
+      || !SUBREG_P (XEXP (i1src, 0))
+      || GET_MODE (XEXP (i1src, 0)) != QImode
+      || !reg_overlap_mentioned_p (i0dest, i1src))
+    return false;
+
+  /* Check that i2: { i2dest = crc32_table[i1dest] }  */
+  rtx i2src = SET_SRC (i2);
+  rtx i2dest = SET_DEST (i2);
+  if (!REG_P (i2dest))
+    return false;
+
+  rtx mem;
+  if (GET_CODE (i2src) == MEM)
+    mem = i2src;
+  else if (GET_CODE (i2src) == ZERO_EXTEND
+	   && GET_CODE (XEXP (i2src, 0)) == MEM)
+    mem = XEXP (i2src, 0);
+  else
+    mem = NULL_RTX;
+
+  if (!mem || !crc32_table_ref (mem, i1dest))
+    return false;
+
+  /* Check that i3: { i3dest = i2dest ^ (op[1|2] >> 8) }  */
+  rtx i3src = SET_SRC (i3);
+  rtx i3dest = SET_DEST (i3);
+
+  if (GET_MODE (i3dest) != DImode
+      && GET_MODE (i3dest) != SImode)
+    return false;
+
+  if (!REG_P (i3dest) || GET_CODE (i3src) != XOR
+      || GET_CODE (XEXP (i3src, 0)) != LSHIFTRT)
+    return false;
+
+  rtx i3op1 = XEXP (XEXP (i3src, 0), 0);
+  rtx i3op2 = XEXP (XEXP (i3src, 0), 1);
+  rtx i3op3 = XEXP (i3src, 1);
+  if (!REG_P (i3op1)
+      || (!REG_P (i3op3) && !SUBREG_P (i3op3))
+      || !CONST_INT_P (i3op2) || INTVAL (i3op2) != 8
+      || !reg_overlap_mentioned_p (i2dest, i3op3))
+    return false;
+
+  bool is_op1 = reg_overlap_mentioned_p (i0op1, i3op1);
+  bool is_op2 = reg_overlap_mentioned_p (i0op2, i3op1);
+  if (!is_op1 && !is_op2)
+    return false;
+
+  info->dest = i3dest;
+  info->src1 = is_op1 ? i0op1 : i0op2;
+  info->src2 = is_op1 ? i0op2 : i0op1;
+
+  return true;
+}
+
+/* Try to replace I0..I3, which together compute one table-driven CRC32
+   byte step, with the crc32b instruction of the target, emitted after I3.
+   Return the insn following the replacement, or null if the insns do not
+   match or the replacement is not recognized.  */
+
+static rtx_insn *
+try_combine_crc32 (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0)
+{
+  struct crc32_combine_info info;
+
+  /* try_combine is also called for two- and three-insn combinations, in
+     which case I1 and/or I0 are null; single_set must not see those.  */
+  if (!i0 || !i1)
+    return 0;
+
+  rtx i3set = single_set (i3);
+  rtx i2set = single_set (i2);
+  rtx i1set = single_set (i1);
+  rtx i0set = single_set (i0);
+
+  if (!i3set || !i2set || !i1set || !i0set)
+    return 0;
+
+  if (!crc32_pattern (i3set, i2set, i1set, i0set, &info))
+    return 0;
+
+  if (!dead_or_set_p (i1, SET_DEST (i0set))
+      || !dead_or_set_p (i2, SET_DEST (i1set))
+      || !dead_or_set_p (i3, SET_DEST (i2set)))
+    return 0;
+
+  rtx dest = gen_lowpart (SImode, info.dest);
+  rtx src1 = gen_lowpart (SImode, info.src1);
+  rtx src2 = gen_lowpart (QImode, info.src2);
+
+  rtx_insn *next = NULL;
+  if (GET_MODE (info.dest) == DImode)
+    next = replace_64bit_case (dest, src1, src2, i3);
+  else if (GET_MODE (info.dest) == SImode)
+    next = replace_32bit_case (dest, src1, src2, i3);
+  else
+    gcc_unreachable ();
+
+  if (!next)
+    return 0;
+
+  SET_INSN_DELETED (i0);
+  SET_INSN_DELETED (i1);
+  SET_INSN_DELETED (i2);
+  SET_INSN_DELETED (i3);
+
+  return next;
+}
+
 /* Try to combine the insns I0, I1 and I2 into I3.
    Here I0, I1 and I2 appear earlier than I3.
    I0 and I1 can be zero; then we combine just I2 into I3, or I1 and I2 into
@@ -2742,6 +3077,13 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
   int nshift = 0;
   rtx set0, set3;
 
+  if (flag_crypto_accel && targetm.gen_crc32b)
+    {
+      rtx_insn *next_comb = try_combine_crc32 (i3, i2, i1, i0);
+      if (next_comb)
+	return next_comb;
+    }
+
   if (!flag_expensive_optimizations)
     return 0;
 
diff --git a/gcc/common.opt b/gcc/common.opt
index 6f0ed7cea59..2d24f3494f9 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1060,6 +1060,10 @@ fasynchronous-unwind-tables
 Common Report Var(flag_asynchronous_unwind_tables) Optimization
 Generate unwind tables that are exact at each instruction boundary.
 
+fcrypto-accel
+Common Report Var(flag_crypto_accel) Init(0) Optimization
+Perform crypto acceleration pattern matching.
+
 farray-widen-compare
 Common Report Var(flag_array_widen_compare) Optimization
 Extends types for pointers to arrays to improve array comparsion performance.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index cbdde11b07b..2e08a63a3ef 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23827,6 +23827,11 @@ aarch64_run_selftests (void)
 
 #endif /* #if CHECKING_P */
 
+#ifdef TARGET_CRC32
+#undef TARGET_GEN_CRC32B
+#define TARGET_GEN_CRC32B gen_aarch64_crc32b
+#endif
+
 #undef TARGET_STACK_PROTECT_GUARD
 #define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 0508fce57a7..d98e6c5d7c6 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11840,6 +11840,10 @@ object files that are not referenced from @code{main} and uses export
 lists.
 @end defmac
 
+@deftypefn {Target Hook} rtx TARGET_GEN_CRC32B (rtx @var{dest}, rtx @var{src1}, rtx @var{src2})
+This function generates the crc32 instruction if the target supports it.
+@end deftypefn
+
 @deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void)
 This target hook returns @code{true} past the point in which new jump
 instructions could be created.  On machines that require a register for
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 3b70ea4841a..c26729404d6 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8002,6 +8002,8 @@ object files that are not referenced from @code{main} and uses export
 lists.
 @end defmac
 
+@hook TARGET_GEN_CRC32B
+
 @hook TARGET_CANNOT_MODIFY_JUMPS_P
 
 @hook TARGET_HAVE_CONDITIONAL_EXECUTION
diff --git a/gcc/target.def b/gcc/target.def
index 2020564118b..bfc5e4e59c9 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2682,6 +2682,12 @@ modes and they have different conditional execution capability, such as ARM.",
  bool, (void),
  default_have_conditional_execution)
 
+DEFHOOK
+(gen_crc32b,
+ "This function generates the crc32 instruction if the target supports it.",
+ rtx, (rtx dest, rtx src1, rtx src2),
+ NULL)
+
 DEFHOOK
 (gen_ccmp_first,
  "This function prepares to emit a comparison insn for the first compare in a\n\
diff --git a/gcc/testsuite/gcc.target/aarch64/crc32-1.c b/gcc/testsuite/gcc.target/aarch64/crc32-1.c
new file mode 100644
index 00000000000..2719af3f3e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/crc32-1.c
@@ -0,0 +1,110 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline --save-temps -fcrypto-accel -march=armv8.2-a" } */
+
+#include <stdlib.h>
+
+static const unsigned long crc32_tab[] = {
+  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
+  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
+  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
+  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
+  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
+  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
+  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
+  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
+  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
+  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
+  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
+  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
+  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
+  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
+  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
+  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
+  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
+  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
+  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
+  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
+  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
+  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
+  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
+  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
+  0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
+  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
+  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
+  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
+  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
+  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
+  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
+  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
+  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
+  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
+  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
+  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
+  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
+  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
+  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
+  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
+  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
+  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
+  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
+  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
+  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
+  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
+  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
+  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
+  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
+  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
+  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
+  0x2d02ef8dL
+};
+
+unsigned long
+long_calc_crc_1 (unsigned char c, unsigned long init)
+{
+  unsigned long crc = init;
+  crc = crc32_tab[((int)crc ^ c) & 0xff] ^ (crc >> 8);
+  return crc;
+}
+
+unsigned long
+long_calc_crc_2 (unsigned char c, unsigned long init)
+{
+  unsigned long crc = init;
+  crc = crc32_tab[(crc ^ c) & 0xff] ^ (crc >> 8);
+  return crc;
+}
+
+
+unsigned int
+int_calc_crc (unsigned char c, unsigned int init)
+{
+  unsigned int crc = init;
+  crc = crc32_tab[(crc ^ c) & 0xff] ^ (crc >> 8);
+  return crc;
+}
+
+int
+main (int argc, char **argv)
+{
+  unsigned int ans = 0x6722b533;
+  unsigned long crc1 = long_calc_crc_1 (100, 0xffffffff);
+  if (crc1 != ans)
+    abort();
+
+  unsigned long overflow_ans = 0x1234561f22b533;
+  unsigned long crc2 = long_calc_crc_1 (100, 0x12345678ffffffff);
+  if (crc2 != overflow_ans)
+    abort();
+
+  unsigned long crc3 = long_calc_crc_2 (100, 0xffffffff);
+  if (crc3 != ans)
+    abort();
+
+  unsigned int crc4 = int_calc_crc (100, 0xffffffff);
+  if (crc4 != ans)
+    abort();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "crc32b" 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/crc32-2.c b/gcc/testsuite/gcc.target/aarch64/crc32-2.c
new file mode 100644
index 00000000000..99bd9037f9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/crc32-2.c
@@ -0,0 +1,97 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline -fcrypto-accel --save-temps -march=armv8.2-a" } */
+
+#include <stdlib.h>
+
+static const unsigned crc32_tab[] = {
+  0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+  0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+  0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+  0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+  0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+  0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+  0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+  0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+  0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+  0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+  0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+  0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+  0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+  0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+  0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+  0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+  0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+  0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+  0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+  0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+  0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+  0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+  0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+  0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+  0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+  0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+  0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+  0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+  0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+  0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+  0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+  0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+  0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+  0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+  0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+  0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+  0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+  0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+  0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+  0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+  0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+  0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+  0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+  0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+  0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+  0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+  0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+  0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+  0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+  0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+  0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+  0x2d02ef8d
+};
+
+unsigned long
+long_calc_crc (unsigned char c, unsigned long init)
+{
+  unsigned long crc = init;
+  crc = crc32_tab[(crc ^ c) & 0xff] ^ (crc >> 8);
+  return crc;
+}
+
+unsigned int
+int_calc_crc (unsigned char c, unsigned int init)
+{
+  unsigned int crc = init;
+  crc = crc32_tab[(crc ^ c) & 0xff] ^ (crc >> 8);
+  return crc;
+}
+
+int
+main (int argc, char **argv)
+{
+  unsigned int ans = 0x6722b533;
+  unsigned long crc1 = long_calc_crc (100, 0xffffffff);
+  if (crc1 != ans)
+    abort();
+
+  unsigned long overflow_ans = 0x1234561f22b533;
+  unsigned long crc2 = long_calc_crc (100, 0x12345678ffffffff);
+  if (crc2 != overflow_ans)
+    abort();
+
+  unsigned int crc3 = int_calc_crc (100, 0xffffffff);
+  if (crc3 != ans)
+    abort();
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "crc32b" 2 } } */
-- 
Gitee

From 30ce7c5c6dd0bd6af2e48d36ab21a84320879bff Mon Sep 17 00:00:00 2001
From: Yarovoy Danil
Date: Wed, 2 Aug 2023 20:50:24 +0800
Subject: [PATCH 2/2] Add special case in crc32 pattern matching

Also accept a SUBREG, not only a REG, as the operand that I3 shifts
right by 8.  The new test is parenthesized so that mixing && and ||
does not trigger -Wparentheses.

---
 gcc/combine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/combine.c b/gcc/combine.c
index afa4787292d..0f3c0955c99 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -2914,7 +2914,7 @@ crc32_pattern (rtx i3, rtx i2, rtx i1, rtx i0, struct crc32_combine_info *info)
   rtx i3op1 = XEXP (XEXP (i3src, 0), 0);
   rtx i3op2 = XEXP (XEXP (i3src, 0), 1);
   rtx i3op3 = XEXP (i3src, 1);
-  if (!REG_P (i3op1)
+  if ((!REG_P (i3op1) && !SUBREG_P (i3op1))
       || (!REG_P (i3op3) && !SUBREG_P (i3op3))
       || !CONST_INT_P (i3op2) || INTVAL (i3op2) != 8
       || !reg_overlap_mentioned_p (i2dest, i3op3))
-- 
Gitee