From ef72ae0e3dc75227687aefbc19fdd9c910bbdfc2 Mon Sep 17 00:00:00 2001 From: bule Date: Thu, 15 Dec 2022 14:34:16 +0800 Subject: [PATCH 1/2] crc loop optimization initial --- gcc/Makefile.in | 1 + gcc/common.opt | 4 + gcc/doc/invoke.texi | 6 +- gcc/match.pd | 169 +++++ gcc/passes.def | 1 + gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c | 85 +++ .../tree-ssa/loop-crc-1.c.042t.loop_crc | 90 +++ gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c | 88 +++ gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c | 85 +++ gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c | 89 +++ .../tree-ssa/loop-crc-4.c.042t.loop_crc | 0 .../loop-crc-calculation-check-fail.c | 156 +++++ ...crc-calculation-check-fail.c.042t.loop_crc | 64 ++ .../loop-crc-calculation-check-fail.s | 329 +++++++++ .../gcc.dg/tree-ssa/loop-crc-loop-form-fail.c | 111 +++ .../gcc.dg/tree-ssa/loop-crc-sucess.c | 84 +++ .../tree-ssa/loop-crc-table-check-fail.c | 113 +++ gcc/timevar.def | 1 + gcc/tree-pass.h | 1 + gcc/tree-ssa-loop-crc.c | 644 ++++++++++++++++++ 20 files changed, 2120 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c create mode 100644 gcc/tree-ssa-loop-crc.c diff --git 
a/gcc/Makefile.in b/gcc/Makefile.in index 3f06b8907ce..2a59acfbeaf 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1592,6 +1592,7 @@ OBJS = \ tree-ssa-loop-manip.o \ tree-ssa-loop-niter.o \ tree-ssa-loop-array-widen-compare.o \ + tree-ssa-loop-crc.o \ tree-ssa-loop-prefetch.o \ tree-ssa-loop-split.o \ tree-ssa-loop-unswitch.o \ diff --git a/gcc/common.opt b/gcc/common.opt index 238c97db8d5..02f2c0637c4 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1065,6 +1065,10 @@ Common Report Var(flag_array_widen_compare) Optimization Extends types for pointers to arrays to improve array comparsion performance. In some extreme situations this may result in unsafe behavior. +floop-crc +Common Report Var(flag_loop_crc) Optimization +Do the loop CRC conversion. + fauto-inc-dec Common Report Var(flag_auto_inc_dec) Init(1) Optimization Generate auto-inc/dec instructions. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 44f1f8a2e3d..8405efa270e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -460,7 +460,7 @@ Objective-C and Objective-C++ Dialects}. -fno-allocation-dce -fallow-store-data-races @gol -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol -farray-widen-compare -fauto-inc-dec -fbranch-probabilities @gol --fcaller-saves @gol +-fcaller-saves -floop-crc @gol -fcombine-stack-adjustments -fconserve-stack @gol -fcompare-elim -fcprop-registers -fcrossjumping @gol -fcse-follow-jumps -fcse-skip-blocks -fcx-fortran-rules @gol @@ -9721,6 +9721,10 @@ extreme situations this may result in unsafe behavior. This option may generate better or worse code; results are highly dependent on the structure of loops within the source code. +@item -floop-crc +@opindex floop-crc +Do the loop CRC conversion. + @item -fdce @opindex fdce Perform dead code elimination (DCE) on RTL@. 
diff --git a/gcc/match.pd b/gcc/match.pd index 01f81b063f9..87b31695348 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3487,6 +3487,175 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) ) #endif + +#if GIMPLE +(if (canonicalize_math_p ()) +/* These patterns are mostly used by PHIOPT to move some operations outside of + the if statements. They should be done late because it gives jump threading + and few other passes to reduce what is going on. */ +/* a ? x op POW2 : x -> x op (a ? POW2 : 0). */ + (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate) + (simplify + (cond @0 (op:s @1 INTEGER_CST@2) @1) + /* powerof2cst */ + (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2)) + (with { + tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); + } + (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; }))) + ) + ) + ) +) +#endif + +#if GIMPLE +/* These patterns are mostly used by FORWPROP to move some operations outside of + the if statements. They should be done late because it gives jump threading + and few other passes to reduce what is going on. 
*/ +/* Mul64 is a multiplication algorithm that computes the 128-bit product of two 64-bit integers + (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { + In0Lo = In0(D) & 4294967295; + In0Hi = In0(D) >> 32; + In1Lo = In1(D) & 4294967295; + In1Hi = In1(D) >> 32; + Mull_01 = In0Hi * In1Lo; + Addc = In0Lo * In1Hi + Mull_01; + addc32 = Addc << 32; + ResLo = In0Lo * In1Lo + addc32; + ResHi = ((long unsigned int) (addc32 > ResLo)) + + (((long unsigned int) (Mull_01 > Addc)) << 32) + (Addc >> 32) + In0Hi * In1Hi; + } */ + (simplify + (plus + (plus + (convert + (gt @10 + (plus + (mult @4 @6) + (lshift@10 @9 @3)))) + (lshift + (convert + (gt @8 @9)) @3)) + (plus@11 + (rshift + (plus@9 + (mult (bit_and@4 SSA_NAME@0 @2) @7) + (mult@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) @3) + (mult (rshift@5 SSA_NAME@0 @3) + (rshift@7 SSA_NAME@1 INTEGER_CST@3)))) + (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && + TYPE_PRECISION (type) == 64) + (with { + tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); + tree shift = build_int_cst (integer_type_node, 64); + //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) + } + (convert:type (rshift + (mult (convert:i128_type @0) (convert:i128_type @1)) { shift; }))) + ) + ) + + /* (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { + In0Lo = In0(D) & 4294967295; + In0Hi = In0(D) >> 32; + In1Lo = In1(D) & 4294967295; + In1Hi = In1(D) >> 32; + Mull_01 = In0Hi * In1Lo; + Addc = In0Lo * In1Hi + Mull_01; + addc32 = Addc << 32; + ResLo = In0(D) * In1(D); + ResHi = ((long unsigned int) (addc32 > ResLo)) + + (((long unsigned int) (Mull_01 > Addc)) << 32) + (Addc >> 32) + In0Hi * In1Hi; + } */ + (simplify + (plus + (plus + (convert + (gt (lshift@10 @9 @3) + (mult @0 @1))) + (lshift + (convert + (gt @8 @9)) @3)) + (plus@11 + (rshift + (plus@9 + (mult (bit_and@4 SSA_NAME@0 @2) @7) + (mult@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) @3) + (mult (rshift@5 SSA_NAME@0 
@3) + (rshift@7 SSA_NAME@1 INTEGER_CST@3)))) + (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && + TYPE_PRECISION (type) == 64) + (with { + tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); + tree shift = build_int_cst (integer_type_node, 64); + //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) + } + (convert:type (rshift + (mult (convert:i128_type @0) (convert:i128_type @1)) { shift; }))) + ) + ) +#endif + +#if GIMPLE +/* These patterns are mostly used by FORWPROP to move some operations outside of + the if statements. They should be done late because it gives jump threading + and few other passes to reduce what is going on. */ + /* + In0Lo = In0(D) & 4294967295; + In0Hi = In0(D) >> 32; + In1Lo = In1(D) & 4294967295; + In1Hi = In1(D) >> 32; + Addc = In0Lo * In1Hi + In0Hi * In1Lo; + addc32 = Addc << 32; + ResLo = In0Lo * In1Lo + addc32 + */ + (simplify + (plus (mult @4 @5) + (lshift + (plus + (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) + (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) INTEGER_CST@3)) + (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && + TYPE_PRECISION (type) == 64) + (with { + tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); + tree shift = build_int_cst (integer_type_node, 64); + //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) + } + (mult (convert:type @0) (convert:type @1))) + ) + ) +#endif + + +#if GIMPLE +/* Try to match */ + /* +_4 = (int) _3; //NOP_EXPR (SSA_NAME @2) +_5 = _4 ^ c_10; //BIT_XOR_EXPR (SSA_NAME@1, SSA_NAME) +_6 = _5 & 255; //BIT_AND_EXPR (SSA_NAME, INTEGER_CST@3) + */ +(match (crc_match_index @1 @2 @3) + (bit_and (bit_xor (nop SSA_NAME@2) SSA_NAME@1) INTEGER_CST@3) + (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@3) == 255)) +) + +#endif + +#if GIMPLE +/* Try to match */ + /* +_8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST 
@2) +c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) + */ +(match (crc_match_res @1 @2 @3) + (bit_xor SSA_NAME@3 (rshift SSA_NAME@1 INTEGER_CST@2)) + (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@2) == 8)) +) + +#endif + /* Simplification moved from fold_cond_expr_with_comparison. It may also be extended. */ /* This pattern implements two kinds simplification: diff --git a/gcc/passes.def b/gcc/passes.def index ea50db08623..7abd946ce8d 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -92,6 +92,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_cd_dce); NEXT_PASS (pass_phiopt, true /* early_p */); NEXT_PASS (pass_array_widen_compare); + NEXT_PASS (pass_loop_crc); NEXT_PASS (pass_tail_recursion); NEXT_PASS (pass_convert_switch); NEXT_PASS (pass_cleanup_eh); diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c new file mode 100644 index 00000000000..07f9e01ecee --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c @@ -0,0 +1,85 @@ +/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ + +#include +#include +typedef unsigned long ulg; +typedef unsigned char uch; + +static const ulg crc_32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 
0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 
0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; + +ulg updcrc(s, n) + uch *s; /* pointer to bytes to pump through */ + unsigned n; /* number of bytes in s[] */ +{ + register ulg c; /* temporary variable */ + + static ulg crc = (ulg)0xffffffffL; /* shift register contents */ + + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n); + } + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +} +/* { dg-final { scan-tree-dump-times "Processing loop" 1 "loop_crc"} } */ +/* { dg-final { scan-tree-dump-times "the loop can be optimized" 1 "loop_crc"} } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc new file mode 100644 index 00000000000..c726059f3d3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc @@ -0,0 +1,90 @@ + +;; Function updcrc (updcrc, funcdef_no=0, decl_uid=3687, cgraph_uid=1, symbol_order=1) + +;; 2 loops found +;; +;; Loop 0 +;; header 0, latch 1 +;; depth 0, outer -1 +;; nodes: 0 1 2 3 6 4 7 5 +;; +;; Loop 1 +;; header 4, latch 7 +;; depth 1, outer 0 +;; nodes: 4 7 +;; 2 succs { 5 3 } +;; 3 succs { 6 5 } +;; 6 succs { 4 } +;; 4 succs 
{ 7 5 } +;; 7 succs { 4 } +;; 5 succs { 1 } + +Starting the loop_crc pass +====================================== +Processing loop 1: +====================================== +;; +;; Loop 1 +;; header 4, latch 7 +;; depth 1, outer 0 +;; nodes: 4 7 + + +The 1th loop form is success matched,and the loop can be optimized. +updcrc (uch * s, unsigned int n) +{ + static ulg crc = 4294967295; + register ulg c; + unsigned char _2; + long unsigned int _3; + long unsigned int _4; + long unsigned int _5; + long unsigned int _6; + long unsigned int _7; + ulg _21; + + : + if (s_12(D) == 0B) + goto ; [INV] + else + goto ; [INV] + + : + c_14 = crc; + if (n_15(D) != 0) + goto ; [INV] + else + goto ; [INV] + + : + + : + # s_8 = PHI + # n_9 = PHI + # c_10 = PHI + s_16 = s_8 + 1; + _2 = *s_8; + _3 = (long unsigned int) _2; + _4 = _3 ^ c_10; + _5 = _4 & 255; + _6 = crc_32_tab[_5]; + _7 = c_10 >> 8; + c_17 = _6 ^ _7; + n_18 = n_9 + 4294967295; + if (n_18 != 0) + goto ; [INV] + else + goto ; [INV] + + : + goto ; [100.00%] + + : + # c_11 = PHI <4294967295(2), c_14(3), c_17(4)> + crc = c_11; + _21 = c_11 ^ 4294967295; + return _21; + +} + + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c new file mode 100644 index 00000000000..f73c4d550f3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c @@ -0,0 +1,88 @@ +/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ + +#include +#include +typedef unsigned long ulg; +typedef unsigned char uch; + +static const ulg crc_32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 
0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 
0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; + +ulg updcrc(s, n) + uch *s; /* pointer to bytes to pump through */ + unsigned n; /* number of bytes in s[] */ +{ + register ulg c; /* temporary variable */ + + static ulg crc = (ulg)0xffffffffL; /* shift register contents */ + + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); + for (int i = 0; i < 5; i++) { + c++; + } + + } while (--n); + } + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +} +/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 
1 "loop_crc"} } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c new file mode 100644 index 00000000000..70eb1b81435 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c @@ -0,0 +1,85 @@ +/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ + +#include +#include +typedef unsigned long ulg; +typedef unsigned char uch; + +static const ulg crc_32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 
0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; + +ulg 
updcrc(s, n) + uch *s; /* pointer to bytes to pump through */ + unsigned n; /* number of bytes in s[] */ +{ + register ulg c; /* temporary variable */ + + static ulg crc = (ulg)0xffffffffL; /* shift register contents */ + + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n || c != 0) ; + } + crc = c; +exit1: + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +} +/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 1 "loop_crc"} } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c new file mode 100644 index 00000000000..1d7e0a319bc --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c @@ -0,0 +1,89 @@ +/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ + +#include +#include +typedef unsigned long ulg; +typedef unsigned char uch; + +static const ulg crc_32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 
0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 
0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; +int test[5] = {0}; + +ulg updcrc(s, n) + uch *s; /* pointer to bytes to pump through */ + unsigned n; /* number of bytes in s[] */ +{ + register ulg c; /* temporary variable */ + + static ulg crc = (ulg)0xffffffffL; /* shift register contents */ + + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) * test[c%5]; + } while (--n) ; + } + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); + test[c%5] = c; + } while (--n) ; + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +} +/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 
2 "loop_crc"} } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc new file mode 100644 index 00000000000..e69de29bb2d diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c new file mode 100644 index 00000000000..71b25f5375d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c @@ -0,0 +1,156 @@ +/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ + +#include +#include +typedef unsigned long ulg; +typedef unsigned char uch; + +static const ulg crc_32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 
0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 
0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; + +int test[5] = {0}; + +ulg updcrc(s, n) + uch *s; + unsigned n; +{ + register ulg c; + + static ulg crc = (ulg)0xffffffffL; + int a = 0; + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) + do { + a++; + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) ; + } while (--n) ; + } + crc = c; + return c ^ 0xffffffffL*a; +} + +ulg updcrc1(s, n) + uch *s; + unsigned n; +{ + register ulg c; + + static ulg crc = (ulg)0xffffffffL; + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + unsigned n_back = n; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) ; + n = n - 2; + } while (n != 0) ; + } + + crc = c; + return c ^ 0xffffffffL; +} + +ulg updcrc2(s, n) + uch *s; + unsigned n; +{ + register ulg c; + + static ulg crc = (ulg)0xffffffffL; + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + unsigned n_back = n; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) + 1; + } while (--n) ; + } + + crc = c; + return c ^ 0xffffffffL; +} +/* +ulg updcrc3(s, n) + uch *s; + int n; +{ + register ulg c; + + static ulg crc = (ulg)0xffffffffL; + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); + --n; + } while (n ) ; + } + + crc = c; + return c ^ 0xffffffffL; +}*/ +/* { dg-final { scan-tree-dump-times "num of phi noeds check failed." 1 "loop_crc"} } */ +/* { dg-final { scan-tree-dump-times "evolution pattern check failed." 1 "loop_crc"} } */ +/* { dg-final { scan-tree-dump-times "calculation pattern check failed." 
1 "loop_crc"} } */ + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc new file mode 100644 index 00000000000..6d52a86840c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc @@ -0,0 +1,64 @@ + +;; Function updcrc3 (updcrc3, funcdef_no=0, decl_uid=3687, cgraph_uid=1, symbol_order=1) + +;; 2 loops found +;; +;; Loop 0 +;; header 0, latch 1 +;; depth 0, outer -1 +;; nodes: 0 1 2 3 4 5 +;; +;; Loop 1 +;; header 4, latch 4 +;; depth 1, outer 0 +;; nodes: 4 +;; 2 succs { 5 3 } +;; 3 succs { 4 5 } +;; 4 succs { 4 } +;; 5 succs { 1 } + +Starting the loop_crc pass +====================================== +Processing loop 1: +====================================== +;; +;; Loop 1 +;; header 4, latch 4 +;; depth 1, outer 0 +;; nodes: 4 + + + +Wrong loop form for crc matching. +updcrc3 (uch * s, unsigned int n) +{ + unsigned int n_back; + static ulg crc = 4294967295; + register ulg c; + ulg _22; + + : + if (s_12(D) == 0B) + goto ; [INV] + else + goto ; [INV] + + : + c_14 = crc; + if (n_15(D) != 0) + goto ; [INV] + else + goto ; [INV] + + : + goto ; [100.00%] + + : + # c_11 = PHI <4294967295(2), c_14(3)> + crc = c_11; + _22 = c_11 ^ 4294967295; + return _22; + +} + + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s new file mode 100644 index 00000000000..cae934bfecd --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s @@ -0,0 +1,329 @@ + .arch armv8-a + .file "loop-crc-calculation-check-fail.c" + .text + .section .rodata + .align 3 + .type crc_32_tab, %object + .size crc_32_tab, 2048 +crc_32_tab: + .xword 0 + .xword 1996959894 + .xword 3993919788 + .xword 2567524794 + .xword 124634137 + .xword 1886057615 + .xword 3915621685 + .xword 2657392035 + .xword 249268274 + .xword 2044508324 + .xword 
3772115230 + .xword 2547177864 + .xword 162941995 + .xword 2125561021 + .xword 3887607047 + .xword 2428444049 + .xword 498536548 + .xword 1789927666 + .xword 4089016648 + .xword 2227061214 + .xword 450548861 + .xword 1843258603 + .xword 4107580753 + .xword 2211677639 + .xword 325883990 + .xword 1684777152 + .xword 4251122042 + .xword 2321926636 + .xword 335633487 + .xword 1661365465 + .xword 4195302755 + .xword 2366115317 + .xword 997073096 + .xword 1281953886 + .xword 3579855332 + .xword 2724688242 + .xword 1006888145 + .xword 1258607687 + .xword 3524101629 + .xword 2768942443 + .xword 901097722 + .xword 1119000684 + .xword 3686517206 + .xword 2898065728 + .xword 853044451 + .xword 1172266101 + .xword 3705015759 + .xword 2882616665 + .xword 651767980 + .xword 1373503546 + .xword 3369554304 + .xword 3218104598 + .xword 565507253 + .xword 1454621731 + .xword 3485111705 + .xword 3099436303 + .xword 671266974 + .xword 1594198024 + .xword 3322730930 + .xword 2970347812 + .xword 795835527 + .xword 1483230225 + .xword 3244367275 + .xword 3060149565 + .xword 1994146192 + .xword 31158534 + .xword 2563907772 + .xword 4023717930 + .xword 1907459465 + .xword 112637215 + .xword 2680153253 + .xword 3904427059 + .xword 2013776290 + .xword 251722036 + .xword 2517215374 + .xword 3775830040 + .xword 2137656763 + .xword 141376813 + .xword 2439277719 + .xword 3865271297 + .xword 1802195444 + .xword 476864866 + .xword 2238001368 + .xword 4066508878 + .xword 1812370925 + .xword 453092731 + .xword 2181625025 + .xword 4111451223 + .xword 1706088902 + .xword 314042704 + .xword 2344532202 + .xword 4240017532 + .xword 1658658271 + .xword 366619977 + .xword 2362670323 + .xword 4224994405 + .xword 1303535960 + .xword 984961486 + .xword 2747007092 + .xword 3569037538 + .xword 1256170817 + .xword 1037604311 + .xword 2765210733 + .xword 3554079995 + .xword 1131014506 + .xword 879679996 + .xword 2909243462 + .xword 3663771856 + .xword 1141124467 + .xword 855842277 + .xword 2852801631 + .xword 
3708648649 + .xword 1342533948 + .xword 654459306 + .xword 3188396048 + .xword 3373015174 + .xword 1466479909 + .xword 544179635 + .xword 3110523913 + .xword 3462522015 + .xword 1591671054 + .xword 702138776 + .xword 2966460450 + .xword 3352799412 + .xword 1504918807 + .xword 783551873 + .xword 3082640443 + .xword 3233442989 + .xword 3988292384 + .xword 2596254646 + .xword 62317068 + .xword 1957810842 + .xword 3939845945 + .xword 2647816111 + .xword 81470997 + .xword 1943803523 + .xword 3814918930 + .xword 2489596804 + .xword 225274430 + .xword 2053790376 + .xword 3826175755 + .xword 2466906013 + .xword 167816743 + .xword 2097651377 + .xword 4027552580 + .xword 2265490386 + .xword 503444072 + .xword 1762050814 + .xword 4150417245 + .xword 2154129355 + .xword 426522225 + .xword 1852507879 + .xword 4275313526 + .xword 2312317920 + .xword 282753626 + .xword 1742555852 + .xword 4189708143 + .xword 2394877945 + .xword 397917763 + .xword 1622183637 + .xword 3604390888 + .xword 2714866558 + .xword 953729732 + .xword 1340076626 + .xword 3518719985 + .xword 2797360999 + .xword 1068828381 + .xword 1219638859 + .xword 3624741850 + .xword 2936675148 + .xword 906185462 + .xword 1090812512 + .xword 3747672003 + .xword 2825379669 + .xword 829329135 + .xword 1181335161 + .xword 3412177804 + .xword 3160834842 + .xword 628085408 + .xword 1382605366 + .xword 3423369109 + .xword 3138078467 + .xword 570562233 + .xword 1426400815 + .xword 3317316542 + .xword 2998733608 + .xword 733239954 + .xword 1555261956 + .xword 3268935591 + .xword 3050360625 + .xword 752459403 + .xword 1541320221 + .xword 2607071920 + .xword 3965973030 + .xword 1969922972 + .xword 40735498 + .xword 2617837225 + .xword 3943577151 + .xword 1913087877 + .xword 83908371 + .xword 2512341634 + .xword 3803740692 + .xword 2075208622 + .xword 213261112 + .xword 2463272603 + .xword 3855990285 + .xword 2094854071 + .xword 198958881 + .xword 2262029012 + .xword 4057260610 + .xword 1759359992 + .xword 534414190 + .xword 
2176718541 + .xword 4139329115 + .xword 1873836001 + .xword 414664567 + .xword 2282248934 + .xword 4279200368 + .xword 1711684554 + .xword 285281116 + .xword 2405801727 + .xword 4167216745 + .xword 1634467795 + .xword 376229701 + .xword 2685067896 + .xword 3608007406 + .xword 1308918612 + .xword 956543938 + .xword 2808555105 + .xword 3495958263 + .xword 1231636301 + .xword 1047427035 + .xword 2932959818 + .xword 3654703836 + .xword 1088359270 + .xword 936918000 + .xword 2847714899 + .xword 3736837829 + .xword 1202900863 + .xword 817233897 + .xword 3183342108 + .xword 3401237130 + .xword 1404277552 + .xword 615818150 + .xword 3134207493 + .xword 3453421203 + .xword 1423857449 + .xword 601450431 + .xword 3009837614 + .xword 3294710456 + .xword 1567103746 + .xword 711928724 + .xword 3020668471 + .xword 3272380065 + .xword 1510334235 + .xword 755167117 + .text + .align 2 + .global updcrc3 + .type updcrc3, %function +updcrc3: +.LFB0: + .cfi_startproc + str x19, [sp, -48]! + .cfi_def_cfa_offset 48 + .cfi_offset 19, -48 + str x0, [sp, 24] + str w1, [sp, 20] + ldr x0, [sp, 24] + cmp x0, 0 + bne .L2 + mov x19, 4294967295 + b .L3 +.L2: + adrp x0, crc.0 + add x0, x0, :lo12:crc.0 + ldr x19, [x0] + ldr w0, [sp, 20] + str w0, [sp, 44] + ldr w0, [sp, 20] + cmp w0, 0 + beq .L3 +.L4: + ldr x0, [sp, 24] + add x1, x0, 1 + str x1, [sp, 24] + ldrb w0, [x0] + and x0, x0, 255 + eor x0, x19, x0 + and x1, x0, 255 + adrp x0, crc_32_tab + add x0, x0, :lo12:crc_32_tab + ldr x1, [x0, x1, lsl 3] + lsr x0, x19, 8 + eor x19, x1, x0 + ldr w0, [sp, 20] + sub w0, w0, #1 + str w0, [sp, 20] + ldr w0, [sp, 20] + cmp w0, 999 + bls .L4 +.L3: + adrp x0, crc.0 + add x0, x0, :lo12:crc.0 + str x19, [x0] + eor x0, x19, 4294967295 + ldr x19, [sp], 48 + .cfi_restore 19 + .cfi_def_cfa_offset 0 + ret + .cfi_endproc +.LFE0: + .size updcrc3, .-updcrc3 + .data + .align 3 + .type crc.0, %object + .size crc.0, 8 +crc.0: + .xword 4294967295 + .ident "GCC: (Kunpeng gcc 10.3.1-2.3.0.b006) 10.3.1" + .section 
.note.GNU-stack,"",@progbits diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c new file mode 100644 index 00000000000..b59704e311e --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c @@ -0,0 +1,111 @@ +/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ + +#include +#include +typedef unsigned long ulg; +typedef unsigned char uch; + +static const ulg crc_32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 
0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; 
+ +/* check when the loop have a innor loop, should fail. */ +ulg updcrc(s, n) + uch *s; /* pointer to bytes to pump through */ + unsigned n; /* number of bytes in s[] */ +{ + register ulg c; /* temporary variable */ + + static ulg crc = (ulg)0xffffffffL; /* shift register contents */ + + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); + for (int i = 0; i < 5; i++) { + c++; + } + + } while (--n); + } + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +} + +/* check when the loop have a second backedge, should fail. */ +ulg updcrc1(s, n) + uch *s; /* pointer to bytes to pump through */ + unsigned n; /* number of bytes in s[] */ +{ + register ulg c; /* temporary variable */ + + static ulg crc = (ulg)0xffffffffL; /* shift register contents */ + + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n || c != 0) ; + } + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +} +/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 
2 "loop_crc"} } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c new file mode 100644 index 00000000000..e1e16eaf29c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c @@ -0,0 +1,84 @@ +/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ + +#include +#include +typedef unsigned long ulg; +typedef unsigned char uch; + +static const ulg crc_32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 
0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; 
+ +ulg updcrc(s, n) + uch *s; /* pointer to bytes to pump through */ + unsigned n; /* number of bytes in s[] */ +{ + register ulg c; /* temporary variable */ + + static ulg crc = (ulg)0xffffffffL; /* shift register contents */ + + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n); + } + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +} +/* { dg-final { scan-tree-dump-times "the loop can be optimized" 1 "loop_crc"} } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c new file mode 100644 index 00000000000..f03a4fa8245 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c @@ -0,0 +1,113 @@ +/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ + +#include +#include +typedef unsigned long ulg; +typedef unsigned char uch; + +static const ulg crc_32_tab[] = { + 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, + 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, + 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, + 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, + 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, + 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, + 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 
0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 
0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf1L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; +int test[5] = {0}; + +/* check when the loop is doing more then 1 array read or writing an array, both should fail. */ +ulg updcrc(s, n) + uch *s; /* pointer to bytes to pump through */ + unsigned n; /* number of bytes in s[] */ +{ + register ulg c; /* temporary variable */ + + static ulg crc = (ulg)0xffffffffL; /* shift register contents */ + + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) * test[c%5]; + } while (--n) ; + } + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); + test[c%5] = c; + } while (--n) ; + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +} + +/* check when the loop is not working on a correct crc_table. should fail. */ +ulg updcrc1(s, n) + uch *s; /* pointer to bytes to pump through */ + unsigned n; /* number of bytes in s[] */ +{ + register ulg c; /* temporary variable */ + + static ulg crc = (ulg)0xffffffffL; /* shift register contents */ + + if (s == NULL) { + c = 0xffffffffL; + } else { + c = crc; + if (n) + do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n) ; + } + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +} +/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 2 "loop_crc"} } */ +/* { dg-final { scan-tree-dump-times "Table check fail. Table not matching." 
1 "loop_crc"} } */ \ No newline at end of file diff --git a/gcc/timevar.def b/gcc/timevar.def index 2814b14f21c..ba86a1b7bb9 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -215,6 +215,7 @@ DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies") DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier") DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier") DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare") +DEFTIMEVAR (TV_TREE_LOOP_CRC, "tree loop crc") DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 3cdc124668b..027f8992dcd 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -437,6 +437,7 @@ extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt); extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt); extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_loop_crc (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt); extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt); extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt); diff --git a/gcc/tree-ssa-loop-crc.c b/gcc/tree-ssa-loop-crc.c new file mode 100644 index 00000000000..4982384c607 --- /dev/null +++ b/gcc/tree-ssa-loop-crc.c @@ -0,0 +1,644 @@ +/* Array widen compare. + Copyright (C) 2022-2022 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. 
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "gimple-ssa.h"
+#include "tree-pretty-print.h"
+#include "fold-const.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop.h"
+#include "ssa.h"
+#include "tree-into-ssa.h"
+#include "cfganal.h"
+#include "cfgloop.h"
+#include "gimple-pretty-print.h"
+#include "tree-cfg.h"
+#include "cgraph.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "gimple-fold.h"
+
+/* Match.pd functions used to match the crc expressions.  */
+extern bool gimple_crc_match_index (tree, tree *, tree (*)(tree));
+extern bool gimple_crc_match_res (tree, tree *, tree (*)(tree));
+
+/* The statement that reads the crc table; recorded by
+   only_one_array_read.  */
+static gimple *crc_table_read_stmt = NULL;
+
+
+/* Check the control flow of LOOP.  Return true only if:
+   1. it is a do-while loop made of header and latch only (at most two
+      basic blocks) with no inner loop;
+   2. it has exactly one exit edge;
+   3. its header has exactly two incoming edges: the back edge from the
+      latch and one entry edge coming from outside the loop.  */
+static bool
+crc_loop_form_check (class loop *loop)
+{
+  if (loop->num_nodes > 2 || loop->inner)
+    return false;
+
+  /* The vector returned by get_loop_exit_edges is owned by the caller,
+     so release it as soon as the number of exits is known.  */
+  vec<edge> edges = get_loop_exit_edges (loop);
+  unsigned num_exits = edges.length ();
+  edges.release ();
+  if (num_exits != 1)
+    return false;
+
+  /* The header should have only 2 incoming edges: the preheader edge
+     and the back edge from the latch.  */
+  if (EDGE_COUNT (loop->header->preds) != 2)
+    return false;
+  edge e1 = EDGE_PRED (loop->header, 0);
+  edge e2 = EDGE_PRED (loop->header, 1);
+
+  if ((e1->src == loop->latch && e2->src->loop_father != loop)
+      || (e2->src == loop->latch && e1->src->loop_father != loop))
+    return true;
+
+  return false;
+}
+
+/* Scan the statements of LOOP's header and check that exactly one of
+   them reads an array element and that none of them stores to one.
+   On success CRC_TABLE is set to the ARRAY_REF that is read and the
+   reading statement is recorded in crc_table_read_stmt.  CRC_TABLE is
+   expected to be NULL on entry; a non-NULL value makes the first array
+   read count as a second one and the check fail.  */
+static bool
+only_one_array_read (class loop *loop, tree &crc_table)
+{
+  bool found = false;
+  for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      if (stmt == NULL)
+        return false;
+      if (!is_gimple_assign (stmt))
+        continue;
+
+      /* Any store to an array element disqualifies the loop.  */
+      if (TREE_CODE (gimple_assign_lhs (stmt)) == ARRAY_REF)
+        return false;
+
+      if (TREE_CODE (gimple_assign_rhs1 (stmt)) != ARRAY_REF)
+        continue;
+
+      /* Only a single array read is allowed.  */
+      if (crc_table != NULL)
+        return false;
+      crc_table = gimple_assign_rhs1 (stmt);
+      crc_table_read_stmt = stmt;
+      found = true;
+    }
+  return found;
+}
+
+static const unsigned HOST_WIDE_INT crc_32_tab[] = {
+  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
+  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
+  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
+  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
+  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
+  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
+  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L,
0x4c69105eL, 0xd56041e4L, + 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, + 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, + 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, + 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, + 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, + 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, + 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, + 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, + 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, + 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, + 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, + 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, + 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, + 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, + 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, + 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, + 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, + 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, + 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, + 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, + 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, + 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, + 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, + 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, + 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, + 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, + 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, + 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, + 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, + 0xcc0c7795L, 0xbb0b4703L, 
0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, + 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, + 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, + 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, + 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, + 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, + 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, + 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, + 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, + 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, + 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, + 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, + 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, + 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, + 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, + 0x2d02ef8dL +}; + +/* Check the content of the array. */ +static bool +match_crc_table (tree crc_table) +{ + unsigned HOST_WIDE_INT lb = tree_to_uhwi (array_ref_low_bound (crc_table)); + unsigned HOST_WIDE_INT ub = tree_to_uhwi (array_ref_up_bound (crc_table)); + unsigned HOST_WIDE_INT es = tree_to_uhwi (array_ref_element_size (crc_table)); + if (lb != 0 || ub != 255 || es != 8) + return false; + + tree decl = TREE_OPERAND (crc_table, 0); + tree ctor = ctor_for_folding(decl); + for (int i = 0; i < 255; i++) { + unsigned HOST_WIDE_INT val = tree_to_uhwi (CONSTRUCTOR_ELT (ctor,i)->value); + if (crc_32_tab[i] != val) + return false; + } + return true; +} + + +/* Check the crc table. The loop should have only one data reference. + And match the data reference with the predefined array. */ +static bool +crc_table_check (class loop *loop) +{ + tree crc_table = NULL; + if (!only_one_array_read (loop, crc_table)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nTable check fail. 
not only single array is read.\n"); + return false; + } + if (!match_crc_table (crc_table)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nTable check fail. Table not matching.\n"); + return false; + } + return true; +} + +/* check whether the evolution pattern of phi is phi = SSA_NAME + target*/ +static bool +evolution_pattern_plus_with_p (class loop* loop, gphi *phi, unsigned HOST_WIDE_INT target) +{ + edge backedge = find_edge (loop->latch, loop->header); + if (backedge == NULL) + return false; + tree evolution_node = PHI_ARG_DEF_FROM_EDGE (phi, backedge); + gimple *evolution_expr = SSA_NAME_DEF_STMT (evolution_node); + + if (evolution_expr && (gimple_assign_rhs_code (evolution_expr) == PLUS_EXPR || + gimple_assign_rhs_code (evolution_expr) == POINTER_PLUS_EXPR)) + { + tree rhs1 = gimple_assign_rhs1 (evolution_expr); + tree rhs2 = gimple_assign_rhs2 (evolution_expr); + if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST + && tree_to_uhwi (rhs2) == target) + return true; + } + return false; +} + +/* Check whether there are only 3 phi nodes in the header block. + Return 3 phi nodes in the capture. */ +static bool +check_num_of_phi (basic_block header, gphi *capture[]) +{ + gphi *phi; + gphi_iterator gsi; + int num_of_phi = 0; + + for (gsi = gsi_start_phis (header); !gsi_end_p (gsi); gsi_next (&gsi)) + { + phi = gsi.phi(); + if (phi) num_of_phi++; + if (num_of_phi > 3) + return false; + capture[num_of_phi - 1] = phi; + } + /* phi node should be exactly 3. */ + return num_of_phi == 3; +} + +/* Check the evolution pattern of three phi nodes. + Should be one of the node +1 every time (s), one of the node -1 + every time (n), and a 3rd one neither (c). 
Return 3 phi nodes in + the capture with the order of s,n,c.*/ +static bool +check_evolution_pattern (class loop* loop, gphi *capture[]) +{ + gphi *s=NULL; + gphi *n=NULL; + gphi *c=NULL; + + for (int i = 0; i < 3; i++) + { + if (evolution_pattern_plus_with_p(loop, capture[i], 1)) + { + if (s != NULL) + return false; + s = capture[i]; + } + else if (evolution_pattern_plus_with_p(loop, capture[i], 4294967295)) + { + if (n != NULL) + return false; + n = capture[i]; + } + else + { + if (c != NULL) + return false; + c = capture[i]; + } + } + + // some envolution pattern cannot find + if (!n || !s || !c) + return false; + + capture[0] = s; + capture[1] = n; + capture[2] = c; + return true; +} +/* check the calculation pattern before and after the crc_table array read stmt. + _7 = crc_32_tab[_6]; + The caculation of index _6 should be the result of a sequency of calculation by the s and c + The result of the array read _7 should be used to calculate the new c. */ +static bool +check_calculation_pattern (class loop* loop, gphi *capture[]) +{ + gphi *s=capture[0]; + gphi *c=capture[2]; + tree res_ops[3]; + tree index = TREE_OPERAND (gimple_assign_rhs1 (crc_table_read_stmt), 1); + + /* Try to match + _4 = (int) _3; //NOP_EXPR (SSA_NAME @2) + _5 = _4 ^ c_10; //BIT_XOR_EXPR (SSA_NAME, PHI @1) + _6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) + */ + + if (!gimple_crc_match_index(index, res_ops, NULL)) + return false; + gimple *s_res_stmt = SSA_NAME_DEF_STMT(res_ops[1]); + tree s_res = TREE_OPERAND(gimple_assign_rhs1(s_res_stmt),0); + if (res_ops[0] != gimple_phi_result (c) || + s_res != gimple_phi_result (s)) + return false; + + /* Try to match + _8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) + c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) + */ + edge backedge = find_edge(loop->latch, loop->header); + tree updated_c = PHI_ARG_DEF_FROM_EDGE (c, backedge); + if (!gimple_crc_match_res(updated_c, res_ops, NULL)) + return false; + if (res_ops[0] != 
gimple_phi_result (c) + || res_ops[2] != gimple_assign_lhs(crc_table_read_stmt)) + return false; + + return true; +} + +/* check the exit condition is n != 0. */ +static bool +check_exit_condition (class loop* loop, gphi *n) +{ + edge backedge = find_edge(loop->latch, loop->header); + gimple *cond_stmt = gsi_stmt (gsi_last_bb (loop->header)); + if (!cond_stmt || gimple_code (cond_stmt) != GIMPLE_COND || gimple_cond_code (cond_stmt) != NE_EXPR + || gimple_cond_lhs (cond_stmt) != PHI_ARG_DEF_FROM_EDGE (n, backedge) + || tree_to_uhwi(gimple_cond_rhs (cond_stmt)) != 0) + return false; + + return true; +} + +/* Check the loop body. The loop body we are trying to match is + +# s_10 = PHI +# n_11 = PHI +# c_12 = PHI +_1 = (int) c_12; +s_18 = s_10 + 1; +_3 = *s_10; +_4 = (int) _3; +_5 = _1 ^ _4; +_6 = _5 & 255; +_7 = crc_32_tab[_6]; +_8 = c_12 >> 8; +c_19 = _7 ^ _8; +n_20 = n_11 + 4294967295; +if (n_20 != 0) + goto ; [INV] +else + goto ; [INV] + +which is doing a very simple calculation +do { + c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); +} while (--n); + +In this case ,we don't want this loop to have any other operation inside. +so the matching condition is +1. There are only 3 loop variant during each itoration, namely s,c,n, + which is limited by the condition that the loop have exactly 3 phi nodes. +2. The 3 loop variants should have evolution pattern as 1 of the 3 nodes is + increased by 1 every itoration, 1 of the 3 nodes is decreased by 1 every itor + and the 3rd one is neither. These three tree node SSA value will be captured for + the later arithmatic pattern matching +3. Pattern matching for the index of crc_table +4. pattern matching for the result of c calcuation after read from crc_table +5. The exit condition matching. 
+ */ +static bool +crc_loop_body_check (class loop *loop) +{ + basic_block header = loop->header; + gphi *capture[3]; + if (!check_num_of_phi(header, capture)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n num of phi noeds check failed.\n"); + return false; + } + if (!check_evolution_pattern(loop, capture)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n evolution pattern check failed.\n"); + return false; + } + if (!check_calculation_pattern(loop, capture)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n calculation pattern check failed.\n"); + return false; + } + if (!check_exit_condition(loop, capture[1] /* n*/)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n exit condition check failed.\n"); + return false; + } + return true; +/* gphi *phi; + gphi_iterator gsi; + int num_of_phi = 0; + //s, n, c; + //only 3 phi nodes are there, every one of the phi nodes comming from 2 edge only, one from preheader, one from latch + // s increase by 1 every itoration + // n decrease by 1 every itoration + // The final one is c, which is the result, should be used for the start of the later pattern matching + for (gsi = gsi_start_phis(loop->header); !gsi_end_p(gsi); gsi_next(&gsi)) + { + phi = gsi.phi(); + + if (phi) num_of_phi++; + if (num_of_phi > 3) return false; // more then 3 phi node + if (gimple_phi_num_args(phi) > 2) // more than 2 edges other then one backedge and one preheader edge + return false; + //capture[num_of_phi - 1] = gimple_phi_result(phi); + capture[num_of_phi - 1] = phi; + } + if (num_of_phi != 3) return false; // phi node should be 3 */ + // Find the envolution pattern for s and n, try to match the identity of these variable +/* gphi *s=NULL; + gphi *n=NULL; + gphi *c=NULL; + + for (int i = 0; i < 3; i++) + { + if (evolution_pattern_plus_with_p(loop, capture[i], 1)) + { + if(s != NULL) + return false; + s = capture[i]; + } + else if 
(evolution_pattern_plus_with_p(loop, capture[i], 4294967295)) + { + if(n != NULL) + return false; + n = capture[i]; + } + else + { + if(c != NULL) + return false; + c = capture[i]; + } + } + + // some envolution pattern cannot find + if (!n || !s || !c) + return false; + gphi *s=capture[0]; + gphi *n=capture[1]; + gphi *c=capture[2]; + tree res_ops[3]; + tree index = TREE_OPERAND (gimple_assign_rhs1 (crc_table_read_stmt), 1); + + /* Try to match + _1 = (int) c_12; //NOP_EXPR (SSA_NAME @1) + _4 = (int) _3; //NOP_EXPR (SSA_NAME @2) + _5 = _1 ^ _4; //BIT_XOR_EXPR (SSA_NAME, SSA_NAME) + _6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) + + + if (!gimple_crc_match_index(index, res_ops, NULL)) + return false; + gimple *s_res_stmt = SSA_NAME_DEF_STMT(res_ops[1]); + tree s_res = TREE_OPERAND(gimple_assign_rhs1(s_res_stmt),0); + if (res_ops[0] != gimple_phi_result (c) || + s_res != gimple_phi_result (s)) + return false; + + /* +_8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) +c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) + + edge backedge = find_edge(loop->latch, loop->header); + tree updated_c = PHI_ARG_DEF_FROM_EDGE (c, backedge); + if (!gimple_crc_match_res(updated_c, res_ops, NULL)) + return false; + if (res_ops[0] != gimple_phi_result (c) + || res_ops[2] != gimple_assign_lhs(crc_table_read_stmt)) + return false; + + // try match n as the induction variable + // The proceed condition for back edge is n != 0 + gimple *cond_stmt = gsi_stmt (gsi_last_bb (loop->header)); + if (!cond_stmt || gimple_code (cond_stmt) != GIMPLE_COND || gimple_cond_code (cond_stmt) != NE_EXPR + || gimple_cond_lhs (cond_stmt) != PHI_ARG_DEF_FROM_EDGE (n, backedge) + || tree_to_uhwi(gimple_cond_rhs (cond_stmt)) != 0) + return false; + + return true; + */ +} + + +static bool +match_crc_loop (class loop *loop) +{ + if (!crc_loop_form_check(loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nWrong loop form for crc matching.\n"); + return 
false; + } + if (!crc_table_check(loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nWrong crc table for crc matching.\n"); + return false; + } + if (!crc_loop_body_check(loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nWrong loop body for crc matching.\n"); + return false; + } + return true; +} + +/* The main entry of loop crc optimizes. */ +static unsigned int +tree_ssa_loop_crc () +{ + unsigned int todo = 0; + class loop *loop; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + flow_loops_dump (dump_file, NULL, 1); + fprintf (dump_file, "\nStarting the loop_crc pass\n"); + } + + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "======================================\n"); + fprintf (dump_file, "Processing loop %d:\n", loop->num); + fprintf (dump_file, "======================================\n"); + flow_loop_dump (loop, dump_file, NULL, 1); + fprintf (dump_file, "\n\n"); + } + + if (match_crc_loop (loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "The %dth loop form is success matched," + "and the loop can be optimized.\n", + loop->num); + } + + convert_to_new_loop (loop); + } + } + + todo |= (TODO_update_ssa); + return todo; +} + +/* Loop crc. 
*/ + +namespace { + +const pass_data pass_data_tree_loop_crc = +{ + GIMPLE_PASS, + "loop_crc", + OPTGROUP_LOOP, + TV_TREE_LOOP_CRC, + (PROP_cfg | PROP_ssa), + 0, + 0, + 0, + (TODO_update_ssa | TODO_verify_all) +}; + +class pass_loop_crc : public gimple_opt_pass +{ +public: + pass_loop_crc (gcc::context *ctxt) + : gimple_opt_pass (pass_data_tree_loop_crc, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *); + virtual unsigned int execute (function *); + +}; // class pass_loop_crc + +bool +pass_loop_crc::gate (function *) +{ + return (flag_loop_crc > 0 && optimize >= 3); +} + +unsigned int +pass_loop_crc::execute (function *fun) +{ + if (number_of_loops (fun) <= 1) + return 0; + + /* Only supports LP64 data mode. */ + if (TYPE_PRECISION (long_integer_type_node) != 64 + || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "The current data mode is not supported," + "only the LP64 date mode is supported.\n"); + return 0; + } + + return tree_ssa_loop_crc (); +} + +} // anon namespace + +gimple_opt_pass * +make_pass_loop_crc (gcc::context *ctxt) +{ + return new pass_loop_crc (ctxt); +} \ No newline at end of file -- Gitee From a0e46c404408bdfebc7f864671594fbdf1985be6 Mon Sep 17 00:00:00 2001 From: xingyushuai Date: Mon, 24 Apr 2023 09:34:35 +0800 Subject: [PATCH 2/2] crc loop optimization initial --- gcc/config/aarch64/aarch64-builtins.c | 29 + gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64.c | 12 + gcc/doc/tm.texi | 9 + gcc/doc/tm.texi.in | 2 + gcc/match.pd | 4 +- gcc/target.def | 14 + gcc/tree-ssa-loop-crc.c | 727 ++++++++++++++++++++++---- 8 files changed, 695 insertions(+), 103 deletions(-) diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index d92157dff02..1e8b046da77 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -441,6 +441,12 @@ typedef struct 
#define VAR1(T, N, MAP, A) \ AARCH64_SIMD_BUILTIN_##T##_##N##A, +enum aarch64_crc_builtins{ + AARCH64_BUILTIN_CRC32B, + AARCH64_BUILTIN_CRC32H, + AARCH64_BUILTIN_CRC32W, +}; + enum aarch64_builtins { AARCH64_BUILTIN_MIN, @@ -1321,6 +1327,29 @@ aarch64_general_builtin_decl (unsigned code, bool) return aarch64_builtin_decls[code]; } +/* Implement TARGET_GET_CRC_BUILTIN_CODE */ +unsigned +get_crc_builtin_code(unsigned code, bool) +{ + if (code > AARCH64_BUILTIN_CRC32W) + return AARCH64_BUILTIN_MIN; + + unsigned res = AARCH64_BUILTIN_MIN; + switch (code) { + case AARCH64_BUILTIN_CRC32B: + res = AARCH64_BUILTIN_crc32b; + break; + case AARCH64_BUILTIN_CRC32H: + res = AARCH64_BUILTIN_crc32h; + break; + case AARCH64_BUILTIN_CRC32W: + res = AARCH64_BUILTIN_crc32w; + break; + default: + break; + } + return res; +} typedef enum { diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 9b6d309a7a4..a0ca662bc8b 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -723,6 +723,7 @@ tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *); gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *); rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); tree aarch64_general_builtin_decl (unsigned, bool); +unsigned get_crc_builtin_code(unsigned , bool); tree aarch64_general_builtin_rsqrt (unsigned int); tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 10e037325e7..f37db029b89 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -13735,6 +13735,15 @@ aarch64_builtin_decl (unsigned int code, bool initialize_p) gcc_unreachable (); } +/* Implement TARGET_GET_CRC_BUILTIN_CODE. 
*/ +static unsigned +aarch64_get_crc_builtin_code(unsigned code, bool initialize_p) +{ + unsigned subcode = get_crc_builtin_code(code,initialize_p); + unsigned res = subcode << AARCH64_BUILTIN_SHIFT; + return res; +} + /* Return true if it is safe and beneficial to use the approximate rsqrt optabs to optimize 1.0/sqrt. */ @@ -23911,6 +23920,9 @@ aarch64_run_selftests (void) #undef TARGET_BUILTIN_DECL #define TARGET_BUILTIN_DECL aarch64_builtin_decl +#undef TARGET_GET_CRC_BUILTIN_CODE +#define TARGET_GET_CRC_BUILTIN_CODE aarch64_get_crc_builtin_code + #undef TARGET_BUILTIN_RECIPROCAL #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 0508fce57a7..b46418d0b65 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -11610,6 +11610,15 @@ If @var{code} is out of range the function should return @code{error_mark_node}. @end deftypefn +@deftypefn {Target Hook} unsigned TARGET_GET_CRC_BUILTIN_CODE (unsigned @var{code}, bool @var{initialize_p}) +Define this hook to get crc32 builtin code. It should be a function that +returns the crc32 builtin function code @var{code}. +If there is no such builtin and it cannot be initialized at this time +if @var{initialize_p} is true the function should return @code{NULL_TREE}. +If @var{code} is out of range the function should return +@code{error_mark_node}. +@end deftypefn + @deftypefn {Target Hook} rtx TARGET_EXPAND_BUILTIN (tree @var{exp}, rtx @var{target}, rtx @var{subtarget}, machine_mode @var{mode}, int @var{ignore}) Expand a call to a machine specific built-in function that was set up by diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 3b70ea4841a..2663547c71f 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -7941,6 +7941,8 @@ to by @var{ce_info}. 
@hook TARGET_BUILTIN_DECL +@hook TARGET_GET_CRC_BUILTIN_CODE + @hook TARGET_EXPAND_BUILTIN @hook TARGET_RESOLVE_OVERLOADED_BUILTIN diff --git a/gcc/match.pd b/gcc/match.pd index 87b31695348..73c0cf72df4 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3637,10 +3637,9 @@ _5 = _4 ^ c_10; //BIT_XOR_EXPR (SSA_NAME@1, SSA_NAME) _6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) */ (match (crc_match_index @1 @2 @3) - (bit_and (bit_xor (nop SSA_NAME@2) SSA_NAME@1) INTEGER_CST@3) + (bit_and (bit_xor (nop SSA_NAME@2) (nop SSA_NAME@1)) INTEGER_CST@3) (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@3) == 255)) ) - #endif #if GIMPLE @@ -3653,7 +3652,6 @@ c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) (bit_xor SSA_NAME@3 (rshift SSA_NAME@1 INTEGER_CST@2)) (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@2) == 8)) ) - #endif /* Simplification moved from fold_cond_expr_with_comparison. It may also diff --git a/gcc/target.def b/gcc/target.def index 2020564118b..34d3561bd53 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -2421,6 +2421,20 @@ If @var{code} is out of range the function should return\n\ @code{error_mark_node}.", tree, (unsigned code, bool initialize_p), NULL) +/* Initialize (if INITIALIZE_P is true) and return the real code of + target-specific built-in function . + Return NULL if that is not possible. Return error_mark_node if CODE + is outside of the range of valid crc32 codes. */ +DEFHOOK +(get_crc_builtin_code, + "Define this hook to get crc32 builtin code. It should be a function that\n\ +returns the crc32 builtin function code @var{code}.\n\ +If there is no such builtin and it cannot be initialized at this time\n\ +if @var{initialize_p} is true the function should return @code{NULL_TREE}.\n\ +If @var{code} is out of range the function should return\n\ +@code{error_mark_node}.", + unsigned , (unsigned code, bool initialize_p), NULL) + /* Expand a target-specific builtin. 
*/ DEFHOOK (expand_builtin, diff --git a/gcc/tree-ssa-loop-crc.c b/gcc/tree-ssa-loop-crc.c index 4982384c607..2d45747aad2 100644 --- a/gcc/tree-ssa-loop-crc.c +++ b/gcc/tree-ssa-loop-crc.c @@ -1,5 +1,5 @@ -/* Array widen compare. - Copyright (C) 2022-2022 Free Software Foundation, Inc. +/* loop crc. + Copyright (C) 2023-2023 Free Software Foundation, Inc. This file is part of GCC. @@ -42,13 +42,178 @@ along with GCC; see the file COPYING3. If not see #include "print-tree.h" #include "cfghooks.h" #include "gimple-fold.h" +#include "diagnostic-core.h" -/* Match.pd function to match the ctz expression. */ extern bool gimple_crc_match_index (tree, tree *, tree (*)(tree)); extern bool gimple_crc_match_res (tree, tree *, tree (*)(tree)); static gimple *crc_table_read_stmt = NULL; +static gphi* phi_s = NULL; +static gphi* phi_c = NULL; +static tree nn_tree = NULL; + +enum aarch64_crc_builtins + { + AARCH64_BUILTIN_CRC32B, + AARCH64_BUILTIN_CRC32H, + AARCH64_BUILTIN_CRC32W, + }; + +/* The useful information of origin loop. */ +struct origin_loop_info +{ + tree limit; /* The limit index of the array in the old loop. */ + tree base_n; /* The initial value of the old loop. */ + tree base_s; /* The initial value of the old loop. */ + tree base_c; /* The initial value of the old loop. */ + edge entry_edge; /* The edge into the old loop. */ + edge exit_edge; /* The edge outto the old loop. */ + basic_block exit_bb; +}; + +typedef struct origin_loop_info origin_loop_info; + +static origin_loop_info origin_loop; +hash_map n_map; +hash_map nn_map; +hash_map s_map; +hash_map c_map; +hash_map crc_map; + +/* Initialize the origin_loop structure. */ +static void +init_origin_loop_structure () +{ + origin_loop.entry_edge = NULL; + origin_loop.exit_edge = NULL; + origin_loop.exit_bb = NULL; + origin_loop.limit = NULL; + origin_loop.base_n = NULL; + origin_loop.base_s = NULL; + origin_loop.base_c = NULL; +} + +/* Get the edge that first entered the loop. 
*/ +static edge +get_loop_preheader_edge (class loop *loop) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, loop->header->preds) + if (e->src != loop->latch) + break; + + return e; +} + +/* Returns true if t is SSA_NAME and user variable exists. */ + +static bool +ssa_name_var_p (tree t) +{ + if (!t || TREE_CODE (t) != SSA_NAME) + return false; + if (SSA_NAME_VAR (t)) + return true; + return false; +} + +/* Returns true if t1 and t2 are SSA_NAME and belong to the same variable. */ + +static bool +same_ssa_name_var_p (tree t1, tree t2) +{ + if (!ssa_name_var_p (t1) || !ssa_name_var_p (t2)) + return false; + if (SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2)) + return true; + return false; +} + +/* Get origin loop induction variable upper bound. */ + +static bool +get_iv_upper_bound (gimple *stmt) +{ + if (origin_loop.limit != NULL || origin_loop.base_n != NULL) + return false; + + tree lhs = gimple_cond_lhs (stmt); + tree rhs = gimple_cond_rhs (stmt); + + if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE + || TREE_CODE (TREE_TYPE (rhs)) != INTEGER_TYPE) + return false; + + gimple *g = SSA_NAME_DEF_STMT (rhs); + + /* TODO: Currently, the input restrictions on lhs and rhs are implemented + through PARM_DECL. We may consider releasing the restrictions later, and + we need to consider the overall adaptation scenario and adding test + cases. */ + if (ssa_name_var_p (lhs) && TREE_CODE (SSA_NAME_VAR (lhs)) == PARM_DECL) + { + origin_loop.limit = rhs; + origin_loop.base_n = lhs; + } + else + return false; + + if (origin_loop.limit != NULL && origin_loop.base_n != NULL) + return true; + + return false; +} + +/* Get origin loop info. 
*/ +static bool +get_origin_loop_info(class loop *loop) +{ + vec edges; + edges = get_loop_exit_edges (loop); + origin_loop.exit_edge = edges[0]; + origin_loop.exit_bb = origin_loop.exit_edge->dest; + origin_loop.entry_edge = get_loop_preheader_edge(loop); + origin_loop.base_s = PHI_ARG_DEF_FROM_EDGE(phi_s,origin_loop.entry_edge); + origin_loop.base_c = PHI_ARG_DEF_FROM_EDGE(phi_c,origin_loop.entry_edge); + + basic_block preheader_bb; + preheader_bb = origin_loop.entry_edge->src; + + if(preheader_bb->preds->length() != 1) + return false; + + edge entry_pre_bb_edge; + entry_pre_bb_edge = EDGE_PRED (preheader_bb, 0); + + basic_block pre_preheader_bb; + pre_preheader_bb = entry_pre_bb_edge->src; + + gimple_stmt_iterator gsi; + gimple *stmt; + + for (gsi = gsi_last_bb (pre_preheader_bb); !gsi_end_p (gsi); gsi_prev (&gsi)) + { + stmt = gsi_stmt (gsi); + if (stmt && is_gimple_debug (stmt)) + continue; + if (stmt && gimple_code (stmt) == GIMPLE_COND) + { + if (!get_iv_upper_bound (stmt)) + return false; + } + else if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN) + { + continue; + } + else + return false; + } + + return true; +} /* The loop form check will check the entire loop control flow It should be a loop that: @@ -273,6 +438,7 @@ check_evolution_pattern (class loop* loop, gphi *capture[]) if (s != NULL) return false; s = capture[i]; + phi_s = s; } else if (evolution_pattern_plus_with_p(loop, capture[i], 4294967295)) { @@ -285,6 +451,7 @@ check_evolution_pattern (class loop* loop, gphi *capture[]) if (c != NULL) return false; c = capture[i]; + phi_c = c; } } @@ -304,6 +471,7 @@ check_evolution_pattern (class loop* loop, gphi *capture[]) static bool check_calculation_pattern (class loop* loop, gphi *capture[]) { + bool check = false; gphi *s=capture[0]; gphi *c=capture[2]; tree res_ops[3]; @@ -314,14 +482,25 @@ check_calculation_pattern (class loop* loop, gphi *capture[]) _5 = _4 ^ c_10; //BIT_XOR_EXPR (SSA_NAME, PHI @1) _6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, 
INTEGER_CST@3) */ - if (!gimple_crc_match_index(index, res_ops, NULL)) return false; gimple *s_res_stmt = SSA_NAME_DEF_STMT(res_ops[1]); tree s_res = TREE_OPERAND(gimple_assign_rhs1(s_res_stmt),0); if (res_ops[0] != gimple_phi_result (c) || s_res != gimple_phi_result (s)) - return false; + { + check = false; + } + if(!check) + { + s_res_stmt = SSA_NAME_DEF_STMT(res_ops[0]); + s_res = TREE_OPERAND(gimple_assign_rhs1(s_res_stmt),0); + if (res_ops[1] != gimple_phi_result (c) || + s_res != gimple_phi_result (s)) + { + return false; + } + } /* Try to match _8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) @@ -333,7 +512,11 @@ check_calculation_pattern (class loop* loop, gphi *capture[]) return false; if (res_ops[0] != gimple_phi_result (c) || res_ops[2] != gimple_assign_lhs(crc_table_read_stmt)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n gimple_crc_match_res pattern check failed.\n"); return false; + } return true; } @@ -419,102 +602,8 @@ crc_loop_body_check (class loop *loop) return false; } return true; -/* gphi *phi; - gphi_iterator gsi; - int num_of_phi = 0; - //s, n, c; - //only 3 phi nodes are there, every one of the phi nodes comming from 2 edge only, one from preheader, one from latch - // s increase by 1 every itoration - // n decrease by 1 every itoration - // The final one is c, which is the result, should be used for the start of the later pattern matching - for (gsi = gsi_start_phis(loop->header); !gsi_end_p(gsi); gsi_next(&gsi)) - { - phi = gsi.phi(); - - if (phi) num_of_phi++; - if (num_of_phi > 3) return false; // more then 3 phi node - if (gimple_phi_num_args(phi) > 2) // more than 2 edges other then one backedge and one preheader edge - return false; - //capture[num_of_phi - 1] = gimple_phi_result(phi); - capture[num_of_phi - 1] = phi; - } - if (num_of_phi != 3) return false; // phi node should be 3 */ - // Find the envolution pattern for s and n, try to match the identity of these variable -/* gphi *s=NULL; 
- gphi *n=NULL; - gphi *c=NULL; - - for (int i = 0; i < 3; i++) - { - if (evolution_pattern_plus_with_p(loop, capture[i], 1)) - { - if(s != NULL) - return false; - s = capture[i]; - } - else if (evolution_pattern_plus_with_p(loop, capture[i], 4294967295)) - { - if(n != NULL) - return false; - n = capture[i]; - } - else - { - if(c != NULL) - return false; - c = capture[i]; - } - } - - // some envolution pattern cannot find - if (!n || !s || !c) - return false; - gphi *s=capture[0]; - gphi *n=capture[1]; - gphi *c=capture[2]; - tree res_ops[3]; - tree index = TREE_OPERAND (gimple_assign_rhs1 (crc_table_read_stmt), 1); - - /* Try to match - _1 = (int) c_12; //NOP_EXPR (SSA_NAME @1) - _4 = (int) _3; //NOP_EXPR (SSA_NAME @2) - _5 = _1 ^ _4; //BIT_XOR_EXPR (SSA_NAME, SSA_NAME) - _6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) - - - if (!gimple_crc_match_index(index, res_ops, NULL)) - return false; - gimple *s_res_stmt = SSA_NAME_DEF_STMT(res_ops[1]); - tree s_res = TREE_OPERAND(gimple_assign_rhs1(s_res_stmt),0); - if (res_ops[0] != gimple_phi_result (c) || - s_res != gimple_phi_result (s)) - return false; - - /* -_8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) -c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) - - edge backedge = find_edge(loop->latch, loop->header); - tree updated_c = PHI_ARG_DEF_FROM_EDGE (c, backedge); - if (!gimple_crc_match_res(updated_c, res_ops, NULL)) - return false; - if (res_ops[0] != gimple_phi_result (c) - || res_ops[2] != gimple_assign_lhs(crc_table_read_stmt)) - return false; - - // try match n as the induction variable - // The proceed condition for back edge is n != 0 - gimple *cond_stmt = gsi_stmt (gsi_last_bb (loop->header)); - if (!cond_stmt || gimple_code (cond_stmt) != GIMPLE_COND || gimple_cond_code (cond_stmt) != NE_EXPR - || gimple_cond_lhs (cond_stmt) != PHI_ARG_DEF_FROM_EDGE (n, backedge) - || tree_to_uhwi(gimple_cond_rhs (cond_stmt)) != 0) - return false; - - return true; - */ } - static bool 
match_crc_loop (class loop *loop) { @@ -536,9 +625,447 @@ match_crc_loop (class loop *loop) fprintf (dump_file, "\nWrong loop body for crc matching.\n"); return false; } + + init_origin_loop_structure(); + if(!get_origin_loop_info(loop)) + return false; + return true; } +static void +create_new_bb (basic_block &new_bb, basic_block after_bb, + basic_block dominator_bb, class loop *outer) +{ + new_bb = create_empty_bb (after_bb); + add_bb_to_loop (new_bb, outer); + set_immediate_dominator (CDI_DOMINATORS, new_bb, dominator_bb); +} + +static void +change_preheader_bb(edge entry_edge) +{ + gimple_seq stmts = NULL; + gimple_stmt_iterator gsi; + gimple* g; + tree lhs1; + + lhs1 = create_tmp_var(TREE_TYPE(origin_loop.base_n),"nn"); + lhs1 = make_ssa_name(lhs1); + gsi = gsi_last_bb (entry_edge->src); + g = gimple_build_assign(lhs1,RSHIFT_EXPR,origin_loop.base_n, + build_int_cst (TREE_TYPE (origin_loop.base_n), 2)); + gimple_seq_add_stmt(&stmts,g); + gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); + nn_tree = lhs1; + set_current_def(nn_tree, lhs1); + nn_map.put (entry_edge->src, lhs1); +} + +static gphi* +create_phi_node_for_bb(tree old_name, basic_block bb) +{ + gphi *phi = create_phi_node(NULL_TREE, bb); + create_new_def_for(old_name, phi, gimple_phi_result_ptr(phi)); + return phi; +} + +static gimple* +call_builtin_fun(int code,tree& lhs, tree arg1, tree arg2) +{ + unsigned int builtin_code = targetm.get_crc_builtin_code(code, true);// 根据code获取到正确的builtin_fun_code + tree fn = targetm.builtin_decl(builtin_code,true); // get the decl of __builtin_aarch64_crc32w + if (!fn || fn == error_mark_node) + fatal_error (input_location, + "target specific builtin not available"); + gimple* call_builtin = gimple_build_call(fn, 2, arg1, arg2); // _40 = __builtin_aarch64_crc32* (_1, _2); + lhs = make_ssa_name (unsigned_type_node); + gimple_call_set_lhs(call_builtin,lhs); + + return call_builtin; +} + +/* Create loop_header and loop_latch for new loop + : + # s_14 = PHI + # c_16 = PHI 
+     # nn_19 = PHI
+     _1 = (unsigned int) c_16;
+     _2 = MEM[(uint32_t *)s_14];
+     _40 = __builtin_aarch64_crc32w (_1, _2);
+     c_29 = (long unsigned int) _40;
+     s_30 = s_14 + 4;
+     nn_31 = nn_19 + 4294967295;
+     if (nn_31 != 0)
+   The IR of bb is as above.  */
+
+/* Create the loop block LOOP_BB sketched above and redirect ENTRY_EDGE
+   to it.  AFTER_BB, DOMINATOR_BB and OUTER are forwarded unchanged to
+   create_new_bb.  The SSA names defined here are recorded for LOOP_BB in
+   crc_map, c_map, s_map and nn_map.  The PHI nodes are created without
+   arguments; create_new_loops fills them in through update_phi_nodes
+   once the edges exist.  */
+static void
+create_loop_bb (basic_block &loop_bb, basic_block after_bb,
+                basic_block dominator_bb, class loop *outer, edge entry_edge)
+{
+  gimple_seq stmts = NULL;
+
+  create_new_bb (loop_bb, after_bb, dominator_bb, outer);
+  redirect_edge_and_branch (entry_edge, loop_bb);
+  gimple_stmt_iterator gsi = gsi_last_bb (loop_bb);
+
+  tree entry_nn = get_current_def (nn_tree);
+  gphi *phi_s = create_phi_node_for_bb (origin_loop.base_s, loop_bb);
+  gphi *phi_c = create_phi_node_for_bb (origin_loop.base_c, loop_bb);
+  gphi *phi_nn = create_phi_node_for_bb (entry_nn, loop_bb);
+  tree res_s = gimple_phi_result (phi_s);
+  tree res_nn = gimple_phi_result (phi_nn);
+
+  /* _1 = (unsigned int) c_16;  */
+  tree crc_in = gimple_build (&stmts, NOP_EXPR, unsigned_type_node,
+                              gimple_phi_result (phi_c));
+
+  /* _2 = MEM[(uint32_t *)s_14];  */
+  tree word_ptr_type = build_pointer_type (unsigned_type_node);
+  gimple *g
+    = gimple_build_assign (make_ssa_name (unsigned_type_node),
+                           fold_build2 (MEM_REF, unsigned_type_node, res_s,
+                                        build_int_cst (word_ptr_type, 0)));
+  gimple_seq_add_stmt (&stmts, g);
+  tree word = gimple_assign_lhs (g);
+
+  /* _40 = __builtin_aarch64_crc32w (_1, _2);  */
+  unsigned int code = AARCH64_BUILTIN_CRC32W;
+  tree crc_res = NULL_TREE;
+  gimple *crc_call = call_builtin_fun (code, crc_res, crc_in, word);
+  crc_map.put (loop_bb, crc_res);
+  gimple_seq_add_stmt (&stmts, crc_call);
+
+  /* c_29 = (long unsigned int) _40;  */
+  tree next_c = copy_ssa_name (origin_loop.base_c);
+  g = gimple_build_assign (next_c, NOP_EXPR, crc_res);
+  gimple_seq_add_stmt (&stmts, g);
+  c_map.put (loop_bb, next_c);
+
+  /* s_30 = s_14 + 4;  */
+  tree next_s = copy_ssa_name (origin_loop.base_s);
+  g = gimple_build_assign (next_s, POINTER_PLUS_EXPR, res_s,
+                           build_int_cst (sizetype, 4));
+  gimple_seq_add_stmt (&stmts, g);
+  s_map.put (loop_bb, next_s);
+
+  /* nn_31 = nn_19 - 1.  build_int_cst sign-extends -1 to the precision
+     of the type, so this is an all-ones constant (printed as 4294967295
+     for a 32-bit counter) whatever the width of nn; the literal
+     4294967295 would only be a decrement for a 32-bit type.  */
+  tree next_nn = copy_ssa_name (nn_tree);
+  g = gimple_build_assign (next_nn, PLUS_EXPR, res_nn,
+                           build_int_cst (TREE_TYPE (res_nn), -1));
+  gimple_seq_add_stmt (&stmts, g);
+  nn_map.put (loop_bb, next_nn);
+
+  /* if (nn_31 != limit)  */
+  gcond *cond_stmt = gimple_build_cond (NE_EXPR, next_nn, origin_loop.limit,
+                                        NULL_TREE, NULL_TREE);
+  gimple_seq_add_stmt (&stmts, cond_stmt);
+  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+}
+
+/* Create COND_BB, the block that tests bit 1 of the original length.
+   The IR of the block is:
+     # c_6 = PHI
+     # s_46 = PHI
+     _44 = n_26(D) & 2;
+     if (_44 != 0)
+   AFTER_BB, DOMINATOR_BB and OUTER are forwarded to create_new_bb.  The
+   PHI results become the current definitions of origin_loop.base_s and
+   origin_loop.base_c and are recorded for COND_BB in s_map and c_map.
+   PHI arguments are added later by update_phi_nodes.  */
+static void
+create_cond_bb (basic_block &cond_bb, basic_block after_bb,
+                basic_block dominator_bb, class loop *outer)
+{
+  gimple_seq stmts = NULL;
+
+  create_new_bb (cond_bb, after_bb, dominator_bb, outer);
+  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+
+  gphi *phi_s = create_phi_node_for_bb (origin_loop.base_s, cond_bb);
+  gphi *phi_c = create_phi_node_for_bb (origin_loop.base_c, cond_bb);
+
+  tree res_s = gimple_phi_result (phi_s);
+  set_current_def (origin_loop.base_s, res_s);
+  s_map.put (cond_bb, res_s);
+
+  tree res_c = gimple_phi_result (phi_c);
+  set_current_def (origin_loop.base_c, res_c);
+  c_map.put (cond_bb, res_c);
+
+  /* _44 = n & 2;  if (_44 != limit)  */
+  tree n_type = TREE_TYPE (origin_loop.base_n);
+  tree masked = gimple_build (&stmts, BIT_AND_EXPR, n_type,
+                              origin_loop.base_n, build_int_cst (n_type, 2));
+  gcond *cond_stmt = gimple_build_cond (NE_EXPR, masked, origin_loop.limit,
+                                        NULL_TREE, NULL_TREE);
+  gimple_seq_add_stmt (&stmts, cond_stmt);
+  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+}
+
+/* Create COND_TRUE_BB, the block that consumes one 16-bit element.
+   The IR of the block is:
+     _7 = MEM[(uint16_t *)s_46];
+     _41 = __builtin_aarch64_crc32h (_8, _7);
+     c_33 = (long unsigned int) _41;
+     s_34 = s_30 + 2;
+   AFTER_BB, DOMINATOR_BB and OUTER are forwarded to create_new_bb;
+   AFTER_BB must already have an entry in s_map.  The new SSA names are
+   recorded for COND_TRUE_BB in crc_map, c_map and s_map.  */
+static void
+create_cond_true_bb (basic_block &cond_true_bb, basic_block after_bb,
+                     basic_block dominator_bb, class loop *outer)
+{
+  gimple_seq stmts = NULL;
+
+  create_new_bb (cond_true_bb, after_bb, dominator_bb, outer);
+  gimple_stmt_iterator gsi = gsi_last_bb (cond_true_bb);
+
+  /* The pointer to read from is the one recorded for AFTER_BB.  */
+  tree *s_slot = s_map.get (after_bb);
+  gcc_assert (s_slot);
+  tree s_46 = *s_slot;
+
+  /* _7 = MEM[(uint16_t *)s_46];  */
+  tree half_ptr_type = build_pointer_type (short_unsigned_type_node);
+  gimple *g
+    = gimple_build_assign (make_ssa_name (short_unsigned_type_node),
+                           fold_build2 (MEM_REF, short_unsigned_type_node,
+                                        s_46,
+                                        build_int_cst (half_ptr_type, 0)));
+  gimple_seq_add_stmt (&stmts, g);
+  tree half = gimple_assign_lhs (g);
+
+  /* The CRC input is the value recorded for the block two positions
+     before the new one in the block chain.
+     NOTE(review): presumably that is the loop block created by
+     create_loop_bb -- confirm against create_new_bb.  */
+  tree *crc_slot = crc_map.get (cond_true_bb->prev_bb->prev_bb);
+  gcc_assert (crc_slot);
+
+  /* _41 = __builtin_aarch64_crc32h (_8, _7);  */
+  unsigned int code = AARCH64_BUILTIN_CRC32H;
+  tree crc_res = NULL_TREE;
+  gimple *crc_call = call_builtin_fun (code, crc_res, *crc_slot, half);
+  crc_map.put (cond_true_bb, crc_res);
+  gimple_seq_add_stmt (&stmts, crc_call);
+
+  /* c_33 = (long unsigned int) _41;  */
+  tree next_c = copy_ssa_name (origin_loop.base_c);
+  g = gimple_build_assign (next_c, NOP_EXPR, crc_res);
+  gimple_seq_add_stmt (&stmts, g);
+  c_map.put (cond_true_bb, next_c);
+
+  /* s_34 = s_46 + 2;  */
+  tree next_s = copy_ssa_name (s_46);
+  g = gimple_build_assign (next_s, POINTER_PLUS_EXPR, s_46,
+                           build_int_cst (sizetype, 2));
+  gimple_seq_add_stmt (&stmts, g);
+  s_map.put (cond_true_bb, next_s);
+
+  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+}
+
+/* Create COND_FALSE_BB, the block that tests bit 0 of the original
+   length, and add a fallthru edge from AFTER_BB to it.
+   The IR of the block is:
+     # s_15 = PHI
+     # c_17 = PHI
+     _3 = n_26(D) & 1;
+     if (_3 != 0)
+   AFTER_BB, DOMINATOR_BB and OUTER are forwarded to create_new_bb.  The
+   PHI results become the current definitions of origin_loop.base_s and
+   origin_loop.base_c and are recorded for COND_FALSE_BB in s_map and
+   c_map.  PHI arguments are added later by update_phi_nodes.  */
+static void
+create_cond_false_bb (basic_block &cond_false_bb, basic_block after_bb,
+                      basic_block dominator_bb, class loop *outer)
+{
+  gimple_seq stmts = NULL;
+
+  create_new_bb (cond_false_bb, after_bb, dominator_bb, outer);
+  make_single_succ_edge (after_bb, cond_false_bb, EDGE_FALLTHRU);
+
+  tree entry_s = get_current_def (origin_loop.base_s);
+  gphi *phi_s = create_phi_node_for_bb (entry_s, cond_false_bb);
+  tree entry_c = get_current_def (origin_loop.base_c);
+  gphi *phi_c = create_phi_node_for_bb (entry_c, cond_false_bb);
+
+  tree res_s = gimple_phi_result (phi_s);
+  set_current_def (origin_loop.base_s, res_s);
+  s_map.put (cond_false_bb, res_s);
+
+  tree res_c = gimple_phi_result (phi_c);
+  set_current_def (origin_loop.base_c, res_c);
+  c_map.put (cond_false_bb, res_c);
+
+  /* _3 = n & 1;  if (_3 != limit)  */
+  gimple_stmt_iterator gsi = gsi_last_bb (cond_false_bb);
+  tree n_type = TREE_TYPE (origin_loop.base_n);
+  tree masked = gimple_build (&stmts, BIT_AND_EXPR, n_type,
+                              origin_loop.base_n, build_int_cst (n_type, 1));
+  gcond *cond_stmt = gimple_build_cond (NE_EXPR, masked, origin_loop.limit,
+                                        NULL_TREE, NULL_TREE);
+  gimple_seq_add_stmt (&stmts, cond_stmt);
+  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+}
+
+/* Create NEW_BB, the block that consumes one final byte.
+   The IR of the block is:
+     _11 = (unsigned int) c_17;
+     _12 = *s_15;
+     _42 = __builtin_aarch64_crc32b (_11, _12);
+     c_36 = (long unsigned int) _42;
+   AFTER_BB, DOMINATOR_BB and OUTER are forwarded to create_new_bb.  The
+   inputs are the current definitions of origin_loop.base_c and
+   origin_loop.base_s; the results are recorded for NEW_BB in crc_map
+   and c_map.  */
+static void
+create_lastcond_true_bb (basic_block &new_bb, basic_block after_bb,
+                         basic_block dominator_bb, class loop *outer)
+{
+  gimple_seq stmts = NULL;
+
+  create_new_bb (new_bb, after_bb, dominator_bb, outer);
+  gimple_stmt_iterator gsi = gsi_last_bb (new_bb);
+
+  /* _11 = (unsigned int) c_17;  */
+  tree crc_in = gimple_build (&stmts, NOP_EXPR, unsigned_type_node,
+                              get_current_def (origin_loop.base_c));
+
+  /* _12 = *s_15;  */
+  tree s_15 = get_current_def (origin_loop.base_s);
+  gimple *g
+    = gimple_build_assign (make_ssa_name (unsigned_char_type_node),
+                           fold_build2 (MEM_REF, unsigned_char_type_node,
+                                        s_15,
+                                        build_int_cst (TREE_TYPE (s_15), 0)));
+  gimple_seq_add_stmt (&stmts, g);
+  tree byte = gimple_assign_lhs (g);
+
+  /* _42 = __builtin_aarch64_crc32b (_11, _12);  */
+  unsigned int code = AARCH64_BUILTIN_CRC32B;
+  tree crc_res = NULL_TREE;
+  gimple *crc_call = call_builtin_fun (code, crc_res, crc_in, byte);
+  crc_map.put (new_bb, crc_res);
+  gimple_seq_add_stmt (&stmts, crc_call);
+
+  /* c_36 = (long unsigned int) _42;  */
+  tree next_c = copy_ssa_name (origin_loop.base_c);
+  g = gimple_build_assign (next_c, NOP_EXPR, crc_res);
+  gimple_seq_add_stmt (&stmts, g);
+  c_map.put (new_bb, next_c);
+
+  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
+}
+
+/* If PHI_ARG and PHI_RES are versions of the same variable, add PHI_ARG
+   to PHI as the argument for edge E and return true.  Otherwise leave
+   PHI untouched and return false.  */
+static bool
+optional_add_phi_arg (gphi *phi, tree phi_res, tree phi_arg, edge e)
+{
+  if (!same_ssa_name_var_p (phi_arg, phi_res))
+    return false;
+
+  /* Virtual operands carry no source location; real operands take the
+     location of their defining statement.  */
+  location_t loc = UNKNOWN_LOCATION;
+  if (!virtual_operand_p (phi_arg))
+    loc = gimple_location (SSA_NAME_DEF_STMT (phi_arg));
+  add_phi_arg (phi, phi_arg, e, loc);
+
+  return true;
+}
+
+/* Add phi_arg for bb with phi node.
+   For every PHI in BB and every incoming edge that has no argument yet,
+   try the c, nn and s values recorded for the edge's source block (in
+   that order), falling back to origin_loop.base_c, nn_tree and
+   origin_loop.base_s when the source block has no entry in the
+   corresponding map.  The first candidate accepted by
+   optional_add_phi_arg becomes the argument.  */
+static void
+update_phi_nodes (basic_block bb)
+{
+  for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
+       gsi_next (&gsi))
+    {
+      gphi *phi = gsi.phi ();
+      tree res = gimple_phi_result (phi);
+
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, bb->preds)
+        {
+          if (PHI_ARG_DEF_FROM_EDGE (phi, e))
+            continue;
+
+          tree *slot = c_map.get (e->src);
+          tree var_c = slot ? *slot : origin_loop.base_c;
+          if (optional_add_phi_arg (phi, res, var_c, e))
+            continue;
+
+          slot = nn_map.get (e->src);
+          tree var_nn = slot ? *slot : nn_tree;
+          if (optional_add_phi_arg (phi, res, var_nn, e))
+            continue;
+
+          slot = s_map.get (e->src);
+          tree var_s = slot ? *slot : origin_loop.base_s;
+          optional_add_phi_arg (phi, res, var_s, e);
+        }
+    }
+}
+
+/* Build the replacement control flow starting at ENTRY_EDGE: the
+   self-looping block from create_loop_bb, then the blocks from
+   create_cond_bb, create_cond_true_bb, create_cond_false_bb and
+   create_lastcond_true_bb, ending in origin_loop.exit_bb.  PHI
+   arguments are filled in as soon as the incoming edges of a block
+   exist, and the original exit edge is removed at the end.  */
+static void
+create_new_loops (edge entry_edge)
+{
+  basic_block loop_bb, cond_bb, cond_true_bb, cond_false_bb, lastcond_true_bb;
+  class loop *outer = entry_edge->src->loop_father;
+
+  change_preheader_bb (entry_edge);
+
+  /* The loop block branches back to itself while its condition holds
+     and falls out to COND_BB otherwise.  */
+  create_loop_bb (loop_bb, entry_edge->src, entry_edge->src, outer,
+                  entry_edge);
+  create_cond_bb (cond_bb, loop_bb, loop_bb, outer);
+  make_edge (loop_bb, loop_bb, EDGE_TRUE_VALUE);
+  make_edge (loop_bb, cond_bb, EDGE_FALSE_VALUE);
+  update_phi_nodes (loop_bb);
+
+  /* Register the single-block loop; header and latch are the same
+     block.  */
+  class loop *new_loop = alloc_loop ();
+  new_loop->header = loop_bb;
+  new_loop->latch = loop_bb;
+  add_loop (new_loop, outer);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "\nPrint byte new loop %d:\n", new_loop->num);
+      flow_loop_dump (new_loop, dump_file, NULL, 1);
+      fprintf (dump_file, "\n\n");
+    }
+
+  /* The blocks that follow the loop.  */
+  create_cond_true_bb (cond_true_bb, cond_bb, cond_bb, outer);
+  make_edge (cond_bb, cond_true_bb, EDGE_TRUE_VALUE);
+  create_cond_false_bb (cond_false_bb, cond_true_bb, cond_bb, outer);
+  make_edge (cond_bb, cond_false_bb, EDGE_FALSE_VALUE);
+  update_phi_nodes (cond_bb);
+  update_phi_nodes (cond_false_bb);
+
+  create_lastcond_true_bb (lastcond_true_bb, cond_false_bb, cond_false_bb,
+                           outer);
+  make_edge (cond_false_bb, lastcond_true_bb, EDGE_TRUE_VALUE);
+  make_edge (cond_false_bb, origin_loop.exit_bb, EDGE_FALSE_VALUE);
+  make_single_succ_edge (lastcond_true_bb, origin_loop.exit_bb,
+                         EDGE_FALLTHRU);
+
+  update_phi_nodes (origin_loop.exit_bb);
+  remove_edge (origin_loop.exit_edge);
+}
+
+/* Clear information about the original loop: delete every basic block
+   of LOOP and then LOOP itself.  */
+static void
+remove_origin_loop (class loop *loop)
+{
+  basic_block *body = get_loop_body_in_dom_order (loop);
+  /* Read the block count before any block is deleted.  */
+  unsigned n = loop->num_nodes;
+
+  for (unsigned i = 0; i < n; ++i)
+    delete_basic_block (body[i]);
+
+  free (body);
+  delete_loop (loop);
+}
+
+/* Make sure that the dominance relationship of the newly inserted cfg
+   is not missing.  For direction DIR, recompute the immediate dominator
+   of every block that has none and of origin_loop.exit_bb.  */
+static void
+update_loop_dominator (cdi_direction dir)
+{
+  gcc_assert (dom_info_available_p (dir));
+
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      basic_block imm_bb = get_immediate_dominator (dir, bb);
+      if (!imm_bb || bb == origin_loop.exit_bb)
+        set_immediate_dominator (dir, bb, recompute_dominator (dir, bb));
+    }
+}
+
+/* Perform the conversion of origin_loop to new_loop: build the new
+   blocks, delete LOOP, then repair the dominator information and the
+   SSA form.  */
+static void
+convert_to_new_loop (class loop *loop)
+{
+  create_new_loops (origin_loop.entry_edge);
+  remove_origin_loop (loop);
+  update_loop_dominator (CDI_DOMINATORS);
+  update_ssa (TODO_update_ssa);
+}
+
 /* The main entry of loop crc optimizes. */
 static unsigned int
 tree_ssa_loop_crc ()
@@ -641,4 +1168,4 @@ gimple_opt_pass *
 make_pass_loop_crc (gcc::context *ctxt)
 {
   return new pass_loop_crc (ctxt);
-}
\ No newline at end of file
+}
-- 
Gitee