diff --git a/0049-build-Add-some-file-right-to-executable.patch b/0049-build-Add-some-file-right-to-executable.patch new file mode 100644 index 0000000000000000000000000000000000000000..b9aa9b924649bbc7fab2eade1aecbc9adbf68f21 --- /dev/null +++ b/0049-build-Add-some-file-right-to-executable.patch @@ -0,0 +1,21 @@ +From 7dffda64fcbbd522616d7dc9c70530d146f4fed6 Mon Sep 17 00:00:00 2001 +From: zhongyunde +Date: Tue, 1 Nov 2022 16:38:38 +0800 +Subject: [PATCH 01/35] [build] Add some file right to executable + +--- + libgcc/mkheader.sh | 0 + move-if-change | 0 + 2 files changed, 0 insertions(+), 0 deletions(-) + mode change 100644 => 100755 libgcc/mkheader.sh + mode change 100644 => 100755 move-if-change + +diff --git a/libgcc/mkheader.sh b/libgcc/mkheader.sh +old mode 100644 +new mode 100755 +diff --git a/move-if-change b/move-if-change +old mode 100644 +new mode 100755 +-- +2.27.0.windows.1 + diff --git a/0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch b/0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch new file mode 100644 index 0000000000000000000000000000000000000000..d3acf4cef5228e8c9120db680fa198025bc38657 --- /dev/null +++ b/0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch @@ -0,0 +1,186 @@ +From c690da762e873d0f5c66ea084e420ba4842354a6 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Wed, 4 Nov 2020 11:55:29 +0100 +Subject: [PATCH 02/35] [Backport] phiopt: Optimize x ? 1024 : 0 to (int) x << + 10 [PR97690] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3e190757fa332d327bee27495f37beb01155cfab + +The following patch generalizes the x ? 1 : 0 -> (int) x optimization +to handle also left shifts by constant. + +During x86_64-linux and i686-linux bootstraps + regtests it triggered +in 1514 unique non-LTO -m64 cases (sort -u on log mentioning +filename, function name and shift count) and 1866 -m32 cases. 
+ +Unfortunately, the patch regresses (before the tests have been adjusted): ++FAIL: gcc.dg/tree-ssa/ssa-ccp-11.c scan-tree-dump-times optimized "if " 0 ++FAIL: gcc.dg/vect/bb-slp-pattern-2.c -flto -ffat-lto-objects scan-tree-dump-times slp1 "optimized: basic block" 1 ++FAIL: gcc.dg/vect/bb-slp-pattern-2.c scan-tree-dump-times slp1 "optimized: basic block" 1 +and in both cases it actually results in worse code. + +> > We'd need some optimization that would go through all PHI edges and +> > compute if some use of the phi results don't actually compute a constant +> > across all the PHI edges - 1 & 0 and 0 & 1 is always 0. + +> PRE should do this, IMHO only optimizing it at -O2 is fine. + +> > Similarly, in the slp vectorization test there is: +> > a[0] = b[0] ? 1 : 7; + +> note this, carefully avoiding the already "optimized" b[0] ? 1 : 0 ... + +> So the option is to put : 7 in the 2, 4 and 8 case as well. The testcase +> wasn't added for any real-world case but is artificial I guess for +> COND_EXPR handling of invariants. + +> But yeah, for things like SLP it means we eventually have to +> implement reverse transforms for all of this to make the lanes +> matching. But that's true anyway for things like x + 1 vs. x + 0 +> or x / 3 vs. x / 2 or other simplifications we do. + +2020-11-04 Jakub Jelinek + + PR tree-optimization/97690 + * tree-ssa-phiopt.c (conditional_replacement): Also optimize + cond ? pow2p_cst : 0 as ((type) cond) << cst. + + * gcc.dg/tree-ssa/phi-opt-22.c: New test. + * gcc.dg/tree-ssa/ssa-ccp-11.c: Use -O2 instead of -O1. + * gcc.dg/vect/bb-slp-pattern-2.c (foo): Use ? 2 : 7, ? 4 : 7 and + ? 8 : 7 instead of ? 2 : 0, ? 4 : 0, ? 8 : 0. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c | 11 ++++++ + gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c | 2 +- + gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c | 6 ++-- + gcc/tree-ssa-phiopt.c | 38 ++++++++++++++------ + 4 files changed, 43 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c +new file mode 100644 +index 000000000..fd3706666 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c +@@ -0,0 +1,11 @@ ++/* PR tree-optimization/97690 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++ ++int foo (_Bool d) { return d ? 2 : 0; } ++int bar (_Bool d) { return d ? 1 : 0; } ++int baz (_Bool d) { return d ? -__INT_MAX__ - 1 : 0; } ++int qux (_Bool d) { return d ? 1024 : 0; } ++ ++/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " << " 3 "phiopt2" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c +index 36b8e7fc8..d70ea5a01 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O1 -fdump-tree-optimized" } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ + + /* Test for CPROP across a DAG. */ + +diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c +index d32cb7585..e64f0115a 100644 +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c +@@ -13,13 +13,13 @@ foo (short * __restrict__ a, int * __restrict__ b, int stride) + for (i = 0; i < N/stride; i++, a += stride, b += stride) + { + a[0] = b[0] ? 1 : 7; +- a[1] = b[1] ? 2 : 0; ++ a[1] = b[1] ? 2 : 7; + a[2] = b[2] ? 3 : 0; +- a[3] = b[3] ? 4 : 0; ++ a[3] = b[3] ? 4 : 7; + a[4] = b[4] ? 5 : 0; + a[5] = b[5] ? 
6 : 0; + a[6] = b[6] ? 7 : 0; +- a[7] = b[7] ? 8 : 0; ++ a[7] = b[7] ? 8 : 7; + } + } + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 591b6435f..85587e8d1 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -753,7 +753,9 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb, + gimple_stmt_iterator gsi; + edge true_edge, false_edge; + tree new_var, new_var2; +- bool neg; ++ bool neg = false; ++ int shift = 0; ++ tree nonzero_arg; + + /* FIXME: Gimplification of complex type is too hard for now. */ + /* We aren't prepared to handle vectors either (and it is a question +@@ -764,14 +766,22 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb, + || POINTER_TYPE_P (TREE_TYPE (arg1)))) + return false; + +- /* The PHI arguments have the constants 0 and 1, or 0 and -1, then +- convert it to the conditional. */ +- if ((integer_zerop (arg0) && integer_onep (arg1)) +- || (integer_zerop (arg1) && integer_onep (arg0))) +- neg = false; +- else if ((integer_zerop (arg0) && integer_all_onesp (arg1)) +- || (integer_zerop (arg1) && integer_all_onesp (arg0))) ++ /* The PHI arguments have the constants 0 and 1, or 0 and -1 or ++ 0 and (1 << cst), then convert it to the conditional. */ ++ if (integer_zerop (arg0)) ++ nonzero_arg = arg1; ++ else if (integer_zerop (arg1)) ++ nonzero_arg = arg0; ++ else ++ return false; ++ if (integer_all_onesp (nonzero_arg)) + neg = true; ++ else if (integer_pow2p (nonzero_arg)) ++ { ++ shift = tree_log2 (nonzero_arg); ++ if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg))) ++ return false; ++ } + else + return false; + +@@ -783,12 +793,12 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb, + falls through into BB. + + There is a single PHI node at the join point (BB) and its arguments +- are constants (0, 1) or (0, -1). ++ are constants (0, 1) or (0, -1) or (0, (1 << shift)). 
+ + So, given the condition COND, and the two PHI arguments, we can + rewrite this PHI into non-branching code: + +- dest = (COND) or dest = COND' ++ dest = (COND) or dest = COND' or dest = (COND) << shift + + We use the condition as-is if the argument associated with the + true edge has the value one or the argument associated with the +@@ -823,6 +833,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb, + cond = fold_build1_loc (gimple_location (stmt), + NEGATE_EXPR, TREE_TYPE (cond), cond); + } ++ else if (shift) ++ { ++ cond = fold_convert_loc (gimple_location (stmt), ++ TREE_TYPE (result), cond); ++ cond = fold_build2_loc (gimple_location (stmt), ++ LSHIFT_EXPR, TREE_TYPE (cond), cond, ++ build_int_cst (integer_type_node, shift)); ++ } + + /* Insert our new statements at the end of conditional block before the + COND_STMT. */ +-- +2.27.0.windows.1 + diff --git a/0051-Backport-phiopt-Fix-up-conditional_replacement-PR993.patch b/0051-Backport-phiopt-Fix-up-conditional_replacement-PR993.patch new file mode 100644 index 0000000000000000000000000000000000000000..3d4767010979755fd540f57eaf24ad12b90f121d --- /dev/null +++ b/0051-Backport-phiopt-Fix-up-conditional_replacement-PR993.patch @@ -0,0 +1,92 @@ +From 79a974bc7bb67cf425a7839f3c1f5689e41c7ee8 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Tue, 9 Mar 2021 19:13:11 +0100 +Subject: [PATCH 03/35] [Backport] phiopt: Fix up conditional_replacement + [PR99305] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b610c30453d8e4cc88693d85a5a100d089640be5 + +Before my PR97690 changes, conditional_replacement would not set neg +when the nonzero arg was boolean true. +I've simplified the testing, so that it first finds the zero argument +and then checks the other argument for all the handled cases +(1, -1 and 1 << X, where the last case is what the patch added support for). +But, unfortunately I've placed the integer_all_onesp test first. 
+For unsigned precision 1 types such as bool integer_all_onesp, integer_onep +and integer_pow2p can all be true and the code set neg to true in that case, +which is undesirable. + +The following patch tests integer_pow2p first (which is trivially true +for integer_onep too and tree_log2 in that case gives shift == 0) +and only if that isn't the case, integer_all_onesp. + +2021-03-09 Jakub Jelinek + + PR tree-optimization/99305 + * tree-ssa-phiopt.c (conditional_replacement): Test integer_pow2p + before integer_all_onesp instead of vice versa. + + * g++.dg/opt/pr99305.C: New test. +--- + gcc/testsuite/g++.dg/opt/pr99305.C | 26 ++++++++++++++++++++++++++ + gcc/tree-ssa-phiopt.c | 6 +++--- + 2 files changed, 29 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/g++.dg/opt/pr99305.C + +diff --git a/gcc/testsuite/g++.dg/opt/pr99305.C b/gcc/testsuite/g++.dg/opt/pr99305.C +new file mode 100644 +index 000000000..8a91277e7 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr99305.C +@@ -0,0 +1,26 @@ ++// PR tree-optimization/99305 ++// { dg-do compile } ++// { dg-options "-O3 -fno-ipa-icf -fdump-tree-optimized" } ++// { dg-final { scan-tree-dump-times " = \\\(unsigned char\\\) c_\[0-9]*\\\(D\\\);" 3 "optimized" } } ++// { dg-final { scan-tree-dump-times " = \[^\n\r]* \\+ \[0-9]*;" 3 "optimized" } } ++// { dg-final { scan-tree-dump-times " = \[^\n\r]* <= 9;" 3 "optimized" } } ++// { dg-final { scan-tree-dump-not "if \\\(c_\[0-9]*\\\(D\\\) \[!=]= 0\\\)" "optimized" } } ++// { dg-final { scan-tree-dump-not " = PHI <" "optimized" } } ++ ++bool ++foo (char c) ++{ ++ return c >= 48 && c <= 57; ++} ++ ++bool ++bar (char c) ++{ ++ return c != 0 && foo (c); ++} ++ ++bool ++baz (char c) ++{ ++ return c != 0 && c >= 48 && c <= 57; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 85587e8d1..b9be28474 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -774,14 +774,14 @@ conditional_replacement (basic_block cond_bb, basic_block 
middle_bb, + nonzero_arg = arg0; + else + return false; +- if (integer_all_onesp (nonzero_arg)) +- neg = true; +- else if (integer_pow2p (nonzero_arg)) ++ if (integer_pow2p (nonzero_arg)) + { + shift = tree_log2 (nonzero_arg); + if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg))) + return false; + } ++ else if (integer_all_onesp (nonzero_arg)) ++ neg = true; + else + return false; + +-- +2.27.0.windows.1 + diff --git a/0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch b/0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch new file mode 100644 index 0000000000000000000000000000000000000000..9ea9a4240c9f467d465be3194bfbc0da9f353a3a --- /dev/null +++ b/0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch @@ -0,0 +1,122 @@ +From 09263d5ed4d81a008ca8ffcc2883dc766e7874d5 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Sun, 6 Dec 2020 10:58:10 +0100 +Subject: [PATCH 04/35] [Backport] phiopt: Handle bool in two_value_replacement + [PR96232] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=8c23434fdadcf4caa1f0e966294c5f67ccf4bcf9 + +The following patch improves code generation on the included testcase by +enabling two_value_replacement on booleans. It does that only for arg0/arg1 +values that conditional_replacement doesn't handle. Additionally +it limits two_value_replacement optimization to the late phiopt like +conditional_replacement. + +2020-12-06 Jakub Jelinek + + PR tree-optimization/96232 + * tree-ssa-phiopt.c (two_value_replacement): Optimize even boolean lhs + cases as long as arg0 has wider precision and conditional_replacement + doesn't handle that case. + (tree_ssa_phiopt_worker): Don't call two_value_replacement during + early phiopt. + + * gcc.dg/tree-ssa/pr96232-2.c: New test. + * gcc.dg/tree-ssa/pr88676-2.c: Check phiopt2 dump rather than phiopt1. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c | 4 ++-- + gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c | 18 ++++++++++++++++++ + gcc/tree-ssa-phiopt.c | 23 +++++++++++++++++++---- + 3 files changed, 39 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c +index 0e616365b..ea88407b6 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c +@@ -1,7 +1,7 @@ + /* PR tree-optimization/88676 */ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-phiopt1" } */ +-/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt1" { target le } } } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt2" { target le } } } */ + + struct foo1 { + int i:1; +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c +new file mode 100644 +index 000000000..9f51820ed +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c +@@ -0,0 +1,18 @@ ++/* PR tree-optimization/96232 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump " 38 - " "optimized" } } */ ++/* { dg-final { scan-tree-dump " \\+ 97;" "optimized" } } */ ++/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */ ++ ++int ++foo (_Bool x) ++{ ++ return x ? 37 : 38; ++} ++ ++int ++bar (_Bool x) ++{ ++ return x ? 98 : 97; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index b9be28474..0623d740d 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -339,7 +339,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + } + + /* Do the replacement of conditional if it can be done. 
*/ +- if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1)) ++ if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p + && conditional_replacement (bb, bb1, e1, e2, phi, +@@ -636,7 +636,6 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + + if (TREE_CODE (lhs) != SSA_NAME + || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)) +- || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE + || TREE_CODE (rhs) != INTEGER_CST) + return false; + +@@ -649,9 +648,25 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + return false; + } + ++ /* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to ++ conditional_replacement. */ ++ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE ++ && (integer_zerop (arg0) ++ || integer_zerop (arg1) ++ || TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE ++ || (TYPE_PRECISION (TREE_TYPE (arg0)) ++ <= TYPE_PRECISION (TREE_TYPE (lhs))))) ++ return false; ++ + wide_int min, max; +- if (get_range_info (lhs, &min, &max) != VR_RANGE +- || min + 1 != max ++ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE) ++ { ++ min = wi::to_wide (boolean_false_node); ++ max = wi::to_wide (boolean_true_node); ++ } ++ else if (get_range_info (lhs, &min, &max) != VR_RANGE) ++ return false; ++ if (min + 1 != max + || (wi::to_wide (rhs) != min + && wi::to_wide (rhs) != max)) + return false; +-- +2.27.0.windows.1 + diff --git a/0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch b/0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch new file mode 100644 index 0000000000000000000000000000000000000000..15c260ed08a97822b1fcf5336b276dfa1251e827 --- /dev/null +++ b/0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch @@ -0,0 +1,256 @@ +From a92cf465f10585350f7cd5739457c3f2852cfc86 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Wed, 21 Oct 2020 10:51:33 +0200 +Subject: [PATCH 05/35] [Backport] phiopt: Optimize x ? 
__builtin_clz (x) : 32 + in GIMPLE [PR97503] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=5244b4af5e47bc98a2a9cf36f048981583a1b163 + +While we have at the RTL level noce_try_ifelse_collapse combined with +simplify_cond_clz_ctz, that optimization doesn't always trigger because +e.g. on powerpc there is a define_insn to compare a reg against zero and +copy that register to another one and so we end up with a different pseudo +in the simplify_cond_clz_ctz test and punt. + +For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes, +we can optimize it already in phiopt though, just need to ensure that +we transform the __builtin_c?z* calls into .C?Z ifns because my recent +VRP changes codified that the builtin calls are always undefined at zero, +while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2. +And, in phiopt we already have popcount handling that does pretty much the +same thing, except for always using a zero value rather than the one set +by C?Z_DEFINED_VALUE_AT_ZERO. + +So, this patch extends that function to handle not just popcount, but also +clz and ctz. + +2020-10-21 Jakub Jelinek + + PR tree-optimization/97503 + * tree-ssa-phiopt.c: Include internal-fn.h. + (cond_removal_in_popcount_pattern): Rename to ... + (cond_removal_in_popcount_clz_ctz_pattern): ... this. Handle not just + popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2. + + * gcc.dg/tree-ssa/pr97503.c: New test. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/pr97503.c | 19 +++++ + gcc/tree-ssa-phiopt.c | 100 ++++++++++++++++++------ + 2 files changed, 95 insertions(+), 24 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr97503.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c +new file mode 100644 +index 000000000..3a3dae6c7 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c +@@ -0,0 +1,19 @@ ++/* PR tree-optimization/97503 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } */ ++/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */ ++/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */ ++/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */ ++ ++int ++foo (int x) ++{ ++ return x ? __builtin_clz (x) : 32; ++} ++ ++int ++bar (unsigned long long x) ++{ ++ return x ? __builtin_clzll (x) : 64; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 0623d740d..c1e11916e 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. 
If not see + #include "tree-inline.h" + #include "case-cfn-macros.h" + #include "tree-eh.h" ++#include "internal-fn.h" + + static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); + static bool two_value_replacement (basic_block, basic_block, edge, gphi *, +@@ -60,8 +61,9 @@ static bool minmax_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); + static bool abs_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); +-static bool cond_removal_in_popcount_pattern (basic_block, basic_block, +- edge, edge, gimple *, tree, tree); ++static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, ++ edge, edge, gimple *, ++ tree, tree); + static bool cond_store_replacement (basic_block, basic_block, edge, edge, + hash_set *); + static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); +@@ -348,8 +350,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p +- && cond_removal_in_popcount_pattern (bb, bb1, e1, e2, +- phi, arg0, arg1)) ++ && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, ++ e2, phi, arg0, ++ arg1)) + cfgchanged = true; + else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -1771,16 +1774,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, + + + c_12 = PHI <_9(2)> +-*/ ++ ++ Similarly for __builtin_clz or __builtin_ctz if ++ C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and ++ instead of 0 above it uses the value from that macro. 
*/ + + static bool +-cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb, +- edge e1, edge e2, +- gimple *phi, tree arg0, tree arg1) ++cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, ++ basic_block middle_bb, ++ edge e1, edge e2, gimple *phi, ++ tree arg0, tree arg1) + { + gimple *cond; + gimple_stmt_iterator gsi, gsi_from; +- gimple *popcount; ++ gimple *call; + gimple *cast = NULL; + tree lhs, arg; + +@@ -1798,35 +1805,67 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb, + gsi_next_nondebug (&gsi); + if (!gsi_end_p (gsi)) + { +- popcount = gsi_stmt (gsi); ++ call = gsi_stmt (gsi); + gsi_next_nondebug (&gsi); + if (!gsi_end_p (gsi)) + return false; + } + else + { +- popcount = cast; ++ call = cast; + cast = NULL; + } + +- /* Check that we have a popcount builtin. */ +- if (!is_gimple_call (popcount)) ++ /* Check that we have a popcount/clz/ctz builtin. */ ++ if (!is_gimple_call (call) || gimple_call_num_args (call) != 1) ++ return false; ++ ++ arg = gimple_call_arg (call, 0); ++ lhs = gimple_get_lhs (call); ++ ++ if (lhs == NULL_TREE) + return false; +- combined_fn cfn = gimple_call_combined_fn (popcount); ++ ++ combined_fn cfn = gimple_call_combined_fn (call); ++ internal_fn ifn = IFN_LAST; ++ int val = 0; + switch (cfn) + { + CASE_CFN_POPCOUNT: + break; ++ CASE_CFN_CLZ: ++ if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) ++ { ++ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); ++ if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg), ++ OPTIMIZE_FOR_BOTH) ++ && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2) ++ { ++ ifn = IFN_CLZ; ++ break; ++ } ++ } ++ return false; ++ CASE_CFN_CTZ: ++ if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) ++ { ++ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); ++ if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg), ++ OPTIMIZE_FOR_BOTH) ++ && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2) ++ { ++ ifn = IFN_CTZ; ++ break; ++ } ++ } ++ return 
false; + default: + return false; + } + +- arg = gimple_call_arg (popcount, 0); +- lhs = gimple_get_lhs (popcount); +- + if (cast) + { +- /* We have a cast stmt feeding popcount builtin. */ ++ /* We have a cast stmt feeding popcount/clz/ctz builtin. */ + /* Check that we have a cast prior to that. */ + if (gimple_code (cast) != GIMPLE_ASSIGN + || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast))) +@@ -1839,7 +1878,7 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb, + + cond = last_stmt (cond_bb); + +- /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount ++ /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz + builtin. */ + if (gimple_code (cond) != GIMPLE_COND + || (gimple_cond_code (cond) != NE_EXPR +@@ -1859,10 +1898,13 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb, + } + + /* Check PHI arguments. */ +- if (lhs != arg0 || !integer_zerop (arg1)) ++ if (lhs != arg0 ++ || TREE_CODE (arg1) != INTEGER_CST ++ || wi::to_wide (arg1) != val) + return false; + +- /* And insert the popcount builtin and cast stmt before the cond_bb. */ ++ /* And insert the popcount/clz/ctz builtin and cast stmt before the ++ cond_bb. */ + gsi = gsi_last_bb (cond_bb); + if (cast) + { +@@ -1870,9 +1912,19 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb, + gsi_move_before (&gsi_from, &gsi); + reset_flow_sensitive_info (gimple_get_lhs (cast)); + } +- gsi_from = gsi_for_stmt (popcount); +- gsi_move_before (&gsi_from, &gsi); +- reset_flow_sensitive_info (gimple_get_lhs (popcount)); ++ gsi_from = gsi_for_stmt (call); ++ if (ifn == IFN_LAST || gimple_call_internal_p (call)) ++ gsi_move_before (&gsi_from, &gsi); ++ else ++ { ++ /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only ++ the latter is well defined at zero. 
*/ ++ call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0)); ++ gimple_call_set_lhs (call, lhs); ++ gsi_insert_before (&gsi, call, GSI_SAME_STMT); ++ gsi_remove (&gsi_from, true); ++ } ++ reset_flow_sensitive_info (lhs); + + /* Now update the PHI and remove unneeded bbs. */ + replace_phi_edge_with_variable (cond_bb, e2, phi, lhs); +-- +2.27.0.windows.1 + diff --git a/0054-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch b/0054-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch new file mode 100644 index 0000000000000000000000000000000000000000..160afc0bb3915644ea00002100746c36a179f0b4 --- /dev/null +++ b/0054-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch @@ -0,0 +1,69 @@ +From 7d5d2ab082ce9986db4f3313013b44faa46bc412 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Thu, 22 Oct 2020 09:34:28 +0200 +Subject: [PATCH 06/35] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32 + in GIMPLE fallout [PR97503] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=ef2d3ec325b1b720df5da20784eba46249af2294 + +> this broke sparc-sun-solaris2.11 bootstrap +> +> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c: In function 'bool cond_removal_in_popcount_clz_ctz_pattern(basic_block, basic_block, edge, edge, gimple*, tree, tree)': +> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c:1858:27: error: variable 'mode' set but not used [-Werror=unused-but-set-variable] +> 1858 | scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); +> | ^~~~ +> +> +> and doubtlessly several other targets that use the defaults.h definition of +> +> #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) 0 + +Ugh, seems many of those macros do not evaluate the first argument. +This got broken by the change to direct_internal_fn_supported_p, previously +it used mode also in the optab test. + +2020-10-22 Jakub Jelinek + + * tree-ssa-phiopt.c (cond_removal_in_popcount_clz_ctz_pattern): + For CLZ and CTZ tests, use type temporary instead of mode. 
+--- + gcc/tree-ssa-phiopt.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index c1e11916e..707a5882e 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -1836,10 +1836,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + CASE_CFN_CLZ: + if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) + { +- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); +- if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg), +- OPTIMIZE_FOR_BOTH) +- && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2) ++ tree type = TREE_TYPE (arg); ++ if (direct_internal_fn_supported_p (IFN_CLZ, type, OPTIMIZE_FOR_BOTH) ++ && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ++ val) == 2) + { + ifn = IFN_CLZ; + break; +@@ -1849,10 +1849,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + CASE_CFN_CTZ: + if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) + { +- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); +- if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg), +- OPTIMIZE_FOR_BOTH) +- && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2) ++ tree type = TREE_TYPE (arg); ++ if (direct_internal_fn_supported_p (IFN_CTZ, type, OPTIMIZE_FOR_BOTH) ++ && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ++ val) == 2) + { + ifn = IFN_CTZ; + break; +-- +2.27.0.windows.1 + diff --git a/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch b/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch new file mode 100644 index 0000000000000000000000000000000000000000..35b773e00dbab4f1baba1f7aecb3aa8d8e288ba8 --- /dev/null +++ b/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch @@ -0,0 +1,218 @@ +From 018523df11698dd0e2d42326c57bdf724a7a1aa5 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Tue, 5 Jan 2021 16:35:22 +0100 +Subject: [PATCH 07/35] [Backport] phiopt: Optimize x < 0 ? 
~y : y to (x >> 31) + ^ y [PR96928] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=576714b309b330df0e80e34114bcdf0bba35e146 + +As requested in the PR, the one's complement abs can be done more +efficiently without cmov or branching. + +Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize +it in ifcvt, on x86_64 with -m32 we generate in the end the exact same +code, but with -m64: + movl %edi, %eax +- notl %eax +- cmpl %edi, %eax +- cmovl %edi, %eax ++ sarl $31, %eax ++ xorl %edi, %eax + ret + +2021-01-05 Jakub Jelinek + + PR tree-optimization/96928 + * tree-ssa-phiopt.c (xor_replacement): New function. + (tree_ssa_phiopt_worker): Call it. + + * gcc.dg/tree-ssa/pr96928.c: New test. + * gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1, + instead of scanning rtl dump for ifcvt message check assembly + for xor instruction. +--- + gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 38 +++++++++ + gcc/tree-ssa-phiopt.c | 108 ++++++++++++++++++++++++ + 2 files changed, 146 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c +new file mode 100644 +index 000000000..209135726 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c +@@ -0,0 +1,38 @@ ++/* PR tree-optimization/96928 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */ ++ ++int ++foo (int a) ++{ ++ return a < 0 ? ~a : a; ++} ++ ++int ++bar (int a, int b) ++{ ++ return a < 0 ? 
~b : b; ++} ++ ++unsigned ++baz (int a, unsigned int b) ++{ ++ return a < 0 ? ~b : b; ++} ++ ++unsigned ++qux (int a, unsigned int c) ++{ ++ return a >= 0 ? ~c : c; ++} ++ ++int ++corge (int a, int b) ++{ ++ return a >= 0 ? b : ~b; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 707a5882e..b9cd07a60 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -61,6 +61,8 @@ static bool minmax_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); + static bool abs_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); ++static bool xor_replacement (basic_block, basic_block, ++ edge, edge, gimple *, tree, tree); + static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, + edge, edge, gimple *, + tree, tree); +@@ -349,6 +351,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + cfgchanged = true; + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; ++ else if (!early_p ++ && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) ++ cfgchanged = true; + else if (!early_p + && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, + e2, phi, arg0, +@@ -2059,6 +2064,109 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + ++/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */ ++ ++static bool ++xor_replacement (basic_block cond_bb, basic_block middle_bb, ++ edge e0 ATTRIBUTE_UNUSED, edge e1, ++ gimple *phi, tree arg0, tree arg1) ++{ ++ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1))) ++ return false; ++ ++ /* OTHER_BLOCK must have only one executable statement which must have the ++ form arg0 = ~arg1 or arg1 = ~arg0. */ ++ ++ gimple *assign = last_and_only_stmt (middle_bb); ++ /* If we did not find the proper one's complement assignment, then we cannot ++ optimize. 
*/ ++ if (assign == NULL) ++ return false; ++ ++ /* If we got here, then we have found the only executable statement ++ in OTHER_BLOCK. If it is anything other than arg = ~arg1 or ++ arg1 = ~arg0, then we cannot optimize. */ ++ if (!is_gimple_assign (assign)) ++ return false; ++ ++ if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR) ++ return false; ++ ++ tree lhs = gimple_assign_lhs (assign); ++ tree rhs = gimple_assign_rhs1 (assign); ++ ++ /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */ ++ if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0)) ++ return false; ++ ++ gimple *cond = last_stmt (cond_bb); ++ tree result = PHI_RESULT (phi); ++ ++ /* Only relationals comparing arg[01] against zero are interesting. */ ++ enum tree_code cond_code = gimple_cond_code (cond); ++ if (cond_code != LT_EXPR && cond_code != GE_EXPR) ++ return false; ++ ++ /* Make sure the conditional is x OP 0. */ ++ tree clhs = gimple_cond_lhs (cond); ++ if (TREE_CODE (clhs) != SSA_NAME ++ || !INTEGRAL_TYPE_P (TREE_TYPE (clhs)) ++ || TYPE_UNSIGNED (TREE_TYPE (clhs)) ++ || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1)) ++ || !integer_zerop (gimple_cond_rhs (cond))) ++ return false; ++ ++ /* We need to know which is the true edge and which is the false ++ edge so that we know if have xor or inverted xor. */ ++ edge true_edge, false_edge; ++ extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); ++ ++ /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we ++ will need to invert the result. Similarly for LT_EXPR if ++ the false edge goes to OTHER_BLOCK. 
*/ ++ edge e; ++ if (cond_code == GE_EXPR) ++ e = true_edge; ++ else ++ e = false_edge; ++ ++ bool invert = e->dest == middle_bb; ++ ++ result = duplicate_ssa_name (result, NULL); ++ ++ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); ++ ++ int prec = TYPE_PRECISION (TREE_TYPE (clhs)); ++ gimple *new_stmt ++ = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs, ++ build_int_cst (integer_type_node, prec - 1)); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ ++ if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs))) ++ { ++ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), ++ NOP_EXPR, gimple_assign_lhs (new_stmt)); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ } ++ lhs = gimple_assign_lhs (new_stmt); ++ ++ if (invert) ++ { ++ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), ++ BIT_NOT_EXPR, rhs); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ rhs = gimple_assign_lhs (new_stmt); ++ } ++ ++ new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs); ++ gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); ++ ++ replace_phi_edge_with_variable (cond_bb, e1, phi, result); ++ ++ /* Note that we optimized this PHI. */ ++ return true; ++} ++ + /* Auxiliary functions to determine the set of memory accesses which + can't trap because they are preceded by accesses to the same memory + portion. 
We do that for MEM_REFs, so we only need to track +-- +2.27.0.windows.1 + diff --git a/0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch b/0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch new file mode 100644 index 0000000000000000000000000000000000000000..473dee8fede5e3f892233954b26f409cb7f1f7e8 --- /dev/null +++ b/0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch @@ -0,0 +1,1067 @@ +From 02313ab8cf7eb4defc1482ece48c07c2d8c77be9 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Thu, 6 May 2021 10:15:40 +0200 +Subject: [PATCH 08/35] [Backport] phiopt: Optimize (x <=> y) cmp z [PR94589] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=ad96c867e173c1ebcfc201b201adac5095683a08 + +genericize_spaceship genericizes i <=> j to approximately +({ int c; if (i == j) c = 0; else if (i < j) c = -1; else c = 1; c; }) +for strong ordering and +({ int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; c; }) +for partial ordering. +The C++ standard supports then == or != comparisons of that against +strong/partial ordering enums, or </<=/==/!=/>/>= comparisons of <=> result +against literal 0. + +In some cases we already optimize that but in many cases we keep performing +all the 2 or 3 comparisons, compute the spaceship value and then compare +that. + +The following patch recognizes those patterns if the <=> operands are +integral types or floating point (the latter only for -ffast-math) and +optimizes it to the single comparison that is needed (plus adds debug stmts +if needed for the spaceship result). + +There is one thing I'd like to address in a follow-up: the pr94589-2.C +testcase should be matching just 12 times each, but runs +into operator>=(partial_ordering, unspecified) being defined as +(_M_value&1)==_M_value +rather than _M_value>=0. When not honoring NaNs, the 2 case should be +unreachable and so (_M_value&1)==_M_value is then equivalent to _M_value>=0, +but is not a single use but two uses. 
I'll need to pattern match that case +specially. + +2021-05-06 Jakub Jelinek + + PR tree-optimization/94589 + * tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Call + spaceship_replacement. + (cond_only_block_p, spaceship_replacement): New functions. + + * gcc.dg/pr94589-1.c: New test. + * gcc.dg/pr94589-2.c: New test. + * gcc.dg/pr94589-3.c: New test. + * gcc.dg/pr94589-4.c: New test. + * g++.dg/opt/pr94589-1.C: New test. + * g++.dg/opt/pr94589-2.C: New test. + * g++.dg/opt/pr94589-3.C: New test. + * g++.dg/opt/pr94589-4.C: New test. +--- + gcc/testsuite/g++.dg/opt/pr94589-1.C | 33 +++ + gcc/testsuite/g++.dg/opt/pr94589-2.C | 33 +++ + gcc/testsuite/g++.dg/opt/pr94589-3.C | 84 ++++++ + gcc/testsuite/g++.dg/opt/pr94589-4.C | 84 ++++++ + gcc/testsuite/gcc.dg/pr94589-1.c | 35 +++ + gcc/testsuite/gcc.dg/pr94589-2.c | 35 +++ + gcc/testsuite/gcc.dg/pr94589-3.c | 97 ++++++ + gcc/testsuite/gcc.dg/pr94589-4.c | 97 ++++++ + gcc/tree-ssa-phiopt.c | 424 +++++++++++++++++++++++++++ + 9 files changed, 922 insertions(+) + create mode 100644 gcc/testsuite/g++.dg/opt/pr94589-1.C + create mode 100644 gcc/testsuite/g++.dg/opt/pr94589-2.C + create mode 100644 gcc/testsuite/g++.dg/opt/pr94589-3.C + create mode 100644 gcc/testsuite/g++.dg/opt/pr94589-4.C + create mode 100644 gcc/testsuite/gcc.dg/pr94589-1.c + create mode 100644 gcc/testsuite/gcc.dg/pr94589-2.c + create mode 100644 gcc/testsuite/gcc.dg/pr94589-3.c + create mode 100644 gcc/testsuite/gcc.dg/pr94589-4.c + +diff --git a/gcc/testsuite/g++.dg/opt/pr94589-1.C b/gcc/testsuite/g++.dg/opt/pr94589-1.C +new file mode 100644 +index 000000000..d1cc5050c +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr94589-1.C +@@ -0,0 +1,33 @@ ++// PR tree-optimization/94589 ++// { dg-do compile { target c++20 } } ++// { dg-options "-O2 -g0 -fdump-tree-optimized" } ++// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } } ++// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) 
(?:<|<=|==|!=|>|>=) \[45]" 12 "optimized" } } ++ ++#include ++ ++#define A __attribute__((noipa)) ++A bool f1 (int i, int j) { auto c = i <=> j; return c == 0; } ++A bool f2 (int i, int j) { auto c = i <=> j; return c != 0; } ++A bool f3 (int i, int j) { auto c = i <=> j; return c > 0; } ++A bool f4 (int i, int j) { auto c = i <=> j; return c < 0; } ++A bool f5 (int i, int j) { auto c = i <=> j; return c >= 0; } ++A bool f6 (int i, int j) { auto c = i <=> j; return c <= 0; } ++A bool f7 (int i, int j) { auto c = i <=> j; return c == std::strong_ordering::less; } ++A bool f8 (int i, int j) { auto c = i <=> j; return c != std::strong_ordering::less; } ++A bool f9 (int i, int j) { auto c = i <=> j; return c == std::strong_ordering::equal; } ++A bool f10 (int i, int j) { auto c = i <=> j; return c != std::strong_ordering::equal; } ++A bool f11 (int i, int j) { auto c = i <=> j; return c == std::strong_ordering::greater; } ++A bool f12 (int i, int j) { auto c = i <=> j; return c != std::strong_ordering::greater; } ++A bool f13 (int i) { auto c = i <=> 5; return c == 0; } ++A bool f14 (int i) { auto c = i <=> 5; return c != 0; } ++A bool f15 (int i) { auto c = i <=> 5; return c > 0; } ++A bool f16 (int i) { auto c = i <=> 5; return c < 0; } ++A bool f17 (int i) { auto c = i <=> 5; return c >= 0; } ++A bool f18 (int i) { auto c = i <=> 5; return c <= 0; } ++A bool f19 (int i) { auto c = i <=> 5; return c == std::strong_ordering::less; } ++A bool f20 (int i) { auto c = i <=> 5; return c != std::strong_ordering::less; } ++A bool f21 (int i) { auto c = i <=> 5; return c == std::strong_ordering::equal; } ++A bool f22 (int i) { auto c = i <=> 5; return c != std::strong_ordering::equal; } ++A bool f23 (int i) { auto c = i <=> 5; return c == std::strong_ordering::greater; } ++A bool f24 (int i) { auto c = i <=> 5; return c != std::strong_ordering::greater; } +diff --git a/gcc/testsuite/g++.dg/opt/pr94589-2.C b/gcc/testsuite/g++.dg/opt/pr94589-2.C +new file mode 100644 +index 
000000000..dda947e22 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr94589-2.C +@@ -0,0 +1,33 @@ ++// PR tree-optimization/94589 ++// { dg-do compile { target c++20 } } ++// { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" } ++// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } } ++// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } } ++ ++#include ++ ++#define A __attribute__((noipa)) ++A bool f1 (double i, double j) { auto c = i <=> j; return c == 0; } ++A bool f2 (double i, double j) { auto c = i <=> j; return c != 0; } ++A bool f3 (double i, double j) { auto c = i <=> j; return c > 0; } ++A bool f4 (double i, double j) { auto c = i <=> j; return c < 0; } ++A bool f5 (double i, double j) { auto c = i <=> j; return c >= 0; } ++A bool f6 (double i, double j) { auto c = i <=> j; return c <= 0; } ++A bool f7 (double i, double j) { auto c = i <=> j; return c == std::partial_ordering::less; } ++A bool f8 (double i, double j) { auto c = i <=> j; return c != std::partial_ordering::less; } ++A bool f9 (double i, double j) { auto c = i <=> j; return c == std::partial_ordering::equivalent; } ++A bool f10 (double i, double j) { auto c = i <=> j; return c != std::partial_ordering::equivalent; } ++A bool f11 (double i, double j) { auto c = i <=> j; return c == std::partial_ordering::greater; } ++A bool f12 (double i, double j) { auto c = i <=> j; return c != std::partial_ordering::greater; } ++A bool f13 (double i) { auto c = i <=> 5.0; return c == 0; } ++A bool f14 (double i) { auto c = i <=> 5.0; return c != 0; } ++A bool f15 (double i) { auto c = i <=> 5.0; return c > 0; } ++A bool f16 (double i) { auto c = i <=> 5.0; return c < 0; } ++A bool f17 (double i) { auto c = i <=> 5.0; return c >= 0; } ++A bool f18 (double i) { auto c = i <=> 5.0; return c <= 0; } ++A bool f19 (double i) { auto c = i <=> 5.0; return c == std::partial_ordering::less; } ++A bool 
f20 (double i) { auto c = i <=> 5.0; return c != std::partial_ordering::less; } ++A bool f21 (double i) { auto c = i <=> 5.0; return c == std::partial_ordering::equivalent; } ++A bool f22 (double i) { auto c = i <=> 5.0; return c != std::partial_ordering::equivalent; } ++A bool f23 (double i) { auto c = i <=> 5.0; return c == std::partial_ordering::greater; } ++A bool f24 (double i) { auto c = i <=> 5.0; return c != std::partial_ordering::greater; } +diff --git a/gcc/testsuite/g++.dg/opt/pr94589-3.C b/gcc/testsuite/g++.dg/opt/pr94589-3.C +new file mode 100644 +index 000000000..725b81f56 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr94589-3.C +@@ -0,0 +1,84 @@ ++// { dg-do run { target c++20 } } ++// { dg-options "-O2 -g" } ++ ++#include "pr94589-1.C" ++ ++#define C(fn, i, j, r) if (fn (i, j) != r) __builtin_abort () ++#define D(fn, i, r) if (fn (i) != r) __builtin_abort () ++ ++int ++main () ++{ ++ C (f1, 7, 8, false); ++ C (f1, 8, 8, true); ++ C (f1, 9, 8, false); ++ C (f2, 7, 8, true); ++ C (f2, 8, 8, false); ++ C (f2, 9, 8, true); ++ C (f3, 7, 8, false); ++ C (f3, 8, 8, false); ++ C (f3, 9, 8, true); ++ C (f4, 7, 8, true); ++ C (f4, 8, 8, false); ++ C (f4, 9, 8, false); ++ C (f5, 7, 8, false); ++ C (f5, 8, 8, true); ++ C (f5, 9, 8, true); ++ C (f6, 7, 8, true); ++ C (f6, 8, 8, true); ++ C (f6, 9, 8, false); ++ C (f7, 7, 8, true); ++ C (f7, 8, 8, false); ++ C (f7, 9, 8, false); ++ C (f8, 7, 8, false); ++ C (f8, 8, 8, true); ++ C (f8, 9, 8, true); ++ C (f9, 7, 8, false); ++ C (f9, 8, 8, true); ++ C (f9, 9, 8, false); ++ C (f10, 7, 8, true); ++ C (f10, 8, 8, false); ++ C (f10, 9, 8, true); ++ C (f11, 7, 8, false); ++ C (f11, 8, 8, false); ++ C (f11, 9, 8, true); ++ C (f12, 7, 8, true); ++ C (f12, 8, 8, true); ++ C (f12, 9, 8, false); ++ D (f13, 4, false); ++ D (f13, 5, true); ++ D (f13, 6, false); ++ D (f14, 4, true); ++ D (f14, 5, false); ++ D (f14, 6, true); ++ D (f15, 4, false); ++ D (f15, 5, false); ++ D (f15, 6, true); ++ D (f16, 4, true); ++ D (f16, 5, 
false); ++ D (f16, 6, false); ++ D (f17, 4, false); ++ D (f17, 5, true); ++ D (f17, 6, true); ++ D (f18, 4, true); ++ D (f18, 5, true); ++ D (f18, 6, false); ++ D (f19, 4, true); ++ D (f19, 5, false); ++ D (f19, 6, false); ++ D (f20, 4, false); ++ D (f20, 5, true); ++ D (f20, 6, true); ++ D (f21, 4, false); ++ D (f21, 5, true); ++ D (f21, 6, false); ++ D (f22, 4, true); ++ D (f22, 5, false); ++ D (f22, 6, true); ++ D (f23, 4, false); ++ D (f23, 5, false); ++ D (f23, 6, true); ++ D (f24, 4, true); ++ D (f24, 5, true); ++ D (f24, 6, false); ++} +diff --git a/gcc/testsuite/g++.dg/opt/pr94589-4.C b/gcc/testsuite/g++.dg/opt/pr94589-4.C +new file mode 100644 +index 000000000..256a45580 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr94589-4.C +@@ -0,0 +1,84 @@ ++// { dg-do run { target c++20 } } ++// { dg-options "-O2 -g -ffast-math" } ++ ++#include "pr94589-2.C" ++ ++#define C(fn, i, j, r) if (fn (i, j) != r) __builtin_abort () ++#define D(fn, i, r) if (fn (i) != r) __builtin_abort () ++ ++int ++main () ++{ ++ C (f1, 7.0, 8.0, false); ++ C (f1, 8.0, 8.0, true); ++ C (f1, 9.0, 8.0, false); ++ C (f2, 7.0, 8.0, true); ++ C (f2, 8.0, 8.0, false); ++ C (f2, 9.0, 8.0, true); ++ C (f3, 7.0, 8.0, false); ++ C (f3, 8.0, 8.0, false); ++ C (f3, 9.0, 8.0, true); ++ C (f4, 7.0, 8.0, true); ++ C (f4, 8.0, 8.0, false); ++ C (f4, 9.0, 8.0, false); ++ C (f5, 7.0, 8.0, false); ++ C (f5, 8.0, 8.0, true); ++ C (f5, 9.0, 8.0, true); ++ C (f6, 7.0, 8.0, true); ++ C (f6, 8.0, 8.0, true); ++ C (f6, 9.0, 8.0, false); ++ C (f7, 7.0, 8.0, true); ++ C (f7, 8.0, 8.0, false); ++ C (f7, 9.0, 8.0, false); ++ C (f8, 7.0, 8.0, false); ++ C (f8, 8.0, 8.0, true); ++ C (f8, 9.0, 8.0, true); ++ C (f9, 7.0, 8.0, false); ++ C (f9, 8.0, 8.0, true); ++ C (f9, 9.0, 8.0, false); ++ C (f10, 7.0, 8.0, true); ++ C (f10, 8.0, 8.0, false); ++ C (f10, 9.0, 8.0, true); ++ C (f11, 7.0, 8.0, false); ++ C (f11, 8.0, 8.0, false); ++ C (f11, 9.0, 8.0, true); ++ C (f12, 7.0, 8.0, true); ++ C (f12, 8.0, 8.0, true); ++ C (f12, 
9.0, 8.0, false); ++ D (f13, 4.0, false); ++ D (f13, 5.0, true); ++ D (f13, 6.0, false); ++ D (f14, 4.0, true); ++ D (f14, 5.0, false); ++ D (f14, 6.0, true); ++ D (f15, 4.0, false); ++ D (f15, 5.0, false); ++ D (f15, 6.0, true); ++ D (f16, 4.0, true); ++ D (f16, 5.0, false); ++ D (f16, 6.0, false); ++ D (f17, 4.0, false); ++ D (f17, 5.0, true); ++ D (f17, 6.0, true); ++ D (f18, 4.0, true); ++ D (f18, 5.0, true); ++ D (f18, 6.0, false); ++ D (f19, 4.0, true); ++ D (f19, 5.0, false); ++ D (f19, 6.0, false); ++ D (f20, 4.0, false); ++ D (f20, 5.0, true); ++ D (f20, 6.0, true); ++ D (f21, 4.0, false); ++ D (f21, 5.0, true); ++ D (f21, 6.0, false); ++ D (f22, 4.0, true); ++ D (f22, 5.0, false); ++ D (f22, 6.0, true); ++ D (f23, 4.0, false); ++ D (f23, 5.0, false); ++ D (f23, 6.0, true); ++ D (f24, 4.0, true); ++ D (f24, 5.0, true); ++ D (f24, 6.0, false); ++} +diff --git a/gcc/testsuite/gcc.dg/pr94589-1.c b/gcc/testsuite/gcc.dg/pr94589-1.c +new file mode 100644 +index 000000000..de404ea82 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr94589-1.c +@@ -0,0 +1,35 @@ ++/* PR tree-optimization/94589 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -g0 -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[45]" 14 "optimized" } } */ ++ ++#define A __attribute__((noipa)) ++A int f1 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c == 0; } ++A int f2 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c != 0; } ++A int f3 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c > 0; } ++A int f4 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c < 0; } ++A int f5 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c >= 0; } ++A int f6 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c <= 0; } ++A int f7 (int i, int j) { int c = i == j ? 
0 : i < j ? -1 : 1; return c == -1; } ++A int f8 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c != -1; } ++A int f9 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c > -1; } ++A int f10 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c <= -1; } ++A int f11 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c == 1; } ++A int f12 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c != 1; } ++A int f13 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c < 1; } ++A int f14 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c >= 1; } ++A int f15 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c == 0; } ++A int f16 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c != 0; } ++A int f17 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c > 0; } ++A int f18 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c < 0; } ++A int f19 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c >= 0; } ++A int f20 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c <= 0; } ++A int f21 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c == -1; } ++A int f22 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c != -1; } ++A int f23 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c > -1; } ++A int f24 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c <= -1; } ++A int f25 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c == 1; } ++A int f26 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c != 1; } ++A int f27 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c < 1; } ++A int f28 (int i) { int c = i == 5 ? 0 : i < 5 ? 
-1 : 1; return c >= 1; } +diff --git a/gcc/testsuite/gcc.dg/pr94589-2.c b/gcc/testsuite/gcc.dg/pr94589-2.c +new file mode 100644 +index 000000000..9481b764d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr94589-2.c +@@ -0,0 +1,35 @@ ++/* PR tree-optimization/94589 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } } */ ++ ++#define A __attribute__((noipa)) ++A int f1 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c == 0; } ++A int f2 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c != 0; } ++A int f3 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c > 0; } ++A int f4 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c < 0; } ++A int f5 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c >= 0; } ++A int f6 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c <= 0; } ++A int f7 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c == -1; } ++A int f8 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c != -1; } ++A int f9 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c > -1; } ++A int f10 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c <= -1; } ++A int 
f11 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c == 1; } ++A int f12 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c != 1; } ++A int f13 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c < 1; } ++A int f14 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c >= 1; } ++A int f15 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c == 0; } ++A int f16 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c != 0; } ++A int f17 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c > 0; } ++A int f18 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c < 0; } ++A int f19 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c >= 0; } ++A int f20 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c <= 0; } ++A int f21 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c == -1; } ++A int f22 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c != -1; } ++A int f23 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c > -1; } ++A int f24 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c <= -1; } ++A int f25 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c == 
1; } ++A int f26 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c != 1; } ++A int f27 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c < 1; } ++A int f28 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c >= 1; } +diff --git a/gcc/testsuite/gcc.dg/pr94589-3.c b/gcc/testsuite/gcc.dg/pr94589-3.c +new file mode 100644 +index 000000000..df82fab73 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr94589-3.c +@@ -0,0 +1,97 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -g" } */ ++ ++#include "pr94589-1.c" ++ ++#define C(fn, i, j, r) if (fn (i, j) != r) __builtin_abort () ++#define D(fn, i, r) if (fn (i) != r) __builtin_abort () ++ ++int ++main () ++{ ++ C (f1, 7, 8, 0); ++ C (f1, 8, 8, 1); ++ C (f1, 9, 8, 0); ++ C (f2, 7, 8, 1); ++ C (f2, 8, 8, 0); ++ C (f2, 9, 8, 1); ++ C (f3, 7, 8, 0); ++ C (f3, 8, 8, 0); ++ C (f3, 9, 8, 1); ++ C (f4, 7, 8, 1); ++ C (f4, 8, 8, 0); ++ C (f4, 9, 8, 0); ++ C (f5, 7, 8, 0); ++ C (f5, 8, 8, 1); ++ C (f5, 9, 8, 1); ++ C (f6, 7, 8, 1); ++ C (f6, 8, 8, 1); ++ C (f6, 9, 8, 0); ++ C (f7, 7, 8, 1); ++ C (f7, 8, 8, 0); ++ C (f7, 9, 8, 0); ++ C (f8, 7, 8, 0); ++ C (f8, 8, 8, 1); ++ C (f8, 9, 8, 1); ++ C (f9, 7, 8, 0); ++ C (f9, 8, 8, 1); ++ C (f9, 9, 8, 1); ++ C (f10, 7, 8, 1); ++ C (f10, 8, 8, 0); ++ C (f10, 9, 8, 0); ++ C (f11, 7, 8, 0); ++ C (f11, 8, 8, 0); ++ C (f11, 9, 8, 1); ++ C (f12, 7, 8, 1); ++ C (f12, 8, 8, 1); ++ C (f12, 9, 8, 0); ++ C (f13, 7, 8, 1); ++ C (f13, 8, 8, 1); ++ C (f13, 9, 8, 0); ++ C (f14, 7, 8, 0); ++ C (f14, 8, 8, 0); ++ C (f14, 9, 8, 1); ++ D (f15, 4, 0); ++ D (f15, 5, 1); ++ D (f15, 6, 0); ++ D (f16, 4, 1); ++ D (f16, 5, 0); ++ D (f16, 6, 1); ++ D (f17, 4, 0); ++ D (f17, 5, 0); ++ D (f17, 6, 1); ++ D (f18, 4, 1); ++ D (f18, 5, 0); ++ D (f18, 6, 0); ++ D (f19, 4, 0); ++ D (f19, 5, 1); ++ D (f19, 6, 1); ++ D (f20, 4, 1); ++ D (f20, 5, 1); 
++ D (f20, 6, 0); ++ D (f21, 4, 1); ++ D (f21, 5, 0); ++ D (f21, 6, 0); ++ D (f22, 4, 0); ++ D (f22, 5, 1); ++ D (f22, 6, 1); ++ D (f23, 4, 0); ++ D (f23, 5, 1); ++ D (f23, 6, 1); ++ D (f24, 4, 1); ++ D (f24, 5, 0); ++ D (f24, 6, 0); ++ D (f25, 4, 0); ++ D (f25, 5, 0); ++ D (f25, 6, 1); ++ D (f26, 4, 1); ++ D (f26, 5, 1); ++ D (f26, 6, 0); ++ D (f27, 4, 1); ++ D (f27, 5, 1); ++ D (f27, 6, 0); ++ D (f28, 4, 0); ++ D (f28, 5, 0); ++ D (f28, 6, 1); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.dg/pr94589-4.c b/gcc/testsuite/gcc.dg/pr94589-4.c +new file mode 100644 +index 000000000..b2557fb07 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr94589-4.c +@@ -0,0 +1,97 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -g -ffast-math" } */ ++ ++#include "pr94589-2.c" ++ ++#define C(fn, i, j, r) if (fn (i, j) != r) __builtin_abort () ++#define D(fn, i, r) if (fn (i) != r) __builtin_abort () ++ ++int ++main () ++{ ++ C (f1, 7.0, 8.0, 0); ++ C (f1, 8.0, 8.0, 1); ++ C (f1, 9.0, 8.0, 0); ++ C (f2, 7.0, 8.0, 1); ++ C (f2, 8.0, 8.0, 0); ++ C (f2, 9.0, 8.0, 1); ++ C (f3, 7.0, 8.0, 0); ++ C (f3, 8.0, 8.0, 0); ++ C (f3, 9.0, 8.0, 1); ++ C (f4, 7.0, 8.0, 1); ++ C (f4, 8.0, 8.0, 0); ++ C (f4, 9.0, 8.0, 0); ++ C (f5, 7.0, 8.0, 0); ++ C (f5, 8.0, 8.0, 1); ++ C (f5, 9.0, 8.0, 1); ++ C (f6, 7.0, 8.0, 1); ++ C (f6, 8.0, 8.0, 1); ++ C (f6, 9.0, 8.0, 0); ++ C (f7, 7.0, 8.0, 1); ++ C (f7, 8.0, 8.0, 0); ++ C (f7, 9.0, 8.0, 0); ++ C (f8, 7.0, 8.0, 0); ++ C (f8, 8.0, 8.0, 1); ++ C (f8, 9.0, 8.0, 1); ++ C (f9, 7.0, 8.0, 0); ++ C (f9, 8.0, 8.0, 1); ++ C (f9, 9.0, 8.0, 1); ++ C (f10, 7.0, 8.0, 1); ++ C (f10, 8.0, 8.0, 0); ++ C (f10, 9.0, 8.0, 0); ++ C (f11, 7.0, 8.0, 0); ++ C (f11, 8.0, 8.0, 0); ++ C (f11, 9.0, 8.0, 1); ++ C (f12, 7.0, 8.0, 1); ++ C (f12, 8.0, 8.0, 1); ++ C (f12, 9.0, 8.0, 0); ++ C (f13, 7.0, 8.0, 1); ++ C (f13, 8.0, 8.0, 1); ++ C (f13, 9.0, 8.0, 0); ++ C (f14, 7.0, 8.0, 0); ++ C (f14, 8.0, 8.0, 0); ++ C (f14, 9.0, 8.0, 1); ++ D (f15, 4.0, 0); ++ D (f15, 5.0, 1); ++ D (f15, 6.0, 0); ++ 
D (f16, 4.0, 1); ++ D (f16, 5.0, 0); ++ D (f16, 6.0, 1); ++ D (f17, 4.0, 0); ++ D (f17, 5.0, 0); ++ D (f17, 6.0, 1); ++ D (f18, 4.0, 1); ++ D (f18, 5.0, 0); ++ D (f18, 6.0, 0); ++ D (f19, 4.0, 0); ++ D (f19, 5.0, 1); ++ D (f19, 6.0, 1); ++ D (f20, 4.0, 1); ++ D (f20, 5.0, 1); ++ D (f20, 6.0, 0); ++ D (f21, 4.0, 1); ++ D (f21, 5.0, 0); ++ D (f21, 6.0, 0); ++ D (f22, 4.0, 0); ++ D (f22, 5.0, 1); ++ D (f22, 6.0, 1); ++ D (f23, 4.0, 0); ++ D (f23, 5.0, 1); ++ D (f23, 6.0, 1); ++ D (f24, 4.0, 1); ++ D (f24, 5.0, 0); ++ D (f24, 6.0, 0); ++ D (f25, 4.0, 0); ++ D (f25, 5.0, 0); ++ D (f25, 6.0, 1); ++ D (f26, 4.0, 1); ++ D (f26, 5.0, 1); ++ D (f26, 6.0, 0); ++ D (f27, 4.0, 1); ++ D (f27, 5.0, 1); ++ D (f27, 6.0, 0); ++ D (f28, 4.0, 0); ++ D (f28, 5.0, 0); ++ D (f28, 6.0, 1); ++ return 0; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index b9cd07a60..fca32222f 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -63,6 +63,8 @@ static bool abs_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); + static bool xor_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); ++static bool spaceship_replacement (basic_block, basic_block, ++ edge, edge, gphi *, tree, tree); + static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, + edge, edge, gimple *, + tree, tree); +@@ -361,6 +363,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + cfgchanged = true; + else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; ++ else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) ++ cfgchanged = true; + } + } + +@@ -1753,6 +1757,426 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + ++/* Return true if the only executable statement in BB is a GIMPLE_COND. */ ++ ++static bool ++cond_only_block_p (basic_block bb) ++{ ++ /* BB must have no executable statements. 
*/ ++ gimple_stmt_iterator gsi = gsi_after_labels (bb); ++ if (phi_nodes (bb)) ++ return false; ++ while (!gsi_end_p (gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (is_gimple_debug (stmt)) ++ ; ++ else if (gimple_code (stmt) == GIMPLE_NOP ++ || gimple_code (stmt) == GIMPLE_PREDICT ++ || gimple_code (stmt) == GIMPLE_COND) ++ ; ++ else ++ return false; ++ gsi_next (&gsi); ++ } ++ return true; ++} ++ ++/* Attempt to optimize (x <=> y) cmp 0 and similar comparisons. ++ For strong ordering <=> try to match something like: ++ : // cond3_bb (== cond2_bb) ++ if (x_4(D) != y_5(D)) ++ goto ; [INV] ++ else ++ goto ; [INV] ++ ++ : // cond_bb ++ if (x_4(D) < y_5(D)) ++ goto ; [INV] ++ else ++ goto ; [INV] ++ ++ : // middle_bb ++ ++ : // phi_bb ++ # iftmp.0_2 = PHI <1(4), 0(2), -1(3)> ++ _1 = iftmp.0_2 == 0; ++ ++ and for partial ordering <=> something like: ++ ++ : // cond3_bb ++ if (a_3(D) == b_5(D)) ++ goto ; [50.00%] ++ else ++ goto ; [50.00%] ++ ++ [local count: 536870913]: // cond2_bb ++ if (a_3(D) < b_5(D)) ++ goto ; [50.00%] ++ else ++ goto ; [50.00%] ++ ++ [local count: 268435456]: // cond_bb ++ if (a_3(D) > b_5(D)) ++ goto ; [50.00%] ++ else ++ goto ; [50.00%] ++ ++ [local count: 134217728]: // middle_bb ++ ++ [local count: 1073741824]: // phi_bb ++ # SR.27_4 = PHI <0(2), -1(3), 1(4), 2(5)> ++ _2 = SR.27_4 > 0; */ ++ ++static bool ++spaceship_replacement (basic_block cond_bb, basic_block middle_bb, ++ edge e0, edge e1, gphi *phi, ++ tree arg0, tree arg1) ++{ ++ if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi))) ++ || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi))) ++ || !tree_fits_shwi_p (arg0) ++ || !tree_fits_shwi_p (arg1) ++ || !IN_RANGE (tree_to_shwi (arg0), -1, 2) ++ || !IN_RANGE (tree_to_shwi (arg1), -1, 2)) ++ return false; ++ ++ basic_block phi_bb = gimple_bb (phi); ++ gcc_assert (phi_bb == e0->dest && phi_bb == e1->dest); ++ if (!IN_RANGE (EDGE_COUNT (phi_bb->preds), 3, 4)) ++ return false; ++ ++ use_operand_p use_p; ++ gimple *use_stmt; ++ if 
(SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi))) ++ return false; ++ if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt)) ++ return false; ++ enum tree_code cmp; ++ tree lhs, rhs; ++ if (gimple_code (use_stmt) == GIMPLE_COND) ++ { ++ cmp = gimple_cond_code (use_stmt); ++ lhs = gimple_cond_lhs (use_stmt); ++ rhs = gimple_cond_rhs (use_stmt); ++ } ++ else if (is_gimple_assign (use_stmt)) ++ { ++ if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) ++ { ++ cmp = gimple_assign_rhs_code (use_stmt); ++ lhs = gimple_assign_rhs1 (use_stmt); ++ rhs = gimple_assign_rhs2 (use_stmt); ++ } ++ else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR) ++ { ++ tree cond = gimple_assign_rhs1 (use_stmt); ++ if (!COMPARISON_CLASS_P (cond)) ++ return false; ++ cmp = TREE_CODE (cond); ++ lhs = TREE_OPERAND (cond, 0); ++ rhs = TREE_OPERAND (cond, 1); ++ } ++ else ++ return false; ++ } ++ else ++ return false; ++ switch (cmp) ++ { ++ case EQ_EXPR: ++ case NE_EXPR: ++ case LT_EXPR: ++ case GT_EXPR: ++ case LE_EXPR: ++ case GE_EXPR: ++ break; ++ default: ++ return false; ++ } ++ if (lhs != PHI_RESULT (phi) ++ || !tree_fits_shwi_p (rhs) ++ || !IN_RANGE (tree_to_shwi (rhs), -1, 1)) ++ return false; ++ ++ if (!empty_block_p (middle_bb)) ++ return false; ++ ++ gcond *cond1 = as_a (last_stmt (cond_bb)); ++ enum tree_code cmp1 = gimple_cond_code (cond1); ++ if (cmp1 != LT_EXPR && cmp1 != GT_EXPR) ++ return false; ++ tree lhs1 = gimple_cond_lhs (cond1); ++ tree rhs1 = gimple_cond_rhs (cond1); ++ /* The optimization may be unsafe due to NaNs. 
*/ ++ if (HONOR_NANS (TREE_TYPE (lhs1))) ++ return false; ++ if (TREE_CODE (lhs1) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs1)) ++ return false; ++ if (TREE_CODE (rhs1) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)) ++ return false; ++ ++ if (!single_pred_p (cond_bb) || !cond_only_block_p (cond_bb)) ++ return false; ++ ++ basic_block cond2_bb = single_pred (cond_bb); ++ if (EDGE_COUNT (cond2_bb->succs) != 2) ++ return false; ++ edge cond2_phi_edge; ++ if (EDGE_SUCC (cond2_bb, 0)->dest == cond_bb) ++ { ++ if (EDGE_SUCC (cond2_bb, 1)->dest != phi_bb) ++ return false; ++ cond2_phi_edge = EDGE_SUCC (cond2_bb, 1); ++ } ++ else if (EDGE_SUCC (cond2_bb, 0)->dest != phi_bb) ++ return false; ++ else ++ cond2_phi_edge = EDGE_SUCC (cond2_bb, 0); ++ tree arg2 = gimple_phi_arg_def (phi, cond2_phi_edge->dest_idx); ++ if (!tree_fits_shwi_p (arg2)) ++ return false; ++ gimple *cond2 = last_stmt (cond2_bb); ++ if (cond2 == NULL || gimple_code (cond2) != GIMPLE_COND) ++ return false; ++ enum tree_code cmp2 = gimple_cond_code (cond2); ++ tree lhs2 = gimple_cond_lhs (cond2); ++ tree rhs2 = gimple_cond_rhs (cond2); ++ if (lhs2 == lhs1) ++ { ++ if (!operand_equal_p (rhs2, rhs1, 0)) ++ return false; ++ } ++ else if (lhs2 == rhs1) ++ { ++ if (rhs2 != lhs1) ++ return false; ++ } ++ else ++ return false; ++ ++ tree arg3 = arg2; ++ basic_block cond3_bb = cond2_bb; ++ edge cond3_phi_edge = cond2_phi_edge; ++ gimple *cond3 = cond2; ++ enum tree_code cmp3 = cmp2; ++ tree lhs3 = lhs2; ++ tree rhs3 = rhs2; ++ if (EDGE_COUNT (phi_bb->preds) == 4) ++ { ++ if (absu_hwi (tree_to_shwi (arg2)) != 1) ++ return false; ++ if (e1->flags & EDGE_TRUE_VALUE) ++ { ++ if (tree_to_shwi (arg0) != 2 ++ || absu_hwi (tree_to_shwi (arg1)) != 1 ++ || wi::to_widest (arg1) == wi::to_widest (arg2)) ++ return false; ++ } ++ else if (tree_to_shwi (arg1) != 2 ++ || absu_hwi (tree_to_shwi (arg0)) != 1 ++ || wi::to_widest (arg0) == wi::to_widest (arg1)) ++ return false; ++ if (cmp2 != LT_EXPR && cmp2 != 
GT_EXPR) ++ return false; ++ /* if (x < y) goto phi_bb; else fallthru; ++ if (x > y) goto phi_bb; else fallthru; ++ bbx:; ++ phi_bb:; ++ is ok, but if x and y are swapped in one of the comparisons, ++ or the comparisons are the same and operands not swapped, ++ or second goto phi_bb is not the true edge, it is not. */ ++ if ((lhs2 == lhs1) ++ ^ (cmp2 == cmp1) ++ ^ ((e1->flags & EDGE_TRUE_VALUE) != 0)) ++ return false; ++ if ((cond2_phi_edge->flags & EDGE_TRUE_VALUE) == 0) ++ return false; ++ if (!single_pred_p (cond2_bb) || !cond_only_block_p (cond2_bb)) ++ return false; ++ cond3_bb = single_pred (cond2_bb); ++ if (EDGE_COUNT (cond3_bb->succs) != 2) ++ return false; ++ if (EDGE_SUCC (cond3_bb, 0)->dest == cond2_bb) ++ { ++ if (EDGE_SUCC (cond3_bb, 1)->dest != phi_bb) ++ return false; ++ cond3_phi_edge = EDGE_SUCC (cond3_bb, 1); ++ } ++ else if (EDGE_SUCC (cond3_bb, 0)->dest != phi_bb) ++ return false; ++ else ++ cond3_phi_edge = EDGE_SUCC (cond3_bb, 0); ++ arg3 = gimple_phi_arg_def (phi, cond3_phi_edge->dest_idx); ++ cond3 = last_stmt (cond3_bb); ++ if (cond3 == NULL || gimple_code (cond3) != GIMPLE_COND) ++ return false; ++ cmp3 = gimple_cond_code (cond3); ++ lhs3 = gimple_cond_lhs (cond3); ++ rhs3 = gimple_cond_rhs (cond3); ++ if (lhs3 == lhs1) ++ { ++ if (!operand_equal_p (rhs3, rhs1, 0)) ++ return false; ++ } ++ else if (lhs3 == rhs1) ++ { ++ if (rhs3 != lhs1) ++ return false; ++ } ++ else ++ return false; ++ } ++ else if (absu_hwi (tree_to_shwi (arg0)) != 1 ++ || absu_hwi (tree_to_shwi (arg1)) != 1 ++ || wi::to_widest (arg0) == wi::to_widest (arg1)) ++ return false; ++ ++ if (!integer_zerop (arg3) || (cmp3 != EQ_EXPR && cmp3 != NE_EXPR)) ++ return false; ++ if ((cond3_phi_edge->flags & (cmp3 == EQ_EXPR ++ ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0) ++ return false; ++ ++ /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1. */ ++ enum tree_code one_cmp; ++ if ((cmp1 == LT_EXPR) ++ ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ?
arg1 : arg0))) ++ one_cmp = LT_EXPR; ++ else ++ one_cmp = GT_EXPR; ++ ++ enum tree_code res_cmp; ++ switch (cmp) ++ { ++ case EQ_EXPR: ++ if (integer_zerop (rhs)) ++ res_cmp = EQ_EXPR; ++ else if (integer_minus_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR; ++ else if (integer_onep (rhs)) ++ res_cmp = one_cmp; ++ else ++ return false; ++ break; ++ case NE_EXPR: ++ if (integer_zerop (rhs)) ++ res_cmp = NE_EXPR; ++ else if (integer_minus_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR; ++ else if (integer_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR; ++ else ++ return false; ++ break; ++ case LT_EXPR: ++ if (integer_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR; ++ else if (integer_zerop (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR; ++ else ++ return false; ++ break; ++ case LE_EXPR: ++ if (integer_zerop (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR; ++ else if (integer_minus_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR; ++ else ++ return false; ++ break; ++ case GT_EXPR: ++ if (integer_minus_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR; ++ else if (integer_zerop (rhs)) ++ res_cmp = one_cmp; ++ else ++ return false; ++ break; ++ case GE_EXPR: ++ if (integer_zerop (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? 
LE_EXPR : GE_EXPR; ++ else if (integer_onep (rhs)) ++ res_cmp = one_cmp; ++ else ++ return false; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (gimple_code (use_stmt) == GIMPLE_COND) ++ { ++ gcond *use_cond = as_a (use_stmt); ++ gimple_cond_set_code (use_cond, res_cmp); ++ gimple_cond_set_lhs (use_cond, lhs1); ++ gimple_cond_set_rhs (use_cond, rhs1); ++ } ++ else if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) ++ { ++ gimple_assign_set_rhs_code (use_stmt, res_cmp); ++ gimple_assign_set_rhs1 (use_stmt, lhs1); ++ gimple_assign_set_rhs2 (use_stmt, rhs1); ++ } ++ else ++ { ++ tree cond = build2 (res_cmp, TREE_TYPE (gimple_assign_rhs1 (use_stmt)), ++ lhs1, rhs1); ++ gimple_assign_set_rhs1 (use_stmt, cond); ++ } ++ update_stmt (use_stmt); ++ ++ if (MAY_HAVE_DEBUG_BIND_STMTS) ++ { ++ use_operand_p use_p; ++ imm_use_iterator iter; ++ bool has_debug_uses = false; ++ FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi)) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ gcc_assert (is_gimple_debug (use_stmt)); ++ has_debug_uses = true; ++ break; ++ } ++ ++ if (has_debug_uses) ++ { ++ /* If there are debug uses, emit something like: ++ # DEBUG D#1 => i_2(D) > j_3(D) ? 1 : -1 ++ # DEBUG D#2 => i_2(D) == j_3(D) ? 0 : D#1 ++ where > stands for the comparison that yielded 1 ++ and replace debug uses of phi result with that D#2. ++ Ignore the value of 2, because if NaNs aren't expected, ++ all floating point numbers should be comparable. 
*/ ++ gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi)); ++ tree type = TREE_TYPE (PHI_RESULT (phi)); ++ tree temp1 = make_node (DEBUG_EXPR_DECL); ++ DECL_ARTIFICIAL (temp1) = 1; ++ TREE_TYPE (temp1) = type; ++ SET_DECL_MODE (temp1, TYPE_MODE (type)); ++ tree t = build2 (one_cmp, boolean_type_node, lhs1, rhs2); ++ t = build3 (COND_EXPR, type, t, build_one_cst (type), ++ build_int_cst (type, -1)); ++ gimple *g = gimple_build_debug_bind (temp1, t, phi); ++ gsi_insert_before (&gsi, g, GSI_SAME_STMT); ++ tree temp2 = make_node (DEBUG_EXPR_DECL); ++ DECL_ARTIFICIAL (temp2) = 1; ++ TREE_TYPE (temp2) = type; ++ SET_DECL_MODE (temp2, TYPE_MODE (type)); ++ t = build2 (EQ_EXPR, boolean_type_node, lhs1, rhs2); ++ t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1); ++ g = gimple_build_debug_bind (temp2, t, phi); ++ gsi_insert_before (&gsi, g, GSI_SAME_STMT); ++ replace_uses_by (PHI_RESULT (phi), temp2); ++ } ++ } ++ ++ gimple_stmt_iterator psi = gsi_for_stmt (phi); ++ remove_phi_node (&psi, true); ++ ++ return true; ++} ++ + /* Convert + + +-- +2.27.0.windows.1 + diff --git a/0057-Backport-Add-support-for-__builtin_bswap128.patch b/0057-Backport-Add-support-for-__builtin_bswap128.patch new file mode 100644 index 0000000000000000000000000000000000000000..5f003065acff437f388ca0319c34ed86d515d95d --- /dev/null +++ b/0057-Backport-Add-support-for-__builtin_bswap128.patch @@ -0,0 +1,253 @@ +From 96afd5b761a74e9eef40a2e843810c503c669de8 Mon Sep 17 00:00:00 2001 +From: Eric Botcazou +Date: Thu, 28 May 2020 00:31:15 +0200 +Subject: [PATCH 09/35] [Backport] Add support for __builtin_bswap128 + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=fe7ebef7fe4f9acb79658ed9db0749b07efc3105 + +This patch introduces a new builtin named __builtin_bswap128 on targets +where TImode is supported, i.e. 64-bit targets only in practice. 
The +implementation simply reuses the existing double word path in optab, so +no routine is added to libgcc (which means that you get two calls to +_bswapdi2 in the worst case). + +gcc/ChangeLog: + + * builtin-types.def (BT_UINT128): New primitive type. + (BT_FN_UINT128_UINT128): New function type. + * builtins.def (BUILT_IN_BSWAP128): New GCC builtin. + * doc/extend.texi (__builtin_bswap128): Document it. + * builtins.c (expand_builtin): Deal with BUILT_IN_BSWAP128. + (is_inexpensive_builtin): Likewise. + * fold-const-call.c (fold_const_call_ss): Likewise. + * fold-const.c (tree_call_nonnegative_warnv_p): Likewise. + * tree-ssa-ccp.c (evaluate_stmt): Likewise. + * tree-vect-stmts.c (vect_get_data_ptr_increment): Likewise. + (vectorizable_call): Likewise. + * optabs.c (expand_unop): Always use the double word path for it. + * tree-core.h (enum tree_index): Add TI_UINT128_TYPE. + * tree.h (uint128_type_node): New global type. + * tree.c (build_common_tree_nodes): Build it if TImode is supported. + +gcc/testsuite/ChangeLog: + + * gcc.dg/builtin-bswap-10.c: New test. + * gcc.dg/builtin-bswap-11.c: Likewise. + * gcc.dg/builtin-bswap-12.c: Likewise. + * gcc.target/i386/builtin-bswap-5.c: Likewise. 
+--- + gcc/builtin-types.def | 4 ++++ + gcc/builtins.c | 2 ++ + gcc/builtins.def | 2 ++ + gcc/doc/extend.texi | 10 ++++++++-- + gcc/fold-const-call.c | 1 + + gcc/fold-const.c | 2 ++ + gcc/optabs.c | 5 ++++- + gcc/tree-core.h | 1 + + gcc/tree-ssa-ccp.c | 1 + + gcc/tree-vect-stmts.c | 5 +++-- + gcc/tree.c | 2 ++ + gcc/tree.h | 1 + + 12 files changed, 31 insertions(+), 5 deletions(-) + +diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def +index c7aa691b2..c46b1bc5c 100644 +--- a/gcc/builtin-types.def ++++ b/gcc/builtin-types.def +@@ -73,6 +73,9 @@ DEF_PRIMITIVE_TYPE (BT_UINT8, unsigned_char_type_node) + DEF_PRIMITIVE_TYPE (BT_UINT16, uint16_type_node) + DEF_PRIMITIVE_TYPE (BT_UINT32, uint32_type_node) + DEF_PRIMITIVE_TYPE (BT_UINT64, uint64_type_node) ++DEF_PRIMITIVE_TYPE (BT_UINT128, uint128_type_node ++ ? uint128_type_node ++ : error_mark_node) + DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1)) + DEF_PRIMITIVE_TYPE (BT_UNWINDWORD, (*lang_hooks.types.type_for_mode) + (targetm.unwind_word_mode (), 1)) +@@ -300,6 +303,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_UINT8_FLOAT, BT_UINT8, BT_FLOAT) + DEF_FUNCTION_TYPE_1 (BT_FN_UINT16_UINT16, BT_UINT16, BT_UINT16) + DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_UINT32, BT_UINT32, BT_UINT32) + DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_UINT64, BT_UINT64, BT_UINT64) ++DEF_FUNCTION_TYPE_1 (BT_FN_UINT128_UINT128, BT_UINT128, BT_UINT128) + DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_FLOAT, BT_UINT64, BT_FLOAT) + DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT) + DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_PTR, BT_BOOL, BT_PTR) +diff --git a/gcc/builtins.c b/gcc/builtins.c +index 10b6fd3bb..1b1c75cc1 100644 +--- a/gcc/builtins.c ++++ b/gcc/builtins.c +@@ -8015,6 +8015,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, + case BUILT_IN_BSWAP16: + case BUILT_IN_BSWAP32: + case BUILT_IN_BSWAP64: ++ case BUILT_IN_BSWAP128: + target = expand_builtin_bswap (target_mode, exp, target, subtarget); + if (target) + return target; 
+@@ -11732,6 +11733,7 @@ is_inexpensive_builtin (tree decl) + case BUILT_IN_BSWAP16: + case BUILT_IN_BSWAP32: + case BUILT_IN_BSWAP64: ++ case BUILT_IN_BSWAP128: + case BUILT_IN_CLZ: + case BUILT_IN_CLZIMAX: + case BUILT_IN_CLZL: +diff --git a/gcc/builtins.def b/gcc/builtins.def +index fa8b0641a..ee67ac15d 100644 +--- a/gcc/builtins.def ++++ b/gcc/builtins.def +@@ -834,6 +834,8 @@ DEF_GCC_BUILTIN (BUILT_IN_APPLY_ARGS, "apply_args", BT_FN_PTR_VAR, ATTR_L + DEF_GCC_BUILTIN (BUILT_IN_BSWAP16, "bswap16", BT_FN_UINT16_UINT16, ATTR_CONST_NOTHROW_LEAF_LIST) + DEF_GCC_BUILTIN (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST) + DEF_GCC_BUILTIN (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST) ++DEF_GCC_BUILTIN (BUILT_IN_BSWAP128, "bswap128", BT_FN_UINT128_UINT128, ATTR_CONST_NOTHROW_LEAF_LIST) ++ + DEF_EXT_LIB_BUILTIN (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST) + /* [trans-mem]: Adjust BUILT_IN_TM_CALLOC if BUILT_IN_CALLOC is changed. */ + DEF_LIB_BUILTIN (BUILT_IN_CALLOC, "calloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_WARN_UNUSED_RESULT_SIZE_1_2_NOTHROW_LEAF_LIST) +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 9c7345959..a7bd772de 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -13727,14 +13727,20 @@ exactly 8 bits. + + @deftypefn {Built-in Function} uint32_t __builtin_bswap32 (uint32_t x) + Similar to @code{__builtin_bswap16}, except the argument and return types +-are 32 bit. ++are 32-bit. + @end deftypefn + + @deftypefn {Built-in Function} uint64_t __builtin_bswap64 (uint64_t x) + Similar to @code{__builtin_bswap32}, except the argument and return types +-are 64 bit. ++are 64-bit. + @end deftypefn + ++@deftypefn {Built-in Function} uint128_t __builtin_bswap128 (uint128_t x) ++Similar to @code{__builtin_bswap64}, except the argument and return types ++are 128-bit. Only supported on targets when 128-bit types are supported. 
++@end deftypefn ++ ++ + @deftypefn {Built-in Function} Pmode __builtin_extend_pointer (void * x) + On targets where the user visible pointer size is smaller than the size + of an actual hardware address this function returns the extended user +diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c +index 6150d7ada..da01759d9 100644 +--- a/gcc/fold-const-call.c ++++ b/gcc/fold-const-call.c +@@ -1032,6 +1032,7 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg, + case CFN_BUILT_IN_BSWAP16: + case CFN_BUILT_IN_BSWAP32: + case CFN_BUILT_IN_BSWAP64: ++ case CFN_BUILT_IN_BSWAP128: + *result = wide_int::from (arg, precision, TYPE_SIGN (arg_type)).bswap (); + return true; + +diff --git a/gcc/fold-const.c b/gcc/fold-const.c +index 6e635382f..78227a83d 100644 +--- a/gcc/fold-const.c ++++ b/gcc/fold-const.c +@@ -13889,8 +13889,10 @@ tree_call_nonnegative_warnv_p (tree type, combined_fn fn, tree arg0, tree arg1, + CASE_CFN_POPCOUNT: + CASE_CFN_CLZ: + CASE_CFN_CLRSB: ++ case CFN_BUILT_IN_BSWAP16: + case CFN_BUILT_IN_BSWAP32: + case CFN_BUILT_IN_BSWAP64: ++ case CFN_BUILT_IN_BSWAP128: + /* Always true. */ + return true; + +diff --git a/gcc/optabs.c b/gcc/optabs.c +index 049a18ceb..c3751fdf7 100644 +--- a/gcc/optabs.c ++++ b/gcc/optabs.c +@@ -2896,8 +2896,11 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target, + if (temp) + return temp; + ++ /* We do not provide a 128-bit bswap in libgcc so force the use of ++ a double bswap for 64-bit targets. 
*/ + if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD +- && optab_handler (unoptab, word_mode) != CODE_FOR_nothing) ++ && (UNITS_PER_WORD == 8 ++ || optab_handler (unoptab, word_mode) != CODE_FOR_nothing)) + { + temp = expand_doubleword_bswap (mode, op0, target); + if (temp) +diff --git a/gcc/tree-core.h b/gcc/tree-core.h +index eb01c2434..058e046aa 100644 +--- a/gcc/tree-core.h ++++ b/gcc/tree-core.h +@@ -600,6 +600,7 @@ enum tree_index { + TI_UINT16_TYPE, + TI_UINT32_TYPE, + TI_UINT64_TYPE, ++ TI_UINT128_TYPE, + + TI_VOID, + +diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c +index 952fd9cd4..dcdf10369 100644 +--- a/gcc/tree-ssa-ccp.c ++++ b/gcc/tree-ssa-ccp.c +@@ -2005,6 +2005,7 @@ evaluate_stmt (gimple *stmt) + case BUILT_IN_BSWAP16: + case BUILT_IN_BSWAP32: + case BUILT_IN_BSWAP64: ++ case BUILT_IN_BSWAP128: + val = get_value_for_expr (gimple_call_arg (stmt, 0), true); + if (val.lattice_val == UNDEFINED) + break; +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index b872cfc8d..4636b7ba2 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -3085,7 +3085,7 @@ vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type, + return iv_step; + } + +-/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */ ++/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.
*/ + + static bool + vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, +@@ -3454,7 +3454,8 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + else if (modifier == NONE + && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16) + || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32) +- || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64))) ++ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64) ++ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128))) + return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node, + vectype_in, cost_vec); + else +diff --git a/gcc/tree.c b/gcc/tree.c +index 84a440b35..3e6647ae0 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -10394,6 +10394,8 @@ build_common_tree_nodes (bool signed_char) + uint16_type_node = make_or_reuse_type (16, 1); + uint32_type_node = make_or_reuse_type (32, 1); + uint64_type_node = make_or_reuse_type (64, 1); ++ if (targetm.scalar_mode_supported_p (TImode)) ++ uint128_type_node = make_or_reuse_type (128, 1); + + /* Decimal float types. 
*/ + if (targetm.decimal_float_supported_p ()) +diff --git a/gcc/tree.h b/gcc/tree.h +index 328a2d5d2..bddc6e528 100644 +--- a/gcc/tree.h ++++ b/gcc/tree.h +@@ -4035,6 +4035,7 @@ tree_strip_any_location_wrapper (tree exp) + #define uint16_type_node global_trees[TI_UINT16_TYPE] + #define uint32_type_node global_trees[TI_UINT32_TYPE] + #define uint64_type_node global_trees[TI_UINT64_TYPE] ++#define uint128_type_node global_trees[TI_UINT128_TYPE] + + #define void_node global_trees[TI_VOID] + +-- +2.27.0.windows.1 + diff --git a/0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch b/0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch new file mode 100644 index 0000000000000000000000000000000000000000..0edbcb0f799ed49b926628525af71672a7b41520 --- /dev/null +++ b/0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch @@ -0,0 +1,113 @@ +From b9ac0cc69aab3c8d662d5b0a9ed43d971c13ac70 Mon Sep 17 00:00:00 2001 +From: Richard Biener +Date: Fri, 29 May 2020 09:25:53 +0200 +Subject: [PATCH 10/35] [Backport] tree-optimization/95393 - fold MIN/MAX_EXPR + generated by phiopt + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=07852a81f58532c63a57631d7c3757fc6bcea17d + +This makes sure to fold generated stmts so they do not survive +until RTL expansion and cause awkward code generation. + +2020-05-29 Richard Biener + + PR tree-optimization/95393 + * tree-ssa-phiopt.c (minmax_replacement): Use gimple_build + to build the min/max expression so we simplify cases like + MAX(0, s) immediately. + + * gcc.dg/tree-ssa/phi-opt-21.c: New testcase. + * g++.dg/vect/slp-pr87105.cc: Adjust. 
+--- + gcc/testsuite/g++.dg/vect/slp-pr87105.cc | 2 +- + gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c | 15 +++++++++++++ + gcc/tree-ssa-phiopt.c | 25 +++++++++++----------- + 3 files changed, 29 insertions(+), 13 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c + +diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc +index 5518f319b..d07b1cd46 100644 +--- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc ++++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc +@@ -102,4 +102,4 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept { + // { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } } + // It's a bit awkward to detect that all stores were vectorized but the + // following more or less does the trick +-// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } } ++// { dg-final { scan-tree-dump "vect_\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } } +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c +new file mode 100644 +index 000000000..9f3d56957 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt4-details" } */ ++ ++int f(unsigned s) ++{ ++ int i; ++ for (i = 0; i < s; ++i) ++ ; ++ ++ return i; ++} ++ ++/* { dg-final { scan-tree-dump "converted to straightline code" "phiopt4" } } */ ++/* Make sure we fold the detected MAX. */ ++/* { dg-final { scan-tree-dump-not "MAX" "phiopt4" } } */ +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index fca32222f..269eda21c 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. 
If not see + #include "tree-inline.h" + #include "case-cfn-macros.h" + #include "tree-eh.h" ++#include "gimple-fold.h" + #include "internal-fn.h" + + static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); +@@ -1414,7 +1415,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, + { + tree result, type, rhs; + gcond *cond; +- gassign *new_stmt; + edge true_edge, false_edge; + enum tree_code cmp, minmax, ass_code; + tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false; +@@ -1738,19 +1738,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, + gsi_move_before (&gsi_from, &gsi); + } + +- /* Create an SSA var to hold the min/max result. If we're the only +- things setting the target PHI, then we can clone the PHI +- variable. Otherwise we must create a new one. */ +- result = PHI_RESULT (phi); +- if (EDGE_COUNT (gimple_bb (phi)->preds) == 2) +- result = duplicate_ssa_name (result, NULL); +- else +- result = make_ssa_name (TREE_TYPE (result)); +- + /* Emit the statement to compute min/max. */ +- new_stmt = gimple_build_assign (result, minmax, arg0, arg1); ++ gimple_seq stmts = NULL; ++ tree phi_result = PHI_RESULT (phi); ++ result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1); ++ /* Duplicate range info if we're the only things setting the target PHI. 
*/ ++ if (!gimple_seq_empty_p (stmts) ++ && EDGE_COUNT (gimple_bb (phi)->preds) == 2 ++ && !POINTER_TYPE_P (TREE_TYPE (phi_result)) ++ && SSA_NAME_RANGE_INFO (phi_result)) ++ duplicate_ssa_name_range_info (result, SSA_NAME_RANGE_TYPE (phi_result), ++ SSA_NAME_RANGE_INFO (phi_result)); ++ + gsi = gsi_last_bb (cond_bb); +- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); ++ gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT); + + replace_phi_edge_with_variable (cond_bb, e1, phi, result); + +-- +2.27.0.windows.1 + diff --git a/0059-Backport-Add-a-couple-of-A-CST1-CST2-match-and-simpl.patch b/0059-Backport-Add-a-couple-of-A-CST1-CST2-match-and-simpl.patch new file mode 100644 index 0000000000000000000000000000000000000000..a16b3361bc4c01101c6a9f9480c428d132b33376 --- /dev/null +++ b/0059-Backport-Add-a-couple-of-A-CST1-CST2-match-and-simpl.patch @@ -0,0 +1,91 @@ +From 9f3a8c600abe16f172b36d8113862e8f7aea940c Mon Sep 17 00:00:00 2001 +From: Andrew Pinski +Date: Sun, 16 May 2021 13:07:06 -0700 +Subject: [PATCH 11/35] [Backport] Add a couple of A?CST1:CST2 match and + simplify optimizations + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b6bdd7a4cb41ee057f2d064fffcb00f23ce6b497 + +Instead of some of the more manual optimizations inside phi-opt, +it would be a good idea to do a lot of the heavy lifting inside match +and simplify instead. In the process, this moves the three simple +A?CST1:CST2 (where CST1 or CST2 is zero) simplifications. + +OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. + +Differences from V1: +* Use bit_xor 1 instead of bit_not to fix the problem with boolean types +which are not 1 bit precision. + +Thanks, +Andrew Pinski + +gcc: + * match.pd (A?CST1:CST2): Add simplifications for A?0:+-1, A?+-1:0, + A?POW2:0 and A?0:POW2.
+--- + gcc/match.pd | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 48 insertions(+) + +diff --git a/gcc/match.pd b/gcc/match.pd +index 660d5c268..032830b0d 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3334,6 +3334,54 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (if (cst1 && cst2) + (vec_cond @0 { cst1; } { cst2; }))))) + ++/* A few simplifications of "a ? CST1 : CST2". */ ++/* NOTE: Only do this on gimple as the if-chain-to-switch ++ optimization depends on the gimple to have if statements in it. */ ++#if GIMPLE ++(simplify ++ (cond @0 INTEGER_CST@1 INTEGER_CST@2) ++ (switch ++ (if (integer_zerop (@2)) ++ (switch ++ /* a ? 1 : 0 -> a if 0 and 1 are integral types. */ ++ (if (integer_onep (@1)) ++ (convert (convert:boolean_type_node @0))) ++ /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */ ++ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1)) ++ (with { ++ tree shift = build_int_cst (integer_type_node, tree_log2 (@1)); ++ } ++ (lshift (convert (convert:boolean_type_node @0)) { shift; }))) ++ /* a ? -1 : 0 -> -a. No need to check the TYPE_PRECISION not being 1 ++ here as the powerof2cst case above will handle that case correctly. */ ++ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1)) ++ (negate (convert (convert:boolean_type_node @0)))))) ++ (if (integer_zerop (@1)) ++ (with { ++ tree booltrue = constant_boolean_node (true, boolean_type_node); ++ } ++ (switch ++ /* a ? 0 : 1 -> !a. */ ++ (if (integer_onep (@2)) ++ (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))) ++ /* a ? powerof2cst : 0 -> (!a) << (log2(powerof2cst)) */ ++ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2)) ++ (with { ++ tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); ++ } ++ (lshift (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )) ++ { shift; }))) ++ /* a ? -1 : 0 -> -(!a). No need to check the TYPE_PRECISION not being 1 ++ here as the powerof2cst case above will handle that case correctly. 
*/ ++ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2)) ++ (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))) ++ ) ++ ) ++ ) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. */ + /* This pattern implements two kinds simplification: +-- +2.27.0.windows.1 + diff --git a/0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch b/0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch new file mode 100644 index 0000000000000000000000000000000000000000..80d602ae22e52fd24f9e1e71b4cd3b15a97a7bb9 --- /dev/null +++ b/0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch @@ -0,0 +1,155 @@ +From 4352b952ba24c413697fcfc191d06165a8a31ced Mon Sep 17 00:00:00 2001 +From: Andrew Pinski +Date: Sat, 22 May 2021 19:49:50 +0000 +Subject: [PATCH 12/35] [Backport] Optimize x < 0 ? ~y : y to (x >> 31) ^ y in + match.pd + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=1fd76b24306ed4df4cf9e797d900699ed59ce7f7 + +This copies the optimization that is done in phiopt for +"x < 0 ? ~y : y to (x >> 31) ^ y" into match.pd. The code +for phiopt is kept around until phiopt uses match.pd (which +I am working towards). + +Note the original testcase is now optimized early on and I added a +new testcase to optimize during phiopt. + +OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. + +Thanks, +Andrew Pinski + +Differences from v1: +V2: Add check for integral type to make sure vector types are not done. + +gcc: + * match.pd (x < 0 ? ~y : y): New patterns. + +gcc/testsuite: + * gcc.dg/tree-ssa/pr96928.c: Update test for slightly different IR. + * gcc.dg/tree-ssa/pr96928-1.c: New testcase.
+--- + gcc/match.pd | 32 +++++++++++++++ + gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c | 48 +++++++++++++++++++++++ + gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 7 +++- + 3 files changed, 85 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index 032830b0d..5899eea95 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4390,6 +4390,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (cmp (bit_and@2 @0 integer_pow2p@1) @1) + (icmp @2 { build_zero_cst (TREE_TYPE (@0)); }))) + ++(for cmp (ge lt) ++/* x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */ ++/* x >= 0 ? ~y : y into ~((x >> (prec-1)) ^ y). */ ++ (simplify ++ (cond (cmp @0 integer_zerop) (bit_not @1) @1) ++ (if (INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) ++ && !TYPE_UNSIGNED (TREE_TYPE (@0)) ++ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type)) ++ (with ++ { ++ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1); ++ } ++ (if (cmp == LT_EXPR) ++ (bit_xor (convert (rshift @0 {shifter;})) @1) ++ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1)))))) ++/* x < 0 ? y : ~y into ~((x >> (prec-1)) ^ y). */ ++/* x >= 0 ? y : ~y into (x >> (prec-1)) ^ y. */ ++ (simplify ++ (cond (cmp @0 integer_zerop) @1 (bit_not @1)) ++ (if (INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) ++ && !TYPE_UNSIGNED (TREE_TYPE (@0)) ++ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type)) ++ (with ++ { ++ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1); ++ } ++ (if (cmp == GE_EXPR) ++ (bit_xor (convert (rshift @0 {shifter;})) @1) ++ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1))))))) ++ + /* If we have (A & C) != 0 ? D : 0 where C and D are powers of 2, + convert this into a shift followed by ANDing with D. 
*/ + (simplify +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c +new file mode 100644 +index 000000000..a2770e5e8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c +@@ -0,0 +1,48 @@ ++/* PR tree-optimization/96928 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */ ++ ++int ++foo (int a) ++{ ++ if (a < 0) ++ return ~a; ++ return a; ++} ++ ++int ++bar (int a, int b) ++{ ++ if (a < 0) ++ return ~b; ++ return b; ++} ++ ++unsigned ++baz (int a, unsigned int b) ++{ ++ if (a < 0) ++ return ~b; ++ return b; ++} ++ ++unsigned ++qux (int a, unsigned int c) ++{ ++ if (a >= 0) ++ return ~c; ++ return c; ++} ++ ++int ++corge (int a, int b) ++{ ++ if (a >= 0) ++ return b; ++ return ~b; ++} +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c +index 209135726..e8fd82fc2 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c +@@ -1,8 +1,11 @@ + /* PR tree-optimization/96928 */ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2 -fdump-tree-optimized" } */ + /* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */ +-/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */ ++/* The following check is done at optimized because a ^ (~b) is rewritten as ~(a^b) ++ and in the case of match.pd optimizing these ?:, the ~ is moved out already ++ by the time we get to phiopt2. 
*/ ++/* { dg-final { scan-tree-dump-times "\\\^ c_\[0-9]*\\\(D\\\);" 1 "optimized" } } */ + /* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */ + /* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */ + /* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */ +-- +2.27.0.windows.1 + diff --git a/0061-Backport-Replace-conditional_replacement-with-match-.patch b/0061-Backport-Replace-conditional_replacement-with-match-.patch new file mode 100644 index 0000000000000000000000000000000000000000..12808be89879912c40781524f646f5589b47bf87 --- /dev/null +++ b/0061-Backport-Replace-conditional_replacement-with-match-.patch @@ -0,0 +1,249 @@ +From 406071e8c1838c824f06c35ef3cf9419aa543e6e Mon Sep 17 00:00:00 2001 +From: Andrew Pinski +Date: Tue, 1 Jun 2021 01:05:09 +0000 +Subject: [PATCH 13/35] [Backport] Replace conditional_replacement with match + and simplify + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=9f55df63154a39d67ef5b24def7044bf87300831 + +This is the first of a series of patches to simplify phi-opt +to use match and simplify in many cases. This simplification +will allow more things to be optimized. + +This is what Richard requested in +https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571197.html +and I think it is the right thing to do too. + +OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. + +gcc/ChangeLog: + + PR tree-optimization/25290 + * tree-ssa-phiopt.c (match_simplify_replacement): + New function. + (tree_ssa_phiopt_worker): Use match_simplify_replacement. + (two_value_replacement): Change the comment about + conditional_replacement. + (conditional_replacement): Delete.
+--- + gcc/tree-ssa-phiopt.c | 144 ++++++++++++------------------------------ + 1 file changed, 39 insertions(+), 105 deletions(-) + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 269eda21c..9fa6363b6 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -52,8 +52,8 @@ along with GCC; see the file COPYING3. If not see + static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); + static bool two_value_replacement (basic_block, basic_block, edge, gphi *, + tree, tree); +-static bool conditional_replacement (basic_block, basic_block, +- edge, edge, gphi *, tree, tree); ++static bool match_simplify_replacement (basic_block, basic_block, ++ edge, edge, gphi *, tree, tree); + static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree, + gimple *); + static int value_replacement (basic_block, basic_block, +@@ -349,8 +349,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p +- && conditional_replacement (bb, bb1, e1, e2, phi, +- arg0, arg1)) ++ && match_simplify_replacement (bb, bb1, e1, e2, phi, ++ arg0, arg1)) + cfgchanged = true; + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -662,7 +662,7 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + } + + /* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to +- conditional_replacement. */ ++ match_simplify_replacement. */ + if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE + && (integer_zerop (arg0) + || integer_zerop (arg1) +@@ -763,137 +763,71 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + +-/* The function conditional_replacement does the main work of doing the +- conditional replacement. Return true if the replacement is done. 
++/* The function match_simplify_replacement does the main work of doing the ++ replacement using match and simplify. Return true if the replacement is done. + Otherwise return false. + BB is the basic block where the replacement is going to be done on. ARG0 + is argument 0 from PHI. Likewise for ARG1. */ + + static bool +-conditional_replacement (basic_block cond_bb, basic_block middle_bb, +- edge e0, edge e1, gphi *phi, +- tree arg0, tree arg1) ++match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, ++ edge e0, edge e1, gphi *phi, ++ tree arg0, tree arg1) + { +- tree result; + gimple *stmt; +- gassign *new_stmt; + tree cond; + gimple_stmt_iterator gsi; + edge true_edge, false_edge; +- tree new_var, new_var2; +- bool neg = false; +- int shift = 0; +- tree nonzero_arg; +- +- /* FIXME: Gimplification of complex type is too hard for now. */ +- /* We aren't prepared to handle vectors either (and it is a question +- if it would be worthwhile anyway). */ +- if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0)) +- || POINTER_TYPE_P (TREE_TYPE (arg0))) +- || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1)) +- || POINTER_TYPE_P (TREE_TYPE (arg1)))) +- return false; ++ gimple_seq seq = NULL; ++ tree result; + +- /* The PHI arguments have the constants 0 and 1, or 0 and -1 or +- 0 and (1 << cst), then convert it to the conditional. */ +- if (integer_zerop (arg0)) +- nonzero_arg = arg1; +- else if (integer_zerop (arg1)) +- nonzero_arg = arg0; +- else +- return false; +- if (integer_pow2p (nonzero_arg)) +- { +- shift = tree_log2 (nonzero_arg); +- if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg))) +- return false; +- } +- else if (integer_all_onesp (nonzero_arg)) +- neg = true; +- else ++ if (!empty_block_p (middle_bb)) + return false; + +- if (!empty_block_p (middle_bb)) ++ /* Special case A ? B : B as this will always simplify to B. */ ++ if (operand_equal_for_phi_arg_p (arg0, arg1)) + return false; + +- /* At this point we know we have a GIMPLE_COND with two successors. 
++ /* At this point we know we have a GIMPLE_COND with two successors. + One successor is BB, the other successor is an empty block which + falls through into BB. + +- There is a single PHI node at the join point (BB) and its arguments +- are constants (0, 1) or (0, -1) or (0, (1 << shift)). +- +- So, given the condition COND, and the two PHI arguments, we can +- rewrite this PHI into non-branching code: ++ There is a single PHI node at the join point (BB). + +- dest = (COND) or dest = COND' or dest = (COND) << shift +- +- We use the condition as-is if the argument associated with the +- true edge has the value one or the argument associated with the +- false edge as the value zero. Note that those conditions are not +- the same since only one of the outgoing edges from the GIMPLE_COND +- will directly reach BB and thus be associated with an argument. */ ++ So, given the condition COND, and the two PHI arguments, match and simplify ++ can happen on (COND) ? arg0 : arg1. */ + + stmt = last_stmt (cond_bb); +- result = PHI_RESULT (phi); + + /* To handle special cases like floating point comparison, it is easier and + less error-prone to build a tree and gimplify it on the fly though it is +- less efficient. */ +- cond = fold_build2_loc (gimple_location (stmt), +- gimple_cond_code (stmt), boolean_type_node, +- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt)); ++ less efficient. ++ Don't use fold_build2 here as that might create (bool)a instead of just ++ "a != 0". */ ++ cond = build2_loc (gimple_location (stmt), ++ gimple_cond_code (stmt), boolean_type_node, ++ gimple_cond_lhs (stmt), gimple_cond_rhs (stmt)); + + /* We need to know which is the true edge and which is the false + edge so that we know when to invert the condition below. 
*/ + extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); +- if ((e0 == true_edge && integer_zerop (arg0)) +- || (e0 == false_edge && !integer_zerop (arg0)) +- || (e1 == true_edge && integer_zerop (arg1)) +- || (e1 == false_edge && !integer_zerop (arg1))) +- cond = fold_build1_loc (gimple_location (stmt), +- TRUTH_NOT_EXPR, TREE_TYPE (cond), cond); +- +- if (neg) +- { +- cond = fold_convert_loc (gimple_location (stmt), +- TREE_TYPE (result), cond); +- cond = fold_build1_loc (gimple_location (stmt), +- NEGATE_EXPR, TREE_TYPE (cond), cond); +- } +- else if (shift) +- { +- cond = fold_convert_loc (gimple_location (stmt), +- TREE_TYPE (result), cond); +- cond = fold_build2_loc (gimple_location (stmt), +- LSHIFT_EXPR, TREE_TYPE (cond), cond, +- build_int_cst (integer_type_node, shift)); +- } ++ if (e1 == true_edge || e0 == false_edge) ++ std::swap (arg0, arg1); + +- /* Insert our new statements at the end of conditional block before the +- COND_STMT. */ +- gsi = gsi_for_stmt (stmt); +- new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true, +- GSI_SAME_STMT); ++ tree type = TREE_TYPE (gimple_phi_result (phi)); ++ result = gimple_simplify (COND_EXPR, type, ++ cond, ++ arg0, arg1, ++ &seq, NULL); ++ if (!result) ++ return false; + +- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var))) +- { +- location_t locus_0, locus_1; ++ gsi = gsi_last_bb (cond_bb); + +- new_var2 = make_ssa_name (TREE_TYPE (result)); +- new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var); +- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); +- new_var = new_var2; +- +- /* Set the locus to the first argument, unless is doesn't have one. 
*/ +- locus_0 = gimple_phi_arg_location (phi, 0); +- locus_1 = gimple_phi_arg_location (phi, 1); +- if (locus_0 == UNKNOWN_LOCATION) +- locus_0 = locus_1; +- gimple_set_location (new_stmt, locus_0); +- } ++ if (seq) ++ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); + +- replace_phi_edge_with_variable (cond_bb, e1, phi, new_var); ++ replace_phi_edge_with_variable (cond_bb, e1, phi, result); + + /* Note that we optimized this PHI. */ + return true; +@@ -3905,7 +3839,7 @@ gate_hoist_loads (void) + Conditional Replacement + ----------------------- + +- This transformation, implemented in conditional_replacement, ++ This transformation, implemented in match_simplify_replacement, + replaces + + bb0: +-- +2.27.0.windows.1 + diff --git a/0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch b/0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch new file mode 100644 index 0000000000000000000000000000000000000000..bf1b0cd708e6f476caecbf059002b166292ee59a --- /dev/null +++ b/0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch @@ -0,0 +1,174 @@ +From fabbe6ccc798d3cb097c6371b4d53cd6dfde6c7c Mon Sep 17 00:00:00 2001 +From: Andrew Pinski +Date: Fri, 11 Jun 2021 13:21:34 -0700 +Subject: [PATCH 14/35] [Backport] Allow match-and-simplified phiopt to run in + early phiopt + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cd48e550d1dc58307ab1c0ab490745673f748ccc + +To move a few more things to match-and-simplify from phiopt, +we need to allow match_simplify_replacement to run in early +phiopt. To do this we add a replacement for gimple_simplify +that is explicitly for phiopt. + +OK? Bootstrapped and tested on x86_64-linux-gnu with no +regressions. + +gcc/ChangeLog: + + * tree-ssa-phiopt.c (match_simplify_replacement): + Add early_p argument. Call gimple_simplify_phiopt + instead of gimple_simplify. + (tree_ssa_phiopt_worker): Update call to + match_simplify_replacement and allow unconditionally. + (phiopt_early_allow): New function. 
+ (gimple_simplify_phiopt): New function. +--- + gcc/tree-ssa-phiopt.c | 89 ++++++++++++++++++++++++++++++++++--------- + 1 file changed, 70 insertions(+), 19 deletions(-) + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 9fa6363b6..92aeb8415 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -48,12 +48,13 @@ along with GCC; see the file COPYING3. If not see + #include "tree-eh.h" + #include "gimple-fold.h" + #include "internal-fn.h" ++#include "gimple-match.h" + + static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); + static bool two_value_replacement (basic_block, basic_block, edge, gphi *, + tree, tree); + static bool match_simplify_replacement (basic_block, basic_block, +- edge, edge, gphi *, tree, tree); ++ edge, edge, gphi *, tree, tree, bool); + static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree, + gimple *); + static int value_replacement (basic_block, basic_block, +@@ -348,9 +349,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + /* Do the replacement of conditional if it can be done. */ + if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1)) + cfgchanged = true; +- else if (!early_p +- && match_simplify_replacement (bb, bb1, e1, e2, phi, +- arg0, arg1)) ++ else if (match_simplify_replacement (bb, bb1, e1, e2, phi, ++ arg0, arg1, ++ early_p)) + cfgchanged = true; + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -763,6 +764,67 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + ++/* Return TRUE if CODE should be allowed during early phiopt. ++ Currently this is to allow MIN/MAX and ABS/NEGATE. 
*/ ++static bool ++phiopt_early_allow (enum tree_code code) ++{ ++ switch (code) ++ { ++ case MIN_EXPR: ++ case MAX_EXPR: ++ case ABS_EXPR: ++ case ABSU_EXPR: ++ case NEGATE_EXPR: ++ case SSA_NAME: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++/* gimple_simplify_phiopt is like gimple_simplify but designed for PHIOPT. ++ Return NULL if nothing can be simplified or the resulting simplified value ++ with parts pushed if EARLY_P was true. Also rejects non allowed tree code ++ if EARLY_P is set. ++ Takes the comparison from COMP_STMT and two args, ARG0 and ARG1 and tries ++ to simplify CMP ? ARG0 : ARG1. */ ++static tree ++gimple_simplify_phiopt (bool early_p, tree type, gimple *comp_stmt, ++ tree arg0, tree arg1, ++ gimple_seq *seq) ++{ ++ tree result; ++ enum tree_code comp_code = gimple_cond_code (comp_stmt); ++ location_t loc = gimple_location (comp_stmt); ++ tree cmp0 = gimple_cond_lhs (comp_stmt); ++ tree cmp1 = gimple_cond_rhs (comp_stmt); ++ /* To handle special cases like floating point comparison, it is easier and ++ less error-prone to build a tree and gimplify it on the fly though it is ++ less efficient. ++ Don't use fold_build2 here as that might create (bool)a instead of just ++ "a != 0". */ ++ tree cond = build2_loc (loc, comp_code, boolean_type_node, ++ cmp0, cmp1); ++ gimple_match_op op (gimple_match_cond::UNCOND, ++ COND_EXPR, type, cond, arg0, arg1); ++ ++ if (op.resimplify (early_p ? NULL : seq, follow_all_ssa_edges)) ++ { ++ /* Early we want only to allow some generated tree codes. */ ++ if (!early_p ++ || op.code.is_tree_code () ++ || phiopt_early_allow ((tree_code)op.code)) ++ { ++ result = maybe_push_res_to_seq (&op, seq); ++ if (result) ++ return result; ++ } ++ } ++ ++ return NULL; ++} ++ + /* The function match_simplify_replacement does the main work of doing the + replacement using match and simplify. Return true if the replacement is done. + Otherwise return false. 
+@@ -772,10 +834,9 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + static bool + match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + edge e0, edge e1, gphi *phi, +- tree arg0, tree arg1) ++ tree arg0, tree arg1, bool early_p) + { + gimple *stmt; +- tree cond; + gimple_stmt_iterator gsi; + edge true_edge, false_edge; + gimple_seq seq = NULL; +@@ -799,15 +860,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + + stmt = last_stmt (cond_bb); + +- /* To handle special cases like floating point comparison, it is easier and +- less error-prone to build a tree and gimplify it on the fly though it is +- less efficient. +- Don't use fold_build2 here as that might create (bool)a instead of just +- "a != 0". */ +- cond = build2_loc (gimple_location (stmt), +- gimple_cond_code (stmt), boolean_type_node, +- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt)); +- + /* We need to know which is the true edge and which is the false + edge so that we know when to invert the condition below. 
*/ + extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); +@@ -815,10 +867,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + std::swap (arg0, arg1); + + tree type = TREE_TYPE (gimple_phi_result (phi)); +- result = gimple_simplify (COND_EXPR, type, +- cond, +- arg0, arg1, +- &seq, NULL); ++ result = gimple_simplify_phiopt (early_p, type, stmt, ++ arg0, arg1, ++ &seq); + if (!result) + return false; + +-- +2.27.0.windows.1 + diff --git a/0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch b/0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch new file mode 100644 index 0000000000000000000000000000000000000000..8440622e3424a9b93e6bba5cee22b684499e42fd --- /dev/null +++ b/0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch @@ -0,0 +1,259 @@ +From d212d216be0752370dbe7bc63bd75b3a9249e0b5 Mon Sep 17 00:00:00 2001 +From: Andrew Pinski +Date: Tue, 1 Jun 2021 06:48:05 +0000 +Subject: [PATCH 15/35] [Backport] Improve match_simplify_replacement in + phi-opt + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=c4574d23cb07340918793a5a98ae7bb2988b3791 + +This improves match_simplify_replacement in phi-opt to handle the +case where there is one cheap (non-call) preparation statement in the +middle basic block similar to xor_replacement and others. +This allows xor_replacement to be removed, which this patch also does. + +OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. + +Thanks, +Andrew Pinski + +Changes since v1: +v3 - Just minor changes to using gimple_assign_lhs +instead of gimple_lhs and fixing a comment. +v2 - change the check on the preparation statement to +allow only assignments and no calls and only assignments +that feed into the phi. + +gcc/ChangeLog: + + PR tree-optimization/25290 + * tree-ssa-phiopt.c (xor_replacement): Delete. + (tree_ssa_phiopt_worker): Delete use of xor_replacement. 
+ (match_simplify_replacement): Allow one cheap preparation + statement that can be moved to before the if. + +gcc/testsuite/ChangeLog: + + * gcc.dg/tree-ssa/pr96928-1.c: Fix testcase for now that ~ + happens on the outside of the bit_xor. +--- + gcc/tree-ssa-phiopt.c | 164 ++++++++++++++---------------------------- + 1 file changed, 52 insertions(+), 112 deletions(-) + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 92aeb8415..51a2d3684 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see + #include "cfghooks.h" + #include "tree-pass.h" + #include "ssa.h" ++#include "tree-ssa.h" + #include "optabs-tree.h" + #include "insn-config.h" + #include "gimple-pretty-print.h" +@@ -63,8 +64,6 @@ static bool minmax_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); + static bool abs_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); +-static bool xor_replacement (basic_block, basic_block, +- edge, edge, gimple *, tree, tree); + static bool spaceship_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); + static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, +@@ -355,9 +354,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + cfgchanged = true; + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +- else if (!early_p +- && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) +- cfgchanged = true; + else if (!early_p + && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, + e2, phi, arg0, +@@ -841,14 +837,51 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + edge true_edge, false_edge; + gimple_seq seq = NULL; + tree result; +- +- if (!empty_block_p (middle_bb)) +- return false; ++ gimple *stmt_to_move = NULL; + + /* Special case A ? B : B as this will always simplify to B. 
*/ + if (operand_equal_for_phi_arg_p (arg0, arg1)) + return false; + ++ /* If the basic block only has a cheap preparation statement, ++ allow it and move it once the transformation is done. */ ++ if (!empty_block_p (middle_bb)) ++ { ++ stmt_to_move = last_and_only_stmt (middle_bb); ++ if (!stmt_to_move) ++ return false; ++ ++ if (gimple_vuse (stmt_to_move)) ++ return false; ++ ++ if (gimple_could_trap_p (stmt_to_move) ++ || gimple_has_side_effects (stmt_to_move)) ++ return false; ++ ++ if (gimple_uses_undefined_value_p (stmt_to_move)) ++ return false; ++ ++ /* Allow assignments and not no calls. ++ As const calls don't match any of the above, yet they could ++ still have some side-effects - they could contain ++ gimple_could_trap_p statements, like floating point ++ exceptions or integer division by zero. See PR70586. ++ FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p ++ should handle this. */ ++ if (!is_gimple_assign (stmt_to_move)) ++ return false; ++ ++ tree lhs = gimple_assign_lhs (stmt_to_move); ++ gimple *use_stmt; ++ use_operand_p use_p; ++ ++ /* Allow only a statement which feeds into the phi. */ ++ if (!lhs || TREE_CODE (lhs) != SSA_NAME ++ || !single_imm_use (lhs, &use_p, &use_stmt) ++ || use_stmt != phi) ++ return false; ++ } ++ + /* At this point we know we have a GIMPLE_COND with two successors. + One successor is BB, the other successor is an empty block which + falls through into BB. 
+@@ -874,7 +907,17 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + return false; + + gsi = gsi_last_bb (cond_bb); +- ++ if (stmt_to_move) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "statement un-sinked:\n"); ++ print_gimple_stmt (dump_file, stmt_to_move, 0, ++ TDF_VOPS|TDF_MEMSYMS); ++ } ++ gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt_to_move); ++ gsi_move_before (&gsi1, &gsi); ++ } + if (seq) + gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); + +@@ -2474,109 +2517,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + +-/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */ +- +-static bool +-xor_replacement (basic_block cond_bb, basic_block middle_bb, +- edge e0 ATTRIBUTE_UNUSED, edge e1, +- gimple *phi, tree arg0, tree arg1) +-{ +- if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1))) +- return false; +- +- /* OTHER_BLOCK must have only one executable statement which must have the +- form arg0 = ~arg1 or arg1 = ~arg0. */ +- +- gimple *assign = last_and_only_stmt (middle_bb); +- /* If we did not find the proper one's complement assignment, then we cannot +- optimize. */ +- if (assign == NULL) +- return false; +- +- /* If we got here, then we have found the only executable statement +- in OTHER_BLOCK. If it is anything other than arg = ~arg1 or +- arg1 = ~arg0, then we cannot optimize. */ +- if (!is_gimple_assign (assign)) +- return false; +- +- if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR) +- return false; +- +- tree lhs = gimple_assign_lhs (assign); +- tree rhs = gimple_assign_rhs1 (assign); +- +- /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */ +- if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0)) +- return false; +- +- gimple *cond = last_stmt (cond_bb); +- tree result = PHI_RESULT (phi); +- +- /* Only relationals comparing arg[01] against zero are interesting. 
*/ +- enum tree_code cond_code = gimple_cond_code (cond); +- if (cond_code != LT_EXPR && cond_code != GE_EXPR) +- return false; +- +- /* Make sure the conditional is x OP 0. */ +- tree clhs = gimple_cond_lhs (cond); +- if (TREE_CODE (clhs) != SSA_NAME +- || !INTEGRAL_TYPE_P (TREE_TYPE (clhs)) +- || TYPE_UNSIGNED (TREE_TYPE (clhs)) +- || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1)) +- || !integer_zerop (gimple_cond_rhs (cond))) +- return false; +- +- /* We need to know which is the true edge and which is the false +- edge so that we know if have xor or inverted xor. */ +- edge true_edge, false_edge; +- extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); +- +- /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we +- will need to invert the result. Similarly for LT_EXPR if +- the false edge goes to OTHER_BLOCK. */ +- edge e; +- if (cond_code == GE_EXPR) +- e = true_edge; +- else +- e = false_edge; +- +- bool invert = e->dest == middle_bb; +- +- result = duplicate_ssa_name (result, NULL); +- +- gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); +- +- int prec = TYPE_PRECISION (TREE_TYPE (clhs)); +- gimple *new_stmt +- = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs, +- build_int_cst (integer_type_node, prec - 1)); +- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); +- +- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs))) +- { +- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), +- NOP_EXPR, gimple_assign_lhs (new_stmt)); +- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); +- } +- lhs = gimple_assign_lhs (new_stmt); +- +- if (invert) +- { +- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), +- BIT_NOT_EXPR, rhs); +- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); +- rhs = gimple_assign_lhs (new_stmt); +- } +- +- new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs); +- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); 
+- +- replace_phi_edge_with_variable (cond_bb, e1, phi, result); +- +- /* Note that we optimized this PHI. */ +- return true; +-} +- + /* Auxiliary functions to determine the set of memory accesses which + can't trap because they are preceded by accesses to the same memory + portion. We do that for MEM_REFs, so we only need to track +-- +2.27.0.windows.1 + diff --git a/0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch b/0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..de2110998c2957109cba9fa15c2c0a9e0de3e58c --- /dev/null +++ b/0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch @@ -0,0 +1,103 @@ +From 0d55d24aa4e47c40f74e0281d023089cfaafcf74 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Thu, 6 May 2021 14:05:06 +0200 +Subject: [PATCH 16/35] [Backport] phiopt: Use gphi *phi instead of gimple *phi + some more + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cfd65e8d5299a7cf7d2ecd92b0e24ea4cfb697d9 + +Various functions in phiopt are also called with a gphi * but use +gimple * argument for it. + +2021-05-06 Jakub Jelinek + + * tree-ssa-phiopt.c (value_replacement, minmax_replacement, + abs_replacement, xor_replacement, + cond_removal_in_popcount_clz_ctz_pattern, + replace_phi_edge_with_variable): Change type of phi argument from + gimple * to gphi *. 
+--- + gcc/tree-ssa-phiopt.c | 22 ++++++++++------------ + 1 file changed, 10 insertions(+), 12 deletions(-) + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 51a2d3684..045a7b1b8 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -59,21 +59,21 @@ static bool match_simplify_replacement (basic_block, basic_block, + static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree, + gimple *); + static int value_replacement (basic_block, basic_block, +- edge, edge, gimple *, tree, tree); ++ edge, edge, gphi *, tree, tree); + static bool minmax_replacement (basic_block, basic_block, +- edge, edge, gimple *, tree, tree); ++ edge, edge, gphi *, tree, tree); + static bool abs_replacement (basic_block, basic_block, +- edge, edge, gimple *, tree, tree); ++ edge, edge, gphi *, tree, tree); + static bool spaceship_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); + static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, +- edge, edge, gimple *, ++ edge, edge, gphi *, + tree, tree); + static bool cond_store_replacement (basic_block, basic_block, edge, edge, + hash_set *); + static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); + static hash_set * get_non_trapping (); +-static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree); ++static void replace_phi_edge_with_variable (basic_block, edge, gphi *, tree); + static void hoist_adjacent_loads (basic_block, basic_block, + basic_block, basic_block); + static bool do_phiopt_pattern (basic_block, basic_block, basic_block); +@@ -389,7 +389,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + + static void + replace_phi_edge_with_variable (basic_block cond_block, +- edge e, gimple *phi, tree new_tree) ++ edge e, gphi *phi, tree new_tree) + { + basic_block bb = gimple_bb (phi); + basic_block block_to_remove; +@@ -1129,8 +1129,7 @@ absorbing_element_p 
(tree_code code, tree arg, bool right, tree rval) + + static int + value_replacement (basic_block cond_bb, basic_block middle_bb, +- edge e0, edge e1, gimple *phi, +- tree arg0, tree arg1) ++ edge e0, edge e1, gphi *phi, tree arg0, tree arg1) + { + gimple_stmt_iterator gsi; + gimple *cond; +@@ -1438,8 +1437,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb, + + static bool + minmax_replacement (basic_block cond_bb, basic_block middle_bb, +- edge e0, edge e1, gimple *phi, +- tree arg0, tree arg1) ++ edge e0, edge e1, gphi *phi, tree arg0, tree arg1) + { + tree result, type, rhs; + gcond *cond; +@@ -2240,7 +2238,7 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb, + static bool + cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + basic_block middle_bb, +- edge e1, edge e2, gimple *phi, ++ edge e1, edge e2, gphi *phi, + tree arg0, tree arg1) + { + gimple *cond; +@@ -2398,7 +2396,7 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + static bool + abs_replacement (basic_block cond_bb, basic_block middle_bb, + edge e0 ATTRIBUTE_UNUSED, edge e1, +- gimple *phi, tree arg0, tree arg1) ++ gphi *phi, tree arg0, tree arg1) + { + tree result; + gassign *new_stmt; +-- +2.27.0.windows.1 + diff --git a/0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch b/0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch new file mode 100644 index 0000000000000000000000000000000000000000..db7b4b2ebbba5202bda6d4938e51aa507c321dad --- /dev/null +++ b/0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch @@ -0,0 +1,212 @@ +From 33dc778a34d7b93978efe922bb1b4583d8e6c4bb Mon Sep 17 00:00:00 2001 +From: Roger Sayle +Date: Mon, 2 Aug 2021 13:27:53 +0100 +Subject: [PATCH 17/35] [Backport] Optimize x ? bswap(x) : 0 in tree-ssa-phiopt + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f9fcf754825a1e01033336f84c18690aaa971a6f + +Many thanks again to Jakub Jelinek for a speedy fix for PR 101642. 
+Interestingly, that test case "bswap16(x) ? : x" also reveals a +missed optimization opportunity. The resulting "x ? bswap(x) : 0" +can be further simplified to just bswap(x). + +Conveniently, tree-ssa-phiopt.c already recognizes/optimizes the +related "x ? popcount(x) : 0", so this patch simply makes that +transformation more general, additionally handling bswap, parity, +ffs and clrsb. All of the required infrastructure is already +present thanks to Jakub previously adding support for clz/ctz. +To reflect this generalization, the name of the function is changed +from cond_removal_in_popcount_clz_ctz_pattern to the hopefully +equally descriptive cond_removal_in_builtin_zero_pattern. + +2021-08-02 Roger Sayle + +gcc/ChangeLog + * tree-ssa-phiopt.c (cond_removal_in_builtin_zero_pattern): + Renamed from cond_removal_in_popcount_clz_ctz_pattern. + Add support for BSWAP, FFS, PARITY and CLRSB builtins. + (tree_ssa_phiopt_worker): Update call to function above. + +gcc/testsuite/ChangeLog + * gcc.dg/tree-ssa/phi-opt-25.c: New test case. +--- + gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c | 83 ++++++++++++++++++++++ + gcc/tree-ssa-phiopt.c | 37 +++++++--- + 2 files changed, 109 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c +new file mode 100644 +index 000000000..c52c92e1d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c +@@ -0,0 +1,83 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++unsigned short test_bswap16(unsigned short x) ++{ ++ return x ? __builtin_bswap16(x) : 0; ++} ++ ++unsigned int test_bswap32(unsigned int x) ++{ ++ return x ? __builtin_bswap32(x) : 0; ++} ++ ++unsigned long long test_bswap64(unsigned long long x) ++{ ++ return x ? __builtin_bswap64(x) : 0; ++} ++ ++int test_clrsb(int x) ++{ ++ return x ? 
__builtin_clrsb(x) : (__SIZEOF_INT__*8-1); ++} ++ ++int test_clrsbl(long x) ++{ ++ return x ? __builtin_clrsbl(x) : (__SIZEOF_LONG__*8-1); ++} ++ ++int test_clrsbll(long long x) ++{ ++ return x ? __builtin_clrsbll(x) : (__SIZEOF_LONG_LONG__*8-1); ++} ++ ++#if 0 ++/* BUILT_IN_FFS is transformed by match.pd */ ++int test_ffs(unsigned int x) ++{ ++ return x ? __builtin_ffs(x) : 0; ++} ++ ++int test_ffsl(unsigned long x) ++{ ++ return x ? __builtin_ffsl(x) : 0; ++} ++ ++int test_ffsll(unsigned long long x) ++{ ++ return x ? __builtin_ffsll(x) : 0; ++} ++#endif ++ ++int test_parity(int x) ++{ ++ return x ? __builtin_parity(x) : 0; ++} ++ ++int test_parityl(long x) ++{ ++ return x ? __builtin_parityl(x) : 0; ++} ++ ++int test_parityll(long long x) ++{ ++ return x ? __builtin_parityll(x) : 0; ++} ++ ++int test_popcount(int x) ++{ ++ return x ? __builtin_popcount(x) : 0; ++} ++ ++int test_popcountl(long x) ++{ ++ return x ? __builtin_popcountl(x) : 0; ++} ++ ++int test_popcountll(long long x) ++{ ++ return x ? 
__builtin_popcountll(x) : 0; ++} ++ ++/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */ ++ +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 045a7b1b8..21ac08145 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -66,9 +66,9 @@ static bool abs_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); + static bool spaceship_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); +-static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, +- edge, edge, gphi *, +- tree, tree); ++static bool cond_removal_in_builtin_zero_pattern (basic_block, basic_block, ++ edge, edge, gphi *, ++ tree, tree); + static bool cond_store_replacement (basic_block, basic_block, edge, edge, + hash_set *); + static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); +@@ -355,9 +355,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p +- && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, +- e2, phi, arg0, +- arg1)) ++ && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2, ++ phi, arg0, arg1)) + cfgchanged = true; + else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -2204,7 +2203,8 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + +-/* Convert ++/* Optimize x ? __builtin_fun (x) : C, where C is __builtin_fun (0). ++ Convert + + + if (b_4(D) != 0) +@@ -2236,10 +2236,10 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb, + instead of 0 above it uses the value from that macro. 
*/ + + static bool +-cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, +- basic_block middle_bb, +- edge e1, edge e2, gphi *phi, +- tree arg0, tree arg1) ++cond_removal_in_builtin_zero_pattern (basic_block cond_bb, ++ basic_block middle_bb, ++ edge e1, edge e2, gphi *phi, ++ tree arg0, tree arg1) + { + gimple *cond; + gimple_stmt_iterator gsi, gsi_from; +@@ -2287,6 +2287,12 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + int val = 0; + switch (cfn) + { ++ case CFN_BUILT_IN_BSWAP16: ++ case CFN_BUILT_IN_BSWAP32: ++ case CFN_BUILT_IN_BSWAP64: ++ case CFN_BUILT_IN_BSWAP128: ++ CASE_CFN_FFS: ++ CASE_CFN_PARITY: + CASE_CFN_POPCOUNT: + break; + CASE_CFN_CLZ: +@@ -2315,6 +2321,15 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + } + } + return false; ++ case BUILT_IN_CLRSB: ++ val = TYPE_PRECISION (integer_type_node) - 1; ++ break; ++ case BUILT_IN_CLRSBL: ++ val = TYPE_PRECISION (long_integer_type_node) - 1; ++ break; ++ case BUILT_IN_CLRSBLL: ++ val = TYPE_PRECISION (long_long_integer_type_node) - 1; ++ break; + default: + return false; + } +-- +2.27.0.windows.1 + diff --git a/0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch b/0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch new file mode 100644 index 0000000000000000000000000000000000000000..df342347bc4ff38c60ec1bc5e31024717c54b910 --- /dev/null +++ b/0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch @@ -0,0 +1,251 @@ +From 77398954ce517aa011b7a254c7aa2858521b2093 Mon Sep 17 00:00:00 2001 +From: Richard Biener +Date: Mon, 15 Nov 2021 15:19:36 +0100 +Subject: [PATCH 18/35] [Backport] tree-optimization/102880 - make PHI-OPT + recognize more CFGs + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f98f373dd822b35c52356b753d528924e9f89678 + +This allows extra edges into the middle BB for the PHI-OPT +transforms using replace_phi_edge_with_variable that do not +end up moving stmts from that middle BB. 
This avoids regressing +gcc.dg/tree-ssa/ssa-hoist-4.c with the actual fix for PR102880 +where CFG cleanup has the choice to remove two forwarders and +picks "the wrong" leading to + + if (a > b) / + /\ / + / + / | + # PHI + +rather than + + if (a > b) | + /\ | + \ | + / \ | + # PHI + +but it's relatively straight-forward to support extra edges +into the middle-BB in paths ending in replace_phi_edge_with_variable +and that do not require moving stmts. That's because we really +only want to remove the edge from the condition to the middle BB. +Of course actually doing that means updating dominators in non-trivial +ways which is why I kept the original code for the single edge +case and simply defer to CFG cleanup by adjusting the condition for +the complicated case. + +The testcase needs to be a GIMPLE one since it's quite unreliable +to produce the desired CFG. + +2021-11-15 Richard Biener + + PR tree-optimization/102880 + * tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Push + single_pred (bb1) condition to places that really need it. + (match_simplify_replacement): Likewise. + (value_replacement): Likewise. + (replace_phi_edge_with_variable): Deal with extra edges + into the middle BB. + + * gcc.dg/tree-ssa/phi-opt-26.c: New testcase. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 +++++++++ + gcc/tree-ssa-phiopt.c | 73 +++++++++++++--------- + 2 files changed, 75 insertions(+), 29 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c +new file mode 100644 +index 000000000..21aa66e38 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */ ++ ++int __GIMPLE (ssa,startwith("phiopt")) ++foo (int a, int b, int flag) ++{ ++ int res; ++ ++ __BB(2): ++ if (flag_2(D) != 0) ++ goto __BB6; ++ else ++ goto __BB4; ++ ++ __BB(4): ++ if (a_3(D) > b_4(D)) ++ goto __BB7; ++ else ++ goto __BB6; ++ ++ __BB(6): ++ goto __BB7; ++ ++ __BB(7): ++ res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D)); ++ return res_1; ++} ++ ++/* We should be able to detect MAX despite the extra edge into ++ the middle BB. */ ++/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */ +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 21ac08145..079d29e74 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -219,7 +219,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + + /* If either bb1's succ or bb2 or bb2's succ is non NULL. */ + if (EDGE_COUNT (bb1->succs) == 0 +- || bb2 == NULL + || EDGE_COUNT (bb2->succs) == 0) + continue; + +@@ -279,14 +278,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + || (e1->flags & EDGE_FALLTHRU) == 0) + continue; + +- /* Also make sure that bb1 only have one predecessor and that it +- is bb. */ +- if (!single_pred_p (bb1) +- || single_pred (bb1) != bb) +- continue; +- + if (do_store_elim) + { ++ /* Also make sure that bb1 only have one predecessor and that it ++ is bb. 
*/ ++ if (!single_pred_p (bb1) ++ || single_pred (bb1) != bb) ++ continue; ++ + /* bb1 is the middle block, bb2 the join block, bb the split block, + e1 the fallthrough edge from bb1 to bb2. We can't do the + optimization if the join block has more than two predecessors. */ +@@ -331,10 +330,11 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + node. */ + gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE); + +- gphi *newphi = factor_out_conditional_conversion (e1, e2, phi, +- arg0, arg1, +- cond_stmt); +- if (newphi != NULL) ++ gphi *newphi; ++ if (single_pred_p (bb1) ++ && (newphi = factor_out_conditional_conversion (e1, e2, phi, ++ arg0, arg1, ++ cond_stmt))) + { + phi = newphi; + /* factor_out_conditional_conversion may create a new PHI in +@@ -355,12 +355,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p ++ && single_pred_p (bb1) + && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2, + phi, arg0, arg1)) + cfgchanged = true; + else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +- else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) ++ else if (single_pred_p (bb1) ++ && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + } + } +@@ -391,35 +393,41 @@ replace_phi_edge_with_variable (basic_block cond_block, + edge e, gphi *phi, tree new_tree) + { + basic_block bb = gimple_bb (phi); +- basic_block block_to_remove; + gimple_stmt_iterator gsi; + + /* Change the PHI argument to new. */ + SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree); + + /* Remove the empty basic block. 
*/ ++ edge edge_to_remove; + if (EDGE_SUCC (cond_block, 0)->dest == bb) ++ edge_to_remove = EDGE_SUCC (cond_block, 1); ++ else ++ edge_to_remove = EDGE_SUCC (cond_block, 0); ++ if (EDGE_COUNT (edge_to_remove->dest->preds) == 1) + { +- EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU; +- EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); +- EDGE_SUCC (cond_block, 0)->probability = profile_probability::always (); +- +- block_to_remove = EDGE_SUCC (cond_block, 1)->dest; ++ e->flags |= EDGE_FALLTHRU; ++ e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); ++ e->probability = profile_probability::always (); ++ delete_basic_block (edge_to_remove->dest); ++ ++ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ ++ gsi = gsi_last_bb (cond_block); ++ gsi_remove (&gsi, true); + } + else + { +- EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU; +- EDGE_SUCC (cond_block, 1)->flags +- &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); +- EDGE_SUCC (cond_block, 1)->probability = profile_probability::always (); +- +- block_to_remove = EDGE_SUCC (cond_block, 0)->dest; ++ /* If there are other edges into the middle block make ++ CFG cleanup deal with the edge removal to avoid ++ updating dominators here in a non-trivial way. */ ++ gcond *cond = as_a (last_stmt (cond_block)); ++ if (edge_to_remove->flags & EDGE_TRUE_VALUE) ++ gimple_cond_make_false (cond); ++ else ++ gimple_cond_make_true (cond); + } +- delete_basic_block (block_to_remove); + +- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ +- gsi = gsi_last_bb (cond_block); +- gsi_remove (&gsi, true); ++ statistics_counter_event (cfun, "Replace PHI with variable", 1); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, +@@ -846,6 +854,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + allow it and move it once the transformation is done. 
*/ + if (!empty_block_p (middle_bb)) + { ++ if (!single_pred_p (middle_bb)) ++ return false; ++ + stmt_to_move = last_and_only_stmt (middle_bb); + if (!stmt_to_move) + return false; +@@ -1225,6 +1236,11 @@ value_replacement (basic_block cond_bb, basic_block middle_bb, + } + else + { ++ if (!single_pred_p (middle_bb)) ++ return 0; ++ statistics_counter_event (cfun, "Replace PHI with " ++ "variable/value_replacement", 1); ++ + /* Replace the PHI arguments with arg. */ + SET_PHI_ARG_DEF (phi, e0->dest_idx, arg); + SET_PHI_ARG_DEF (phi, e1->dest_idx, arg); +@@ -1239,7 +1255,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb, + } + return 1; + } +- + } + + /* Now optimize (x != 0) ? x + y : y to just x + y. */ +-- +2.27.0.windows.1 + diff --git a/0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch b/0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch new file mode 100644 index 0000000000000000000000000000000000000000..06217a8706716d957cbd639b95dbb9b2a52057ac --- /dev/null +++ b/0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch @@ -0,0 +1,250 @@ +From a2f5e6f38fe7b5b32a252643b00dd2d7ab0e3fac Mon Sep 17 00:00:00 2001 +From: Richard Sandiford +Date: Tue, 12 May 2020 09:01:10 +0100 +Subject: [PATCH 19/35] [Backport] tree: Add vector_element_bits(_tree) + [PR94980 1/3] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d17a896da1e898928d337596d029f0ece0039d55 + +A lot of code that wants to know the number of bits in a vector +element gets that information from the element's TYPE_SIZE, +which is always equal to TYPE_SIZE_UNIT * BITS_PER_UNIT. +This doesn't work for SVE and AVX512-style packed boolean vectors, +where several elements can occupy a single byte. + +This patch introduces a new pair of helpers for getting the true +(possibly sub-byte) size. I made a token attempt to convert obvious +element size calculations, but I'm sure I missed some. 
+ +2020-05-12 Richard Sandiford + +gcc/ + PR tree-optimization/94980 + * tree.h (vector_element_bits, vector_element_bits_tree): Declare. + * tree.c (vector_element_bits, vector_element_bits_tree): New. + * match.pd: Use the new functions instead of determining the + vector element size directly from TYPE_SIZE(_UNIT). + * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise. + * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise. + * tree-vect-stmts.c (vect_is_simple_cond): Likewise. + * tree-vect-generic.c (expand_vector_piecewise): Likewise. + (expand_vector_conversion): Likewise. + (expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as + a divisor. Convert the dividend to bits to compensate. + * tree-vect-loop.c (vectorizable_live_operation): Call + vector_element_bits instead of open-coding it. +--- + gcc/ChangeLog | 17 +++++++++++++++++ + gcc/match.pd | 2 +- + gcc/tree-vect-data-refs.c | 2 +- + gcc/tree-vect-generic.c | 19 +++++++------------ + gcc/tree-vect-loop.c | 4 +--- + gcc/tree-vect-patterns.c | 3 +-- + gcc/tree-vect-stmts.c | 3 +-- + gcc/tree.c | 24 ++++++++++++++++++++++++ + gcc/tree.h | 2 ++ + 9 files changed, 55 insertions(+), 21 deletions(-) + +diff --git a/gcc/ChangeLog b/gcc/ChangeLog +index 3b1384e70..07aea9b86 100644 +--- a/gcc/ChangeLog ++++ b/gcc/ChangeLog +@@ -1,3 +1,20 @@ ++2020-05-12 Richard Sandiford ++ ++ PR tree-optimization/94980 ++ * tree.h (vector_element_bits, vector_element_bits_tree): Declare. ++ * tree.c (vector_element_bits, vector_element_bits_tree): New. ++ * match.pd: Use the new functions instead of determining the ++ vector element size directly from TYPE_SIZE(_UNIT). ++ * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise. ++ * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise. ++ * tree-vect-stmts.c (vect_is_simple_cond): Likewise. ++ * tree-vect-generic.c (expand_vector_piecewise): Likewise. ++ (expand_vector_conversion): Likewise. 
++ (expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as ++ a divisor. Convert the dividend to bits to compensate. ++ * tree-vect-loop.c (vectorizable_live_operation): Call ++ vector_element_bits instead of open-coding it. ++ + 2021-04-08 Release Manager + + * GCC 10.3.0 released. +diff --git a/gcc/match.pd b/gcc/match.pd +index 5899eea95..79a0228d2 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -6236,7 +6236,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + } + (if (ins) + (bit_insert { op0; } { ins; } +- { bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); }) ++ { bitsize_int (at * vector_element_bits (type)); }) + (if (changed) + (vec_perm { op0; } { op1; } { op2; })))))))))) + +diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c +index d78b06455..e4466a4f3 100644 +--- a/gcc/tree-vect-data-refs.c ++++ b/gcc/tree-vect-data-refs.c +@@ -3709,7 +3709,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, + tree *offset_vectype_out) + { + unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); +- unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))); ++ unsigned int element_bits = vector_element_bits (vectype); + if (element_bits != memory_bits) + /* For now the vector elements must be the same width as the + memory elements. 
*/ +diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c +index c10492034..37c3956a4 100644 +--- a/gcc/tree-vect-generic.c ++++ b/gcc/tree-vect-generic.c +@@ -276,8 +276,7 @@ expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, + tree part_width = TYPE_SIZE (inner_type); + tree index = bitsize_int (0); + int nunits = nunits_for_known_piecewise_op (type); +- int delta = tree_to_uhwi (part_width) +- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type))); ++ int delta = tree_to_uhwi (part_width) / vector_element_bits (type); + int i; + location_t loc = gimple_location (gsi_stmt (*gsi)); + +@@ -357,8 +356,7 @@ expand_vector_addition (gimple_stmt_iterator *gsi, + elem_op_func f, elem_op_func f_parallel, + tree type, tree a, tree b, enum tree_code code) + { +- int parts_per_word = UNITS_PER_WORD +- / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); ++ int parts_per_word = BITS_PER_WORD / vector_element_bits (type); + + if (INTEGRAL_TYPE_P (TREE_TYPE (type)) + && parts_per_word >= 4 +@@ -1733,19 +1731,17 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) + optab optab1 = unknown_optab; + + gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type)); +- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (ret_type)))); +- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (arg_type)))); + if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type)) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type))) + code = FIX_TRUNC_EXPR; + else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type)) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type))) + code = FLOAT_EXPR; +- if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type))) +- < tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)))) ++ unsigned int ret_elt_bits = vector_element_bits (ret_type); ++ unsigned int arg_elt_bits = vector_element_bits (arg_type); ++ if (ret_elt_bits < arg_elt_bits) + modifier = NARROW; +- else if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type))) +- > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)))) 
++ else if (ret_elt_bits > arg_elt_bits) + modifier = WIDEN; + + if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)) +@@ -1908,8 +1904,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) + tree part_width = TYPE_SIZE (compute_type); + tree index = bitsize_int (0); + int nunits = nunits_for_known_piecewise_op (arg_type); +- int delta = tree_to_uhwi (part_width) +- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))); ++ int delta = tree_to_uhwi (part_width) / arg_elt_bits; + int i; + location_t loc = gimple_location (gsi_stmt (*gsi)); + +diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +index 899b56087..7990e31de 100644 +--- a/gcc/tree-vect-loop.c ++++ b/gcc/tree-vect-loop.c +@@ -8059,9 +8059,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info, + : gimple_get_lhs (stmt); + lhs_type = TREE_TYPE (lhs); + +- bitsize = (VECTOR_BOOLEAN_TYPE_P (vectype) +- ? bitsize_int (TYPE_PRECISION (TREE_TYPE (vectype))) +- : TYPE_SIZE (TREE_TYPE (vectype))); ++ bitsize = vector_element_bits_tree (vectype); + vec_bitsize = TYPE_SIZE (vectype); + + /* Get the vectorized lhs of STMT and the lane to use (counted in bits). 
*/ +diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c +index 84d7ddb17..b076740ef 100644 +--- a/gcc/tree-vect-patterns.c ++++ b/gcc/tree-vect-patterns.c +@@ -4406,8 +4406,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + || dt == vect_constant_def)) + { + tree wide_scalar_type = build_nonstandard_integer_type +- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))), +- TYPE_UNSIGNED (rhs1_type)); ++ (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type)); + tree vectype3 = get_vectype_for_scalar_type (vinfo, + wide_scalar_type); + if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1))) +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index 4636b7ba2..0bdf9a547 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -10717,8 +10717,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node, + && tree_int_cst_lt (TYPE_SIZE (scalar_type), + TYPE_SIZE (TREE_TYPE (vectype)))) + scalar_type = build_nonstandard_integer_type +- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))), +- TYPE_UNSIGNED (scalar_type)); ++ (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type)); + *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, + slp_node); + } +diff --git a/gcc/tree.c b/gcc/tree.c +index 3e6647ae0..9a0cedf10 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -13892,6 +13892,30 @@ vector_type_mode (const_tree t) + return mode; + } + ++/* Return the size in bits of each element of vector type TYPE. */ ++ ++unsigned int ++vector_element_bits (const_tree type) ++{ ++ gcc_checking_assert (VECTOR_TYPE_P (type)); ++ if (VECTOR_BOOLEAN_TYPE_P (type)) ++ return vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (type)), ++ TYPE_VECTOR_SUBPARTS (type)); ++ return tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type))); ++} ++ ++/* Calculate the size in bits of each element of vector type TYPE ++ and return the result as a tree of type bitsizetype. 
*/ ++ ++tree ++vector_element_bits_tree (const_tree type) ++{ ++ gcc_checking_assert (VECTOR_TYPE_P (type)); ++ if (VECTOR_BOOLEAN_TYPE_P (type)) ++ return bitsize_int (vector_element_bits (type)); ++ return TYPE_SIZE (TREE_TYPE (type)); ++} ++ + /* Verify that basic properties of T match TV and thus T can be a variant of + TV. TV should be the more specified variant (i.e. the main variant). */ + +diff --git a/gcc/tree.h b/gcc/tree.h +index bddc6e528..c66207fa0 100644 +--- a/gcc/tree.h ++++ b/gcc/tree.h +@@ -1996,6 +1996,8 @@ class auto_suppress_location_wrappers + + extern machine_mode element_mode (const_tree); + extern machine_mode vector_type_mode (const_tree); ++extern unsigned int vector_element_bits (const_tree); ++extern tree vector_element_bits_tree (const_tree); + + /* The "canonical" type for this type node, which is used by frontends to + compare the type for equality with another type. If two types are +-- +2.27.0.windows.1 + diff --git a/0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch b/0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch new file mode 100644 index 0000000000000000000000000000000000000000..6bbb8adbbdc5ddf0c42e665ed134a3e8cb76908c --- /dev/null +++ b/0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch @@ -0,0 +1,1063 @@ +From 3a45b2fc131e4639b05f62d6064bd964d129c19b Mon Sep 17 00:00:00 2001 +From: Martin Liska +Date: Mon, 9 Mar 2020 13:23:03 +0100 +Subject: [PATCH 20/35] [Backport] Lower VEC_COND_EXPR into internal functions. + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=502d63b6d6141597bb18fd23c87736a1b384cf8f + +gcc/ChangeLog: + + * Makefile.in: Add new file. + * expr.c (expand_expr_real_2): Add gcc_unreachable as we should + not meet this condition. + (do_store_flag): Likewise. + * gimplify.c (gimplify_expr): Gimplify first argument of + VEC_COND_EXPR to be a SSA name. + * internal-fn.c (vec_cond_mask_direct): New. + (vec_cond_direct): Likewise. + (vec_condu_direct): Likewise. 
+ (vec_condeq_direct): Likewise. + (expand_vect_cond_optab_fn): New. + (expand_vec_cond_optab_fn): Likewise. + (expand_vec_condu_optab_fn): Likewise. + (expand_vec_condeq_optab_fn): Likewise. + (expand_vect_cond_mask_optab_fn): Likewise. + (expand_vec_cond_mask_optab_fn): Likewise. + (direct_vec_cond_mask_optab_supported_p): Likewise. + (direct_vec_cond_optab_supported_p): Likewise. + (direct_vec_condu_optab_supported_p): Likewise. + (direct_vec_condeq_optab_supported_p): Likewise. + * internal-fn.def (VCOND): New OPTAB. + (VCONDU): Likewise. + (VCONDEQ): Likewise. + (VCOND_MASK): Likewise. + * optabs.c (get_rtx_code): Make it global. + (expand_vec_cond_mask_expr): Removed. + (expand_vec_cond_expr): Removed. + * optabs.h (expand_vec_cond_expr): Likewise. + (vector_compare_rtx): Make it global. + * passes.def: Add new pass_gimple_isel pass. + * tree-cfg.c (verify_gimple_assign_ternary): Add check + for VEC_COND_EXPR about first argument. + * tree-pass.h (make_pass_gimple_isel): New. + * tree-ssa-forwprop.c (pass_forwprop::execute): Prevent + propagation of the first argument of a VEC_COND_EXPR. + * tree-ssa-reassoc.c (ovce_extract_ops): Support SSA_NAME as + first argument of a VEC_COND_EXPR. + (optimize_vec_cond_expr): Likewise. + * tree-vect-generic.c (expand_vector_divmod): Make SSA_NAME + for a first argument of created VEC_COND_EXPR. + (expand_vector_condition): Fix coding style. + * tree-vect-stmts.c (vectorizable_condition): Gimplify + first argument. + * gimple-isel.cc: New file. + +gcc/testsuite/ChangeLog: + + * g++.dg/vect/vec-cond-expr-eh.C: New test. 
+--- + gcc/Makefile.in | 2 + + gcc/expr.c | 25 +- + gcc/gimple-isel.cc | 244 +++++++++++++++++++ + gcc/gimplify.c | 15 +- + gcc/internal-fn.c | 89 +++++++ + gcc/internal-fn.def | 5 + + gcc/optabs.c | 124 +--------- + gcc/optabs.h | 7 +- + gcc/passes.def | 1 + + gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C | 17 ++ + gcc/tree-cfg.c | 2 +- + gcc/tree-pass.h | 1 + + gcc/tree-ssa-forwprop.c | 3 +- + gcc/tree-ssa-reassoc.c | 64 +++-- + gcc/tree-vect-generic.c | 45 ++-- + gcc/tree-vect-stmts.c | 8 +- + 16 files changed, 441 insertions(+), 211 deletions(-) + create mode 100644 gcc/gimple-isel.cc + create mode 100644 gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 2b2bf474a..3f06b8907 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1623,6 +1623,7 @@ OBJS = \ + tree-streamer-out.o \ + tree-tailcall.o \ + tree-vect-generic.o \ ++ gimple-isel.o \ + tree-vect-patterns.o \ + tree-vect-data-refs.o \ + tree-vect-stmts.o \ +@@ -2591,6 +2592,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ + $(srcdir)/dwarf2cfi.c \ + $(srcdir)/dwarf2out.c \ + $(srcdir)/tree-vect-generic.c \ ++ $(srcdir)/gimple-isel.cc \ + $(srcdir)/dojump.c $(srcdir)/emit-rtl.h \ + $(srcdir)/emit-rtl.c $(srcdir)/except.h $(srcdir)/explow.c $(srcdir)/expr.c \ + $(srcdir)/expr.h \ +diff --git a/gcc/expr.c b/gcc/expr.c +index d66fdd4e9..c468b5eb9 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -9286,17 +9286,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, + if (temp != 0) + return temp; + +- /* For vector MIN , expand it a VEC_COND_EXPR +- and similarly for MAX . */ + if (VECTOR_TYPE_P (type)) +- { +- tree t0 = make_tree (type, op0); +- tree t1 = make_tree (type, op1); +- tree comparison = build2 (code == MIN_EXPR ? 
LE_EXPR : GE_EXPR, +- type, t0, t1); +- return expand_vec_cond_expr (type, comparison, t0, t1, +- original_target); +- } ++ gcc_unreachable (); + + /* At this point, a MEM target is no longer useful; we will get better + code without it. */ +@@ -9885,10 +9876,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, + return temp; + } + +- case VEC_COND_EXPR: +- target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target); +- return target; +- + case VEC_DUPLICATE_EXPR: + op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier); + target = expand_vector_broadcast (mode, op0); +@@ -12222,8 +12209,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode) + STRIP_NOPS (arg1); + + /* For vector typed comparisons emit code to generate the desired +- all-ones or all-zeros mask. Conveniently use the VEC_COND_EXPR +- expander for this. */ ++ all-ones or all-zeros mask. */ + if (TREE_CODE (ops->type) == VECTOR_TYPE) + { + tree ifexp = build2 (ops->code, ops->type, arg0, arg1); +@@ -12231,12 +12217,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode) + && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code)) + return expand_vec_cmp_expr (ops->type, ifexp, target); + else +- { +- tree if_true = constant_boolean_node (true, ops->type); +- tree if_false = constant_boolean_node (false, ops->type); +- return expand_vec_cond_expr (ops->type, ifexp, if_true, +- if_false, target); +- } ++ gcc_unreachable (); + } + + /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial +diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc +new file mode 100644 +index 000000000..97f920805 +--- /dev/null ++++ b/gcc/gimple-isel.cc +@@ -0,0 +1,244 @@ ++/* Schedule GIMPLE vector statements. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "rtl.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "ssa.h" ++#include "expmed.h" ++#include "optabs-tree.h" ++#include "tree-eh.h" ++#include "gimple-iterator.h" ++#include "gimplify-me.h" ++#include "gimplify.h" ++#include "tree-cfg.h" ++ ++/* Expand all VEC_COND_EXPR gimple assignments into calls to internal ++ function based on type of selected expansion. */ ++ ++static gimple * ++gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, ++ hash_map *vec_cond_ssa_name_uses) ++{ ++ tree lhs, op0a = NULL_TREE, op0b = NULL_TREE; ++ enum tree_code code; ++ enum tree_code tcode; ++ machine_mode cmp_op_mode; ++ bool unsignedp; ++ enum insn_code icode; ++ imm_use_iterator imm_iter; ++ ++ /* Only consider code == GIMPLE_ASSIGN. 
*/ ++ gassign *stmt = dyn_cast (gsi_stmt (*gsi)); ++ if (!stmt) ++ return NULL; ++ ++ code = gimple_assign_rhs_code (stmt); ++ if (code != VEC_COND_EXPR) ++ return NULL; ++ ++ tree op0 = gimple_assign_rhs1 (stmt); ++ tree op1 = gimple_assign_rhs2 (stmt); ++ tree op2 = gimple_assign_rhs3 (stmt); ++ lhs = gimple_assign_lhs (stmt); ++ machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); ++ ++ gcc_assert (!COMPARISON_CLASS_P (op0)); ++ if (TREE_CODE (op0) == SSA_NAME) ++ { ++ unsigned int used_vec_cond_exprs = 0; ++ unsigned int *slot = vec_cond_ssa_name_uses->get (op0); ++ if (slot) ++ used_vec_cond_exprs = *slot; ++ else ++ { ++ gimple *use_stmt; ++ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0) ++ { ++ gassign *assign = dyn_cast (use_stmt); ++ if (assign != NULL ++ && gimple_assign_rhs_code (assign) == VEC_COND_EXPR ++ && gimple_assign_rhs1 (assign) == op0) ++ used_vec_cond_exprs++; ++ } ++ vec_cond_ssa_name_uses->put (op0, used_vec_cond_exprs); ++ } ++ ++ gassign *def_stmt = dyn_cast (SSA_NAME_DEF_STMT (op0)); ++ if (def_stmt) ++ { ++ tcode = gimple_assign_rhs_code (def_stmt); ++ op0a = gimple_assign_rhs1 (def_stmt); ++ op0b = gimple_assign_rhs2 (def_stmt); ++ ++ tree op0a_type = TREE_TYPE (op0a); ++ if (used_vec_cond_exprs >= 2 ++ && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type)) ++ != CODE_FOR_nothing) ++ && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode)) ++ { ++ /* Keep the SSA name and use vcond_mask. */ ++ tcode = TREE_CODE (op0); ++ } ++ } ++ else ++ tcode = TREE_CODE (op0); ++ } ++ else ++ tcode = TREE_CODE (op0); ++ ++ if (TREE_CODE_CLASS (tcode) != tcc_comparison) ++ { ++ gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0))); ++ if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0))) ++ != CODE_FOR_nothing) ++ return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2); ++ /* Fake op0 < 0. 
*/ ++ else ++ { ++ gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0))) ++ == MODE_VECTOR_INT); ++ op0a = op0; ++ op0b = build_zero_cst (TREE_TYPE (op0)); ++ tcode = LT_EXPR; ++ } ++ } ++ cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a)); ++ unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a)); ++ ++ ++ gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode)) ++ && known_eq (GET_MODE_NUNITS (mode), ++ GET_MODE_NUNITS (cmp_op_mode))); ++ ++ icode = get_vcond_icode (mode, cmp_op_mode, unsignedp); ++ if (icode == CODE_FOR_nothing) ++ { ++ if (tcode == LT_EXPR ++ && op0a == op0 ++ && TREE_CODE (op0) == VECTOR_CST) ++ { ++ /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR ++ into a constant when only get_vcond_eq_icode is supported. ++ Verify < 0 and != 0 behave the same and change it to NE_EXPR. */ ++ unsigned HOST_WIDE_INT nelts; ++ if (!VECTOR_CST_NELTS (op0).is_constant (&nelts)) ++ { ++ if (VECTOR_CST_STEPPED_P (op0)) ++ gcc_unreachable (); ++ nelts = vector_cst_encoded_nelts (op0); ++ } ++ for (unsigned int i = 0; i < nelts; ++i) ++ if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1) ++ gcc_unreachable (); ++ tcode = NE_EXPR; ++ } ++ if (tcode == EQ_EXPR || tcode == NE_EXPR) ++ { ++ tree tcode_tree = build_int_cst (integer_type_node, tcode); ++ return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1, ++ op2, tcode_tree); ++ } ++ } ++ ++ gcc_assert (icode != CODE_FOR_nothing); ++ tree tcode_tree = build_int_cst (integer_type_node, tcode); ++ return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND, ++ 5, op0a, op0b, op1, op2, tcode_tree); ++} ++ ++ ++ ++/* Iterate all gimple statements and try to expand ++ VEC_COND_EXPR assignments. 
*/ ++ ++static unsigned int ++gimple_expand_vec_cond_exprs (void) ++{ ++ gimple_stmt_iterator gsi; ++ basic_block bb; ++ bool cfg_changed = false; ++ hash_map vec_cond_ssa_name_uses; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple *g = gimple_expand_vec_cond_expr (&gsi, ++ &vec_cond_ssa_name_uses); ++ if (g != NULL) ++ { ++ tree lhs = gimple_assign_lhs (gsi_stmt (gsi)); ++ gimple_set_lhs (g, lhs); ++ gsi_replace (&gsi, g, false); ++ } ++ } ++ } ++ ++ return cfg_changed ? TODO_cleanup_cfg : 0; ++} ++ ++namespace { ++ ++const pass_data pass_data_gimple_isel = ++{ ++ GIMPLE_PASS, /* type */ ++ "isel", /* name */ ++ OPTGROUP_VEC, /* optinfo_flags */ ++ TV_NONE, /* tv_id */ ++ PROP_cfg, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_update_ssa, /* todo_flags_finish */ ++}; ++ ++class pass_gimple_isel : public gimple_opt_pass ++{ ++public: ++ pass_gimple_isel (gcc::context *ctxt) ++ : gimple_opt_pass (pass_data_gimple_isel, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ return true; ++ } ++ ++ virtual unsigned int execute (function *) ++ { ++ return gimple_expand_vec_cond_exprs (); ++ } ++ ++}; // class pass_gimple_isel ++ ++} // anon namespace ++ ++gimple_opt_pass * ++make_pass_gimple_isel (gcc::context *ctxt) ++{ ++ return new pass_gimple_isel (ctxt); ++} ++ +diff --git a/gcc/gimplify.c b/gcc/gimplify.c +index 89a4ae087..16b2f4328 100644 +--- a/gcc/gimplify.c ++++ b/gcc/gimplify.c +@@ -14272,20 +14272,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, + } + + case VEC_COND_EXPR: +- { +- enum gimplify_status r0, r1, r2; +- +- r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p, +- post_p, is_gimple_condexpr, fb_rvalue); +- r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p, +- post_p, is_gimple_val, fb_rvalue); +- r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 
2), pre_p, +- post_p, is_gimple_val, fb_rvalue); +- +- ret = MIN (MIN (r0, r1), r2); +- recalculate_side_effects (*expr_p); +- } +- break; ++ goto expr_3; + + case VEC_PERM_EXPR: + /* Classified as tcc_expression. */ +diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c +index 5e9aa6072..644f234e0 100644 +--- a/gcc/internal-fn.c ++++ b/gcc/internal-fn.c +@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see + #include "gimple-ssa.h" + #include "tree-phinodes.h" + #include "ssa-iterators.h" ++#include "explow.h" + + /* The names of each internal function, indexed by function number. */ + const char *const internal_fn_name_array[] = { +@@ -107,6 +108,10 @@ init_internal_fns () + #define mask_store_direct { 3, 2, false } + #define store_lanes_direct { 0, 0, false } + #define mask_store_lanes_direct { 0, 0, false } ++#define vec_cond_mask_direct { 0, 0, false } ++#define vec_cond_direct { 0, 0, false } ++#define vec_condu_direct { 0, 0, false } ++#define vec_condeq_direct { 0, 0, false } + #define scatter_store_direct { 3, 1, false } + #define unary_direct { 0, 0, true } + #define binary_direct { 0, 0, true } +@@ -2548,6 +2553,86 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab) + + #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn + ++/* Expand VCOND, VCONDU and VCONDEQ optab internal functions. ++ The expansion of STMT happens based on OPTAB table associated. 
*/ ++ ++static void ++expand_vect_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab) ++{ ++ class expand_operand ops[6]; ++ insn_code icode; ++ tree lhs = gimple_call_lhs (stmt); ++ tree op0a = gimple_call_arg (stmt, 0); ++ tree op0b = gimple_call_arg (stmt, 1); ++ tree op1 = gimple_call_arg (stmt, 2); ++ tree op2 = gimple_call_arg (stmt, 3); ++ enum tree_code tcode = (tree_code) int_cst_value (gimple_call_arg (stmt, 4)); ++ ++ tree vec_cond_type = TREE_TYPE (lhs); ++ tree op_mode = TREE_TYPE (op0a); ++ bool unsignedp = TYPE_UNSIGNED (op_mode); ++ ++ machine_mode mode = TYPE_MODE (vec_cond_type); ++ machine_mode cmp_op_mode = TYPE_MODE (op_mode); ++ ++ icode = convert_optab_handler (optab, mode, cmp_op_mode); ++ rtx comparison ++ = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4); ++ rtx rtx_op1 = expand_normal (op1); ++ rtx rtx_op2 = expand_normal (op2); ++ ++ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ create_output_operand (&ops[0], target, mode); ++ create_input_operand (&ops[1], rtx_op1, mode); ++ create_input_operand (&ops[2], rtx_op2, mode); ++ create_fixed_operand (&ops[3], comparison); ++ create_fixed_operand (&ops[4], XEXP (comparison, 0)); ++ create_fixed_operand (&ops[5], XEXP (comparison, 1)); ++ expand_insn (icode, 6, ops); ++} ++ ++#define expand_vec_cond_optab_fn expand_vect_cond_optab_fn ++#define expand_vec_condu_optab_fn expand_vect_cond_optab_fn ++#define expand_vec_condeq_optab_fn expand_vect_cond_optab_fn ++ ++/* Expand VCOND_MASK optab internal function. ++ The expansion of STMT happens based on OPTAB table associated. 
*/ ++ ++static void ++expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab) ++{ ++ class expand_operand ops[4]; ++ ++ tree lhs = gimple_call_lhs (stmt); ++ tree op0 = gimple_call_arg (stmt, 0); ++ tree op1 = gimple_call_arg (stmt, 1); ++ tree op2 = gimple_call_arg (stmt, 2); ++ tree vec_cond_type = TREE_TYPE (lhs); ++ ++ machine_mode mode = TYPE_MODE (vec_cond_type); ++ machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0)); ++ enum insn_code icode = convert_optab_handler (optab, mode, mask_mode); ++ rtx mask, rtx_op1, rtx_op2; ++ ++ gcc_assert (icode != CODE_FOR_nothing); ++ ++ mask = expand_normal (op0); ++ rtx_op1 = expand_normal (op1); ++ rtx_op2 = expand_normal (op2); ++ ++ mask = force_reg (mask_mode, mask); ++ rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1); ++ ++ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ create_output_operand (&ops[0], target, mode); ++ create_input_operand (&ops[1], rtx_op1, mode); ++ create_input_operand (&ops[2], rtx_op2, mode); ++ create_input_operand (&ops[3], mask, mask_mode); ++ expand_insn (icode, 4, ops); ++} ++ ++#define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn ++ + static void + expand_ABNORMAL_DISPATCHER (internal_fn, gcall *) + { +@@ -3131,6 +3216,10 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, + #define direct_mask_store_optab_supported_p direct_optab_supported_p + #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p + #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p ++#define direct_vec_cond_mask_optab_supported_p multi_vector_optab_supported_p ++#define direct_vec_cond_optab_supported_p multi_vector_optab_supported_p ++#define direct_vec_condu_optab_supported_p multi_vector_optab_supported_p ++#define direct_vec_condeq_optab_supported_p multi_vector_optab_supported_p + #define direct_scatter_store_optab_supported_p convert_optab_supported_p + #define 
direct_while_optab_supported_p convert_optab_supported_p + #define direct_fold_extract_optab_supported_p direct_optab_supported_p +diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def +index 1d190d492..0c6fc3711 100644 +--- a/gcc/internal-fn.def ++++ b/gcc/internal-fn.def +@@ -136,6 +136,11 @@ DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes) + DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0, + vec_mask_store_lanes, mask_store_lanes) + ++DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond) ++DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu) ++DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq) ++DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask) ++ + DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) + DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW, + check_raw_ptrs, check_ptrs) +diff --git a/gcc/optabs.c b/gcc/optabs.c +index c3751fdf7..64a1a1768 100644 +--- a/gcc/optabs.c ++++ b/gcc/optabs.c +@@ -5454,7 +5454,7 @@ get_rtx_code (enum tree_code tcode, bool unsignedp) + first comparison operand for insn ICODE. Do not generate the + compare instruction itself. */ + +-static rtx ++rtx + vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode, + tree t_op0, tree t_op1, bool unsignedp, + enum insn_code icode, unsigned int opno) +@@ -5821,128 +5821,6 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) + return tmp; + } + +-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its +- three operands. 
*/ +- +-rtx +-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2, +- rtx target) +-{ +- class expand_operand ops[4]; +- machine_mode mode = TYPE_MODE (vec_cond_type); +- machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0)); +- enum insn_code icode = get_vcond_mask_icode (mode, mask_mode); +- rtx mask, rtx_op1, rtx_op2; +- +- if (icode == CODE_FOR_nothing) +- return 0; +- +- mask = expand_normal (op0); +- rtx_op1 = expand_normal (op1); +- rtx_op2 = expand_normal (op2); +- +- mask = force_reg (mask_mode, mask); +- rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1); +- +- create_output_operand (&ops[0], target, mode); +- create_input_operand (&ops[1], rtx_op1, mode); +- create_input_operand (&ops[2], rtx_op2, mode); +- create_input_operand (&ops[3], mask, mask_mode); +- expand_insn (icode, 4, ops); +- +- return ops[0].value; +-} +- +-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its +- three operands. */ +- +-rtx +-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2, +- rtx target) +-{ +- class expand_operand ops[6]; +- enum insn_code icode; +- rtx comparison, rtx_op1, rtx_op2; +- machine_mode mode = TYPE_MODE (vec_cond_type); +- machine_mode cmp_op_mode; +- bool unsignedp; +- tree op0a, op0b; +- enum tree_code tcode; +- +- if (COMPARISON_CLASS_P (op0)) +- { +- op0a = TREE_OPERAND (op0, 0); +- op0b = TREE_OPERAND (op0, 1); +- tcode = TREE_CODE (op0); +- } +- else +- { +- gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0))); +- if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0))) +- != CODE_FOR_nothing) +- return expand_vec_cond_mask_expr (vec_cond_type, op0, op1, +- op2, target); +- /* Fake op0 < 0. 
*/ +- else +- { +- gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0))) +- == MODE_VECTOR_INT); +- op0a = op0; +- op0b = build_zero_cst (TREE_TYPE (op0)); +- tcode = LT_EXPR; +- } +- } +- cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a)); +- unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a)); +- +- +- gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode)) +- && known_eq (GET_MODE_NUNITS (mode), +- GET_MODE_NUNITS (cmp_op_mode))); +- +- icode = get_vcond_icode (mode, cmp_op_mode, unsignedp); +- if (icode == CODE_FOR_nothing) +- { +- if (tcode == LT_EXPR +- && op0a == op0 +- && TREE_CODE (op0) == VECTOR_CST) +- { +- /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR +- into a constant when only get_vcond_eq_icode is supported. +- Verify < 0 and != 0 behave the same and change it to NE_EXPR. */ +- unsigned HOST_WIDE_INT nelts; +- if (!VECTOR_CST_NELTS (op0).is_constant (&nelts)) +- { +- if (VECTOR_CST_STEPPED_P (op0)) +- return 0; +- nelts = vector_cst_encoded_nelts (op0); +- } +- for (unsigned int i = 0; i < nelts; ++i) +- if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1) +- return 0; +- tcode = NE_EXPR; +- } +- if (tcode == EQ_EXPR || tcode == NE_EXPR) +- icode = get_vcond_eq_icode (mode, cmp_op_mode); +- if (icode == CODE_FOR_nothing) +- return 0; +- } +- +- comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, +- icode, 4); +- rtx_op1 = expand_normal (op1); +- rtx_op2 = expand_normal (op2); +- +- create_output_operand (&ops[0], target, mode); +- create_input_operand (&ops[1], rtx_op1, mode); +- create_input_operand (&ops[2], rtx_op2, mode); +- create_fixed_operand (&ops[3], comparison); +- create_fixed_operand (&ops[4], XEXP (comparison, 0)); +- create_fixed_operand (&ops[5], XEXP (comparison, 1)); +- expand_insn (icode, 6, ops); +- return ops[0].value; +-} +- + /* Generate VEC_SERIES_EXPR , returning a value of mode VMODE. + Use TARGET for the result if nonnull and convenient. 
*/ + +diff --git a/gcc/optabs.h b/gcc/optabs.h +index 5bd19503a..7c2ec257c 100644 +--- a/gcc/optabs.h ++++ b/gcc/optabs.h +@@ -321,9 +321,6 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx, + /* Generate code for vector comparison. */ + extern rtx expand_vec_cmp_expr (tree, tree, rtx); + +-/* Generate code for VEC_COND_EXPR. */ +-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx); +- + /* Generate code for VEC_SERIES_EXPR. */ + extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx); + +@@ -364,5 +361,9 @@ extern void expand_jump_insn (enum insn_code icode, unsigned int nops, + class expand_operand *ops); + + extern enum rtx_code get_rtx_code (enum tree_code tcode, bool unsignedp); ++extern rtx vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode, ++ tree t_op0, tree t_op1, bool unsignedp, ++ enum insn_code icode, unsigned int opno); ++ + + #endif /* GCC_OPTABS_H */ +diff --git a/gcc/passes.def b/gcc/passes.def +index 94554cc1d..5a62819cc 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -403,6 +403,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_cleanup_eh); + NEXT_PASS (pass_lower_resx); + NEXT_PASS (pass_nrv); ++ NEXT_PASS (pass_gimple_isel); + NEXT_PASS (pass_cleanup_cfg_post_optimizing); + NEXT_PASS (pass_warn_function_noreturn); + NEXT_PASS (pass_gen_hsail); +diff --git a/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C +new file mode 100644 +index 000000000..00fe24224 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-fnon-call-exceptions" } */ ++ ++typedef double v2df __attribute__((vector_size(16))); ++ ++v2df foo (v2df a, v2df b, v2df c, v2df d) ++{ ++ try ++ { ++ v2df res = a < b ? c : d; ++ return res; ++ } ++ catch (...) 
++ { ++ return (v2df){}; ++ } ++} +diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c +index 1af59fc6f..d82fe23d8 100644 +--- a/gcc/tree-cfg.c ++++ b/gcc/tree-cfg.c +@@ -4196,7 +4196,7 @@ verify_gimple_assign_ternary (gassign *stmt) + return true; + } + +- if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR) ++ if ((rhs_code == COND_EXPR + ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1)) + || !is_gimple_val (rhs2) + || !is_gimple_val (rhs3)) +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 09dd9b289..1c620b28e 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -631,6 +631,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt); ++extern gimple_opt_pass *make_pass_gimple_isel (gcc::context *ctxt); + + /* Current optimization pass. */ + extern opt_pass *current_pass; +diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c +index 3161d2e39..ba0b55f4a 100644 +--- a/gcc/tree-ssa-forwprop.c ++++ b/gcc/tree-ssa-forwprop.c +@@ -3131,8 +3131,7 @@ pass_forwprop::execute (function *fun) + tree rhs1 = gimple_assign_rhs1 (stmt); + enum tree_code code = gimple_assign_rhs_code (stmt); + +- if (code == COND_EXPR +- || code == VEC_COND_EXPR) ++ if (code == COND_EXPR) + { + /* In this case the entire COND_EXPR is in rhs1. */ + if (forward_propagate_into_cond (&gsi)) +diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c +index af8df8563..5f978ac78 100644 +--- a/gcc/tree-ssa-reassoc.c ++++ b/gcc/tree-ssa-reassoc.c +@@ -3830,7 +3830,8 @@ optimize_range_tests (enum tree_code opcode, + to type of comparison. 
*/ + + static tree_code +-ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) ++ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type, ++ tree *lhs, tree *rhs, gassign **vcond) + { + if (TREE_CODE (var) != SSA_NAME) + return ERROR_MARK; +@@ -3838,6 +3839,8 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) + gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); + if (stmt == NULL) + return ERROR_MARK; ++ if (vcond) ++ *vcond = stmt; + + /* ??? If we start creating more COND_EXPR, we could perform + this same optimization with them. For now, simplify. */ +@@ -3846,9 +3849,20 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) + + tree cond = gimple_assign_rhs1 (stmt); + tree_code cmp = TREE_CODE (cond); +- if (TREE_CODE_CLASS (cmp) != tcc_comparison) ++ if (cmp != SSA_NAME) + return ERROR_MARK; + ++ gassign *assign = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (cond)); ++ if (assign == NULL ++ || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison) ++ return ERROR_MARK; ++ ++ cmp = gimple_assign_rhs_code (assign); ++ if (lhs) ++ *lhs = gimple_assign_rhs1 (assign); ++ if (rhs) ++ *rhs = gimple_assign_rhs2 (assign); ++ + /* ??? For now, allow only canonical true and false result vectors. + We could expand this to other constants should the need arise, + but at the moment we don't create them. */ +@@ -3869,7 +3883,7 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) + + /* Success! 
*/ + if (rets) +- *rets = stmt; ++ *rets = assign; + if (reti) + *reti = inv; + if (type) +@@ -3893,10 +3907,11 @@ optimize_vec_cond_expr (tree_code opcode, vec *ops) + { + tree elt0 = (*ops)[i]->op; + +- gassign *stmt0; ++ gassign *stmt0, *vcond0; + bool invert; +- tree type; +- tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type); ++ tree type, lhs0, rhs0; ++ tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0, ++ &rhs0, &vcond0); + if (cmp0 == ERROR_MARK) + continue; + +@@ -3904,26 +3919,20 @@ optimize_vec_cond_expr (tree_code opcode, vec *ops) + { + tree &elt1 = (*ops)[j]->op; + +- gassign *stmt1; +- tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL); ++ gassign *stmt1, *vcond1; ++ tree lhs1, rhs1; ++ tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1, ++ &rhs1, &vcond1); + if (cmp1 == ERROR_MARK) + continue; + +- tree cond0 = gimple_assign_rhs1 (stmt0); +- tree x0 = TREE_OPERAND (cond0, 0); +- tree y0 = TREE_OPERAND (cond0, 1); +- +- tree cond1 = gimple_assign_rhs1 (stmt1); +- tree x1 = TREE_OPERAND (cond1, 0); +- tree y1 = TREE_OPERAND (cond1, 1); +- + tree comb; + if (opcode == BIT_AND_EXPR) +- comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1, +- y1); ++ comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0, ++ cmp1, lhs1, rhs1); + else if (opcode == BIT_IOR_EXPR) +- comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1, +- y1); ++ comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0, ++ cmp1, lhs1, rhs1); + else + gcc_unreachable (); + if (comb == NULL) +@@ -3933,19 +3942,22 @@ optimize_vec_cond_expr (tree_code opcode, vec *ops) + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Transforming "); +- print_generic_expr (dump_file, cond0); ++ print_generic_expr (dump_file, gimple_assign_lhs (stmt0)); + fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? 
'&' : '|'); +- print_generic_expr (dump_file, cond1); ++ print_generic_expr (dump_file, gimple_assign_lhs (stmt1)); + fprintf (dump_file, " into "); + print_generic_expr (dump_file, comb); + fputc ('\n', dump_file); + } + +- gimple_assign_set_rhs1 (stmt0, comb); ++ gimple_stmt_iterator gsi = gsi_for_stmt (vcond0); ++ tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE, ++ true, GSI_SAME_STMT); + if (invert) +- std::swap (*gimple_assign_rhs2_ptr (stmt0), +- *gimple_assign_rhs3_ptr (stmt0)); +- update_stmt (stmt0); ++ swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0), ++ gimple_assign_rhs3_ptr (vcond0)); ++ gimple_assign_set_rhs1 (vcond0, exp); ++ update_stmt (vcond0); + + elt1 = error_mark_node; + any_changes = true; +diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c +index 37c3956a4..0ec4412bc 100644 +--- a/gcc/tree-vect-generic.c ++++ b/gcc/tree-vect-generic.c +@@ -693,12 +693,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, + if (addend == NULL_TREE + && expand_vec_cond_expr_p (type, type, LT_EXPR)) + { +- tree zero, cst, cond, mask_type; +- gimple *stmt; ++ tree zero, cst, mask_type, mask; ++ gimple *stmt, *cond; + + mask_type = truth_type_for (type); + zero = build_zero_cst (type); +- cond = build2 (LT_EXPR, mask_type, op0, zero); ++ mask = make_ssa_name (mask_type); ++ cond = gimple_build_assign (mask, LT_EXPR, op0, zero); ++ gsi_insert_before (gsi, cond, GSI_SAME_STMT); + tree_vector_builder vec (type, nunits, 1); + for (i = 0; i < nunits; i++) + vec.quick_push (build_int_cst (TREE_TYPE (type), +@@ -706,8 +708,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, + << shifts[i]) - 1)); + cst = vec.build (); + addend = make_ssa_name (type); +- stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond, +- cst, zero); ++ stmt ++ = gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero); + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); + } + } +@@ -949,21 +951,28 @@ 
expand_vector_condition (gimple_stmt_iterator *gsi) + tree index = bitsize_int (0); + tree comp_width = width; + tree comp_index = index; +- int i; + location_t loc = gimple_location (gsi_stmt (*gsi)); ++ tree_code code = TREE_CODE (a); + +- if (!is_gimple_val (a)) ++ if (code == SSA_NAME) + { +- gcc_assert (COMPARISON_CLASS_P (a)); +- a_is_comparison = true; +- a1 = TREE_OPERAND (a, 0); +- a2 = TREE_OPERAND (a, 1); +- comp_inner_type = TREE_TYPE (TREE_TYPE (a1)); +- comp_width = TYPE_SIZE (comp_inner_type); ++ gassign *assign = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (a)); ++ if (assign != NULL && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison) ++ { ++ a_is_comparison = true; ++ a1 = gimple_assign_rhs1 (assign); ++ a2 = gimple_assign_rhs2 (assign); ++ code = gimple_assign_rhs_code (assign); ++ comp_inner_type = TREE_TYPE (TREE_TYPE (a1)); ++ comp_width = vector_element_bits_tree (TREE_TYPE (a1)); ++ } + } + +- if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a))) +- return; ++ if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code)) ++ { ++ gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST); ++ return; ++ } + + /* Handle vector boolean types with bitmasks. 
If there is a comparison + and we can expand the comparison into the vector boolean bitmask, +@@ -986,7 +995,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi) + : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a)))) + { + if (a_is_comparison) +- a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2); ++ a = gimplify_build2 (gsi, code, type, a1, a2); + a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b); + a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a); + a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c); +@@ -1017,7 +1026,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi) + + int nunits = nunits_for_known_piecewise_op (type); + vec_alloc (v, nunits); +- for (i = 0; i < nunits; i++) ++ for (int i = 0; i < nunits; i++) + { + tree aa, result; + tree bb = tree_vec_extract (gsi, inner_type, b, width, index); +@@ -1028,7 +1037,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi) + comp_width, comp_index); + tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, + comp_width, comp_index); +- aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2); ++ aa = fold_build2 (code, cond_type, aa1, aa2); + } + else if (a_is_scalar_bitmask) + { +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index 0bdf9a547..2c2197022 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -11100,8 +11100,12 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + { + vec_cond_rhs = vec_oprnds1[i]; + if (bitop1 == NOP_EXPR) +- vec_compare = build2 (cond_code, vec_cmp_type, +- vec_cond_lhs, vec_cond_rhs); ++ { ++ gimple_seq stmts = NULL; ++ vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type, ++ vec_cond_lhs, vec_cond_rhs); ++ gsi_insert_before (gsi, stmts, GSI_SAME_STMT); ++ } + else + { + new_temp = make_ssa_name (vec_cmp_type); +-- +2.27.0.windows.1 + diff --git a/0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch b/0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch new file mode 
100644 index 0000000000000000000000000000000000000000..7ef1e00ab05963a51e7274c3ea8cd8f5342d9a2b --- /dev/null +++ b/0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch @@ -0,0 +1,379 @@ +From 21d265af074726b166e08301a2f847c474fcb680 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford +Date: Tue, 30 Nov 2021 09:52:24 +0000 +Subject: [PATCH 21/35] [Backport] gimple-match: Add a gimple_extract_op + function + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=33973fa754de1f95d459bfca66c0d80deec36537 + +code_helper and gimple_match_op seem like generally useful ways +of summing up a gimple_assign or gimple_call (or gimple_cond). +This patch adds a gimple_extract_op function that can be used +for that. + +gcc/ + * gimple-match.h (code_helper): Add functions for querying whether + the code represents an internal_fn or a built_in_function. + Provide explicit conversion operators for both cases. + (gimple_extract_op): Declare. + * gimple-match-head.c (gimple_extract): New function, extracted from... + (gimple_simplify): ...here. + (gimple_extract_op): New function. +--- + gcc/gimple-match-head.c | 219 ++++++++++++++++++++-------------------- + gcc/gimple-match.h | 27 +++++ + 2 files changed, 135 insertions(+), 111 deletions(-) + +diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c +index 9b3e7298d..c1dea1734 100644 +--- a/gcc/gimple-match-head.c ++++ b/gcc/gimple-match-head.c +@@ -884,12 +884,20 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op, + return true; + } + +-/* The main STMT based simplification entry. It is used by the fold_stmt +- and the fold_stmt_to_constant APIs. */ ++/* Common subroutine of gimple_extract_op and gimple_simplify. Try to ++ describe STMT in RES_OP, returning true on success. 
Before recording ++ an operand, call: + +-bool +-gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, +- tree (*valueize)(tree), tree (*top_valueize)(tree)) ++ - VALUEIZE_CONDITION for a COND_EXPR condition ++ - VALUEIZE_OP for every other top-level operand ++ ++ Both routines take a tree argument and return a tree. */ ++ ++template<typename ValueizeOp, typename ValueizeCondition> ++inline bool ++gimple_extract (gimple *stmt, gimple_match_op *res_op, ++ ValueizeOp valueize_op, ++ ValueizeCondition valueize_condition) + { + switch (gimple_code (stmt)) + { +@@ -905,101 +913,50 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, + || code == VIEW_CONVERT_EXPR) + { + tree op0 = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); +- bool valueized = false; +- op0 = do_valueize (op0, top_valueize, valueized); +- res_op->set_op (code, type, op0); +- return (gimple_resimplify1 (seq, res_op, valueize) +- || valueized); ++ res_op->set_op (code, type, valueize_op (op0)); ++ return true; + } + else if (code == BIT_FIELD_REF) + { + tree rhs1 = gimple_assign_rhs1 (stmt); +- tree op0 = TREE_OPERAND (rhs1, 0); +- bool valueized = false; +- op0 = do_valueize (op0, top_valueize, valueized); ++ tree op0 = valueize_op (TREE_OPERAND (rhs1, 0)); + res_op->set_op (code, type, op0, + TREE_OPERAND (rhs1, 1), + TREE_OPERAND (rhs1, 2), + REF_REVERSE_STORAGE_ORDER (rhs1)); +- if (res_op->reverse) +- return valueized; +- return (gimple_resimplify3 (seq, res_op, valueize) +- || valueized); ++ return true; + } +- else if (code == SSA_NAME +- && top_valueize) ++ else if (code == SSA_NAME) + { + tree op0 = gimple_assign_rhs1 (stmt); +- tree valueized = top_valueize (op0); +- if (!valueized || op0 == valueized) +- return false; +- res_op->set_op (TREE_CODE (op0), type, valueized); ++ res_op->set_op (TREE_CODE (op0), type, valueize_op (op0)); + return true; + } + break; + case GIMPLE_UNARY_RHS: + { + tree rhs1 = gimple_assign_rhs1 (stmt); +- bool valueized = false; +- rhs1 = do_valueize (rhs1, top_valueize, 
valueized); +- res_op->set_op (code, type, rhs1); +- return (gimple_resimplify1 (seq, res_op, valueize) +- || valueized); ++ res_op->set_op (code, type, valueize_op (rhs1)); ++ return true; + } + case GIMPLE_BINARY_RHS: + { +- tree rhs1 = gimple_assign_rhs1 (stmt); +- tree rhs2 = gimple_assign_rhs2 (stmt); +- bool valueized = false; +- rhs1 = do_valueize (rhs1, top_valueize, valueized); +- rhs2 = do_valueize (rhs2, top_valueize, valueized); ++ tree rhs1 = valueize_op (gimple_assign_rhs1 (stmt)); ++ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt)); + res_op->set_op (code, type, rhs1, rhs2); +- return (gimple_resimplify2 (seq, res_op, valueize) +- || valueized); ++ return true; + } + case GIMPLE_TERNARY_RHS: + { +- bool valueized = false; + tree rhs1 = gimple_assign_rhs1 (stmt); +- /* If this is a [VEC_]COND_EXPR first try to simplify an +- embedded GENERIC condition. */ +- if (code == COND_EXPR +- || code == VEC_COND_EXPR) +- { +- if (COMPARISON_CLASS_P (rhs1)) +- { +- tree lhs = TREE_OPERAND (rhs1, 0); +- tree rhs = TREE_OPERAND (rhs1, 1); +- lhs = do_valueize (lhs, top_valueize, valueized); +- rhs = do_valueize (rhs, top_valueize, valueized); +- gimple_match_op res_op2 (res_op->cond, TREE_CODE (rhs1), +- TREE_TYPE (rhs1), lhs, rhs); +- if ((gimple_resimplify2 (seq, &res_op2, valueize) +- || valueized) +- && res_op2.code.is_tree_code ()) +- { +- valueized = true; +- if (TREE_CODE_CLASS ((enum tree_code) res_op2.code) +- == tcc_comparison) +- rhs1 = build2 (res_op2.code, TREE_TYPE (rhs1), +- res_op2.ops[0], res_op2.ops[1]); +- else if (res_op2.code == SSA_NAME +- || res_op2.code == INTEGER_CST +- || res_op2.code == VECTOR_CST) +- rhs1 = res_op2.ops[0]; +- else +- valueized = false; +- } +- } +- } +- tree rhs2 = gimple_assign_rhs2 (stmt); +- tree rhs3 = gimple_assign_rhs3 (stmt); +- rhs1 = do_valueize (rhs1, top_valueize, valueized); +- rhs2 = do_valueize (rhs2, top_valueize, valueized); +- rhs3 = do_valueize (rhs3, top_valueize, valueized); ++ if (code == 
COND_EXPR && COMPARISON_CLASS_P (rhs1)) ++ rhs1 = valueize_condition (rhs1); ++ else ++ rhs1 = valueize_op (rhs1); ++ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt)); ++ tree rhs3 = valueize_op (gimple_assign_rhs3 (stmt)); + res_op->set_op (code, type, rhs1, rhs2, rhs3); +- return (gimple_resimplify3 (seq, res_op, valueize) +- || valueized); ++ return true; + } + default: + gcc_unreachable (); +@@ -1013,7 +970,6 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, + && gimple_call_num_args (stmt) >= 1 + && gimple_call_num_args (stmt) <= 5) + { +- bool valueized = false; + combined_fn cfn; + if (gimple_call_internal_p (stmt)) + cfn = as_combined_fn (gimple_call_internal_fn (stmt)); +@@ -1023,7 +979,7 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, + if (!fn) + return false; + +- fn = do_valueize (fn, top_valueize, valueized); ++ fn = valueize_op (fn); + if (TREE_CODE (fn) != ADDR_EXPR + || TREE_CODE (TREE_OPERAND (fn, 0)) != FUNCTION_DECL) + return false; +@@ -1039,47 +995,17 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, + unsigned int num_args = gimple_call_num_args (stmt); + res_op->set_op (cfn, TREE_TYPE (gimple_call_lhs (stmt)), num_args); + for (unsigned i = 0; i < num_args; ++i) +- { +- tree arg = gimple_call_arg (stmt, i); +- res_op->ops[i] = do_valueize (arg, top_valueize, valueized); +- } +- if (internal_fn_p (cfn) +- && try_conditional_simplification (as_internal_fn (cfn), +- res_op, seq, valueize)) +- return true; +- switch (num_args) +- { +- case 1: +- return (gimple_resimplify1 (seq, res_op, valueize) +- || valueized); +- case 2: +- return (gimple_resimplify2 (seq, res_op, valueize) +- || valueized); +- case 3: +- return (gimple_resimplify3 (seq, res_op, valueize) +- || valueized); +- case 4: +- return (gimple_resimplify4 (seq, res_op, valueize) +- || valueized); +- case 5: +- return (gimple_resimplify5 (seq, res_op, valueize) +- || valueized); +- default: +- 
gcc_unreachable (); +- } ++ res_op->ops[i] = valueize_op (gimple_call_arg (stmt, i)); ++ return true; + } + break; + + case GIMPLE_COND: + { +- tree lhs = gimple_cond_lhs (stmt); +- tree rhs = gimple_cond_rhs (stmt); +- bool valueized = false; +- lhs = do_valueize (lhs, top_valueize, valueized); +- rhs = do_valueize (rhs, top_valueize, valueized); ++ tree lhs = valueize_op (gimple_cond_lhs (stmt)); ++ tree rhs = valueize_op (gimple_cond_rhs (stmt)); + res_op->set_op (gimple_cond_code (stmt), boolean_type_node, lhs, rhs); +- return (gimple_resimplify2 (seq, res_op, valueize) +- || valueized); ++ return true; + } + + default: +@@ -1089,6 +1015,77 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, + return false; + } + ++/* Try to describe STMT in RES_OP, returning true on success. ++ For GIMPLE_CONDs, describe the condition that is being tested. ++ For GIMPLE_ASSIGNs, describe the rhs of the assignment. ++ For GIMPLE_CALLs, describe the call. */ ++ ++bool ++gimple_extract_op (gimple *stmt, gimple_match_op *res_op) ++{ ++ auto nop = [](tree op) { return op; }; ++ return gimple_extract (stmt, res_op, nop, nop); ++} ++ ++/* The main STMT based simplification entry. It is used by the fold_stmt ++ and the fold_stmt_to_constant APIs. 
*/ ++ ++bool ++gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, ++ tree (*valueize)(tree), tree (*top_valueize)(tree)) ++{ ++ bool valueized = false; ++ auto valueize_op = [&](tree op) ++ { ++ return do_valueize (op, top_valueize, valueized); ++ }; ++ auto valueize_condition = [&](tree op) -> tree ++ { ++ bool cond_valueized = false; ++ tree lhs = do_valueize (TREE_OPERAND (op, 0), top_valueize, ++ cond_valueized); ++ tree rhs = do_valueize (TREE_OPERAND (op, 1), top_valueize, ++ cond_valueized); ++ gimple_match_op res_op2 (res_op->cond, TREE_CODE (op), ++ TREE_TYPE (op), lhs, rhs); ++ if ((gimple_resimplify2 (seq, &res_op2, valueize) ++ || cond_valueized) ++ && res_op2.code.is_tree_code ()) ++ { ++ if (TREE_CODE_CLASS ((tree_code) res_op2.code) == tcc_comparison) ++ { ++ valueized = true; ++ return build2 (res_op2.code, TREE_TYPE (op), ++ res_op2.ops[0], res_op2.ops[1]); ++ } ++ else if (res_op2.code == SSA_NAME ++ || res_op2.code == INTEGER_CST ++ || res_op2.code == VECTOR_CST) ++ { ++ valueized = true; ++ return res_op2.ops[0]; ++ } ++ } ++ return valueize_op (op); ++ }; ++ ++ if (!gimple_extract (stmt, res_op, valueize_op, valueize_condition)) ++ return false; ++ ++ if (res_op->code.is_internal_fn ()) ++ { ++ internal_fn ifn = internal_fn (res_op->code); ++ if (try_conditional_simplification (ifn, res_op, seq, valueize)) ++ return true; ++ } ++ ++ if (!res_op->reverse ++ && res_op->num_ops ++ && res_op->resimplify (seq, valueize)) ++ return true; ++ ++ return valueized; ++} + + /* Helper for the autogenerated code, valueize OP. 
*/ + +diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h +index 097898aed..39858c45f 100644 +--- a/gcc/gimple-match.h ++++ b/gcc/gimple-match.h +@@ -33,13 +33,39 @@ public: + code_helper (combined_fn fn) : rep (-(int) fn) {} + operator tree_code () const { return (tree_code) rep; } + operator combined_fn () const { return (combined_fn) -rep; } ++ explicit operator internal_fn () const; ++ explicit operator built_in_function () const; + bool is_tree_code () const { return rep > 0; } + bool is_fn_code () const { return rep < 0; } ++ bool is_internal_fn () const; ++ bool is_builtin_fn () const; + int get_rep () const { return rep; } + private: + int rep; + }; + ++inline code_helper::operator internal_fn () const ++{ ++ return as_internal_fn (combined_fn (*this)); ++} ++ ++inline code_helper::operator built_in_function () const ++{ ++ return as_builtin_fn (combined_fn (*this)); ++} ++ ++inline bool ++code_helper::is_internal_fn () const ++{ ++ return is_fn_code () && internal_fn_p (combined_fn (*this)); ++} ++ ++inline bool ++code_helper::is_builtin_fn () const ++{ ++ return is_fn_code () && builtin_fn_p (combined_fn (*this)); ++} ++ + /* Represents the condition under which an operation should happen, + and the value to use otherwise. The condition applies elementwise + (as for VEC_COND_EXPR) if the values are vectors. 
*/ +@@ -333,6 +359,7 @@ gimple_simplified_result_is_gimple_val (const gimple_match_op *op) + + extern tree (*mprts_hook) (gimple_match_op *); + ++bool gimple_extract_op (gimple *, gimple_match_op *); + bool gimple_simplify (gimple *, gimple_match_op *, gimple_seq *, + tree (*)(tree), tree (*)(tree)); + tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *, +-- +2.27.0.windows.1 + diff --git a/0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch b/0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch new file mode 100644 index 0000000000000000000000000000000000000000..ab9ac6ef54a0c1aee594ed6726f2f8cc1f6f6352 --- /dev/null +++ b/0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch @@ -0,0 +1,1004 @@ +From f6b6948de1d836b594ad388388b7121dd7a702cb Mon Sep 17 00:00:00 2001 +From: Richard Sandiford +Date: Tue, 15 Feb 2022 18:09:35 +0000 +Subject: [PATCH 22/35] [Backport] aarch64: Fix subs_compare_2.c regression + [PR100874] +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=8e84b2b37a541b27feea69769fc314d534464ebd + +subs_compare_2.c tests that we can use a SUBS+CSEL sequence for: + +unsigned int +foo (unsigned int a, unsigned int b) +{ + unsigned int x = a - 4; + if (a < 4) + return x; + else + return 0; +} + +As Andrew notes in the PR, this is effectively MIN (x, 4) - 4, +and it is now recognised as such by phiopt. Previously it was +if-converted in RTL instead. + +I tried to look for ways to generalise this to other situations +and to other ?:-style operations, not just max and min. However, +for general ?: we tend to push an outer “- CST” into the arms of +the ?: -- at least if one of them simplifies -- so I didn't find +any useful abstraction. + +This patch therefore adds a pattern specifically for +max/min(a,cst)-cst. I'm not thrilled at having to do this, +but it seems like the least worst fix in the circumstances. 
+Also, max(a,cst)-cst for unsigned a is a useful saturating +subtraction idiom and so is arguably worth its own code +for that reason. + +gcc/ + PR target/100874 + * config/aarch64/aarch64-protos.h (aarch64_maxmin_plus_const): + Declare. + * config/aarch64/aarch64.cc (aarch64_maxmin_plus_const): New function. + * config/aarch64/aarch64.md (*aarch64_minmax_plus): New pattern. + +gcc/testsuite/ + * gcc.target/aarch64/max_plus_1.c: New test. + * gcc.target/aarch64/max_plus_2.c: Likewise. + * gcc.target/aarch64/max_plus_3.c: Likewise. + * gcc.target/aarch64/max_plus_4.c: Likewise. + * gcc.target/aarch64/max_plus_5.c: Likewise. + * gcc.target/aarch64/max_plus_6.c: Likewise. + * gcc.target/aarch64/max_plus_7.c: Likewise. + * gcc.target/aarch64/min_plus_1.c: Likewise. + * gcc.target/aarch64/min_plus_2.c: Likewise. + * gcc.target/aarch64/min_plus_3.c: Likewise. + * gcc.target/aarch64/min_plus_4.c: Likewise. + * gcc.target/aarch64/min_plus_5.c: Likewise. + * gcc.target/aarch64/min_plus_6.c: Likewise. + * gcc.target/aarch64/min_plus_7.c: Likewise. 
+--- + gcc/config/aarch64/aarch64-protos.h | 1 + + gcc/config/aarch64/aarch64.c | 104 ++++++++++++ + gcc/config/aarch64/aarch64.md | 27 ++++ + gcc/testsuite/gcc.target/aarch64/max_plus_1.c | 149 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/max_plus_2.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/max_plus_3.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/max_plus_4.c | 30 ++++ + gcc/testsuite/gcc.target/aarch64/max_plus_5.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/max_plus_6.c | 9 ++ + gcc/testsuite/gcc.target/aarch64/max_plus_7.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/min_plus_1.c | 149 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/min_plus_2.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/min_plus_3.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/min_plus_4.c | 30 ++++ + gcc/testsuite/gcc.target/aarch64/min_plus_5.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/min_plus_6.c | 9 ++ + gcc/testsuite/gcc.target/aarch64/min_plus_7.c | 35 ++++ + 17 files changed, 788 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_7.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_7.c + +diff --git 
a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 226f3a8ff..9b6d309a7 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -696,6 +696,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool, + aarch64_addr_query_type = ADDR_QUERY_M); + machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); + rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); ++bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool); + rtx aarch64_load_tp (rtx); + + void aarch64_expand_compare_and_swap (rtx op[]); +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index f78942b04..85dbd3898 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -3038,6 +3038,110 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y, + return aarch64_gen_compare_reg (code, x, y); + } + ++/* Consider the operation: ++ ++ OPERANDS[0] = CODE (OPERANDS[1], OPERANDS[2]) + OPERANDS[3] ++ ++ where: ++ ++ - CODE is [SU]MAX or [SU]MIN ++ - OPERANDS[2] and OPERANDS[3] are constant integers ++ - OPERANDS[3] is a positive or negative shifted 12-bit immediate ++ - all operands have mode MODE ++ ++ Decide whether it is possible to implement the operation using: ++ ++ SUBS , OPERANDS[1], -OPERANDS[3] ++ or ++ ADDS , OPERANDS[1], OPERANDS[3] ++ ++ followed by: ++ ++ OPERANDS[0], , [wx]zr, ++ ++ where is one of CSEL, CSINV or CSINC. Return true if so. ++ If GENERATE_P is true, also update OPERANDS as follows: ++ ++ OPERANDS[4] = -OPERANDS[3] ++ OPERANDS[5] = the rtl condition representing ++ OPERANDS[6] = ++ OPERANDS[7] = 0 for CSEL, -1 for CSINV or 1 for CSINC. */ ++bool ++aarch64_maxmin_plus_const (rtx_code code, rtx *operands, bool generate_p) ++{ ++ signop sgn = (code == UMAX || code == UMIN ? UNSIGNED : SIGNED); ++ rtx dst = operands[0]; ++ rtx maxmin_op = operands[2]; ++ rtx add_op = operands[3]; ++ machine_mode mode = GET_MODE (dst); ++ ++ /* max (x, y) - z == (x >= y + 1 ? 
x : y) - z ++ == (x >= y ? x : y) - z ++ == (x > y ? x : y) - z ++ == (x > y - 1 ? x : y) - z ++ ++ min (x, y) - z == (x <= y - 1 ? x : y) - z ++ == (x <= y ? x : y) - z ++ == (x < y ? x : y) - z ++ == (x < y + 1 ? x : y) - z ++ ++ Check whether z is in { y - 1, y, y + 1 } and pick the form(s) for ++ which x is compared with z. Set DIFF to y - z. Thus the supported ++ combinations are as follows, with DIFF being the value after the ":": ++ ++ max (x, y) - z == x >= y + 1 ? x - (y + 1) : -1 [z == y + 1] ++ == x >= y ? x - y : 0 [z == y] ++ == x > y ? x - y : 0 [z == y] ++ == x > y - 1 ? x - (y - 1) : 1 [z == y - 1] ++ ++ min (x, y) - z == x <= y - 1 ? x - (y - 1) : 1 [z == y - 1] ++ == x <= y ? x - y : 0 [z == y] ++ == x < y ? x - y : 0 [z == y] ++ == x < y + 1 ? x - (y + 1) : -1 [z == y + 1]. */ ++ auto maxmin_val = rtx_mode_t (maxmin_op, mode); ++ auto add_val = rtx_mode_t (add_op, mode); ++ auto sub_val = wi::neg (add_val); ++ auto diff = wi::sub (maxmin_val, sub_val); ++ if (!(diff == 0 ++ || (diff == 1 && wi::gt_p (maxmin_val, sub_val, sgn)) ++ || (diff == -1 && wi::lt_p (maxmin_val, sub_val, sgn)))) ++ return false; ++ ++ if (!generate_p) ++ return true; ++ ++ rtx_code cmp; ++ switch (code) ++ { ++ case SMAX: ++ cmp = diff == 1 ? GT : GE; ++ break; ++ case UMAX: ++ cmp = diff == 1 ? GTU : GEU; ++ break; ++ case SMIN: ++ cmp = diff == -1 ? LT : LE; ++ break; ++ case UMIN: ++ cmp = diff == -1 ? LTU : LEU; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ rtx cc = gen_rtx_REG (CCmode, CC_REGNUM); ++ ++ operands[4] = immed_wide_int_const (sub_val, mode); ++ operands[5] = gen_rtx_fmt_ee (cmp, VOIDmode, cc, const0_rtx); ++ if (can_create_pseudo_p ()) ++ operands[6] = gen_reg_rtx (mode); ++ else ++ operands[6] = dst; ++ operands[7] = immed_wide_int_const (diff, mode); ++ ++ return true; ++} ++ ++ + /* Build the SYMBOL_REF for __tls_get_addr. 
*/ + + static GTY(()) rtx tls_get_addr_libfunc; +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index ee80261f1..7c2562f49 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -4499,6 +4499,33 @@ + } + ) + ++;; Implement MAX/MIN (A, B) - C using SUBS/ADDS followed by CSEL/CSINV/CSINC. ++;; See aarch64_maxmin_plus_const for details about the supported cases. ++(define_insn_and_split "*aarch64_minmax_plus" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (plus:GPI ++ (MAXMIN:GPI ++ (match_operand:GPI 1 "register_operand" "r") ++ (match_operand:GPI 2 "const_int_operand")) ++ (match_operand:GPI 3 "aarch64_plus_immediate"))) ++ (clobber (reg:CC CC_REGNUM))] ++ "aarch64_maxmin_plus_const (, operands, false)" ++ "#" ++ "&& 1" ++ [(parallel ++ [(set (reg:CC CC_REGNUM) ++ (compare:CC (match_dup 1) (match_dup 4))) ++ (set (match_dup 6) ++ (plus:GPI (match_dup 1) (match_dup 3)))]) ++ (set (match_dup 0) ++ (if_then_else:GPI (match_dup 5) (match_dup 6) (match_dup 7)))] ++ { ++ if (!aarch64_maxmin_plus_const (, operands, true)) ++ gcc_unreachable (); ++ } ++ [(set_attr "length" "8")] ++) ++ + ;; ------------------------------------------------------------------- + ;; Logical operations + ;; ------------------------------------------------------------------- +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_1.c b/gcc/testsuite/gcc.target/aarch64/max_plus_1.c +new file mode 100644 +index 000000000..ef336aeec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_1.c +@@ -0,0 +1,149 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (w[0-9]+), w0, #4 ++** csel w0, \1, wzr, g[te] ++** ret ++*/ ++/* ++** f2: ++** adds (w[0-9]+), w0, #4 ++** csel w0, \1, wzr, g[te] ++** ret ++*/ ++/* ++** f3: ++** adds (w[0-9]+), w0, #5 ++** csinc w0, \1, wzr, gt ++** ret ++*/ ++/* ++** f4: ++** adds (w[0-9]+), w0, #3 
++** csinv w0, \1, wzr, ge ++** ret ++*/ ++ ++#ifndef TYPE ++#define TYPE int32_t ++#define TYPE_MIN INT32_MIN ++#define TYPE_MAX INT32_MAX ++#define VALUE -4 ++#endif ++ ++#include ++ ++TYPE __attribute__((noipa)) ++f1 (TYPE x) ++{ ++ return (x > VALUE ? x - VALUE : 0); ++} ++ ++TYPE __attribute__((noipa)) ++f2 (TYPE x) ++{ ++ return (x > VALUE ? x : VALUE) - VALUE; ++} ++ ++TYPE __attribute__((noipa)) ++f3 (TYPE x) ++{ ++ return (x > VALUE ? x : VALUE) - (VALUE - 1); ++} ++ ++TYPE __attribute__((noipa)) ++f4 (TYPE x) ++{ ++ return (x > VALUE ? x : VALUE) - (VALUE + 1); ++} ++ ++TYPE __attribute__((noipa)) ++f5 (TYPE x) ++{ ++ return (x > VALUE ? x : VALUE) - (VALUE + 2); ++} ++ ++TYPE __attribute__((noipa)) ++f6 (TYPE x) ++{ ++ return (x > VALUE ? x : VALUE) - (VALUE - 2); ++} ++ ++int ++main (void) ++{ ++ TYPE max_test = TYPE_MAX; ++ if (TYPE_MIN < 0 && VALUE < 0) ++ max_test += VALUE; ++ ++ if (f1 (TYPE_MIN) != 0) ++ __builtin_abort (); ++ if (f1 (VALUE - 1) != 0) ++ __builtin_abort (); ++ if (f1 (VALUE) != 0) ++ __builtin_abort (); ++ if (f1 (VALUE + 1) != 1) ++ __builtin_abort (); ++ if (f1 (max_test) != max_test - VALUE) ++ __builtin_abort (); ++ ++ if (f2 (TYPE_MIN) != 0) ++ __builtin_abort (); ++ if (f2 (VALUE - 1) != 0) ++ __builtin_abort (); ++ if (f2 (VALUE) != 0) ++ __builtin_abort (); ++ if (f2 (VALUE + 1) != 1) ++ __builtin_abort (); ++ if (f2 (max_test) != max_test - VALUE) ++ __builtin_abort (); ++ ++ if (f3 (TYPE_MIN) != 1) ++ __builtin_abort (); ++ if (f3 (VALUE - 1) != 1) ++ __builtin_abort (); ++ if (f3 (VALUE) != 1) ++ __builtin_abort (); ++ if (f3 (VALUE + 1) != 2) ++ __builtin_abort (); ++ if (f3 (max_test - 1) != max_test - VALUE) ++ __builtin_abort (); ++ ++ if (f4 (TYPE_MIN) != -1) ++ __builtin_abort (); ++ if (f4 (VALUE - 1) != -1) ++ __builtin_abort (); ++ if (f4 (VALUE) != -1) ++ __builtin_abort (); ++ if (f4 (VALUE + 1) != 0) ++ __builtin_abort (); ++ if (f4 (max_test) != max_test - VALUE - 1) ++ __builtin_abort (); ++ ++ if (f5 
(TYPE_MIN) != -2) ++ __builtin_abort (); ++ if (f5 (VALUE - 1) != -2) ++ __builtin_abort (); ++ if (f5 (VALUE) != -2) ++ __builtin_abort (); ++ if (f5 (VALUE + 1) != -1) ++ __builtin_abort (); ++ if (f5 (max_test) != max_test - VALUE - 2) ++ __builtin_abort (); ++ ++ if (f6 (TYPE_MIN) != 2) ++ __builtin_abort (); ++ if (f6 (VALUE - 1) != 2) ++ __builtin_abort (); ++ if (f6 (VALUE) != 2) ++ __builtin_abort (); ++ if (f6 (VALUE + 1) != 3) ++ __builtin_abort (); ++ if (VALUE <= max_test - 2 && f6 (max_test - 2) != max_test - VALUE) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_2.c b/gcc/testsuite/gcc.target/aarch64/max_plus_2.c +new file mode 100644 +index 000000000..a2a1295d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_2.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (x[0-9]+), x0, #4094 ++** csel x0, \1, xzr, g[te] ++** ret ++*/ ++/* ++** f2: ++** adds (x[0-9]+), x0, #4094 ++** csel x0, \1, xzr, g[te] ++** ret ++*/ ++/* ++** f3: ++** adds (x[0-9]+), x0, #4095 ++** csinc x0, \1, xzr, gt ++** ret ++*/ ++/* ++** f4: ++** adds (x[0-9]+), x0, #4093 ++** csinv x0, \1, xzr, ge ++** ret ++*/ ++ ++#define TYPE int64_t ++#define TYPE_MIN INT64_MIN ++#define TYPE_MAX INT64_MAX ++#define VALUE -4094 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_3.c b/gcc/testsuite/gcc.target/aarch64/max_plus_3.c +new file mode 100644 +index 000000000..a9792ecc9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_3.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (w[0-9]+), w0, #4095 ++** csel w0, \1, wzr, g[te] ++** ret ++*/ ++/* ++** f2: ++** adds (w[0-9]+), w0, #4095 ++** csel w0, \1, wzr, g[te] ++** ret ++*/ ++/* ++** f3: ++** adds 
(w[0-9]+), w0, #4096 ++** csinc w0, \1, wzr, gt ++** ret ++*/ ++/* ++** f4: ++** adds (w[0-9]+), w0, #4094 ++** csinv w0, \1, wzr, ge ++** ret ++*/ ++ ++#define TYPE int32_t ++#define TYPE_MIN INT32_MIN ++#define TYPE_MAX INT32_MAX ++#define VALUE -4095 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_4.c b/gcc/testsuite/gcc.target/aarch64/max_plus_4.c +new file mode 100644 +index 000000000..5090fa101 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_4.c +@@ -0,0 +1,30 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (x[0-9]+), x0, #4096 ++** csel x0, \1, xzr, g[te] ++** ret ++*/ ++/* ++** f2: ++** adds (x[0-9]+), x0, #4096 ++** csel x0, \1, xzr, g[te] ++** ret ++*/ ++/* f3 out of range */ ++/* ++** f4: ++** adds (x[0-9]+), x0, #4095 ++** csinv x0, \1, xzr, ge ++** ret ++*/ ++ ++#define TYPE int64_t ++#define TYPE_MIN INT64_MIN ++#define TYPE_MAX INT64_MAX ++#define VALUE -4096 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_5.c b/gcc/testsuite/gcc.target/aarch64/max_plus_5.c +new file mode 100644 +index 000000000..63f3b3442 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_5.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (w[0-9]+), w0, #4095 ++** csel w0, \1, wzr, (cs|hi) ++** ret ++*/ ++/* ++** f2: ++** adds (w[0-9]+), w0, #4095 ++** csel w0, \1, wzr, (cs|hi) ++** ret ++*/ ++/* ++** f3: ++** adds (w[0-9]+), w0, #4096 ++** csinc w0, \1, wzr, hi ++** ret ++*/ ++/* ++** f4: ++** adds (w[0-9]+), w0, #4094 ++** csinv w0, \1, wzr, cs ++** ret ++*/ ++ ++#define TYPE uint32_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT32_MAX ++#define VALUE (uint32_t)-4095 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_6.c 
b/gcc/testsuite/gcc.target/aarch64/max_plus_6.c +new file mode 100644 +index 000000000..ad592c690 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_6.c +@@ -0,0 +1,9 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++ ++#define TYPE uint64_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT64_MAX ++#define VALUE (uint64_t)-2 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_7.c b/gcc/testsuite/gcc.target/aarch64/max_plus_7.c +new file mode 100644 +index 000000000..ac9f27dec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_7.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (x[0-9]+), x0, #3 ++** csel x0, \1, xzr, (cs|hi) ++** ret ++*/ ++/* ++** f2: ++** adds (x[0-9]+), x0, #3 ++** csel x0, \1, xzr, (cs|hi) ++** ret ++*/ ++/* ++** f3: ++** adds (x[0-9]+), x0, #4 ++** csinc x0, \1, xzr, hi ++** ret ++*/ ++/* ++** f4: ++** adds (x[0-9]+), x0, #2 ++** csinv x0, \1, xzr, cs ++** ret ++*/ ++ ++#define TYPE uint64_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT64_MAX ++#define VALUE (uint64_t)-3 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_1.c b/gcc/testsuite/gcc.target/aarch64/min_plus_1.c +new file mode 100644 +index 000000000..f4c9106df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_1.c +@@ -0,0 +1,149 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (w[0-9]+), w0, #?4 ++** csel w0, \1, wzr, l[te] ++** ret ++*/ ++/* ++** f2: ++** subs (w[0-9]+), w0, #?4 ++** csel w0, \1, wzr, l[te] ++** ret ++*/ ++/* ++** f3: ++** subs (w[0-9]+), w0, #?3 ++** csinc w0, \1, wzr, le ++** ret ++*/ ++/* ++** f4: ++** subs (w[0-9]+), w0, #?5 ++** csinv w0, \1, wzr, lt ++** ret ++*/ ++ ++#ifndef TYPE ++#define TYPE int32_t ++#define TYPE_MIN 
INT32_MIN ++#define TYPE_MAX INT32_MAX ++#define VALUE 4 ++#endif ++ ++#include ++ ++TYPE __attribute__((noipa)) ++f1 (TYPE x) ++{ ++ return (x < VALUE ? x - VALUE : 0); ++} ++ ++TYPE __attribute__((noipa)) ++f2 (TYPE x) ++{ ++ return (x < VALUE ? x : VALUE) - VALUE; ++} ++ ++TYPE __attribute__((noipa)) ++f3 (TYPE x) ++{ ++ return (x < VALUE ? x : VALUE) - (VALUE - 1); ++} ++ ++TYPE __attribute__((noipa)) ++f4 (TYPE x) ++{ ++ return (x < VALUE ? x : VALUE) - (VALUE + 1); ++} ++ ++TYPE __attribute__((noipa)) ++f5 (TYPE x) ++{ ++ return (x < VALUE ? x : VALUE) - (VALUE + 2); ++} ++ ++TYPE __attribute__((noipa)) ++f6 (TYPE x) ++{ ++ return (x < VALUE ? x : VALUE) - (VALUE - 2); ++} ++ ++int ++main (void) ++{ ++ TYPE min_test = TYPE_MIN; ++ if (TYPE_MIN < 0 && VALUE > 0) ++ min_test += VALUE; ++ ++ if (f1 (min_test) != min_test - VALUE) ++ __builtin_abort (); ++ if (f1 (VALUE - 1) != -1) ++ __builtin_abort (); ++ if (f1 (VALUE) != 0) ++ __builtin_abort (); ++ if (f1 (VALUE + 1) != 0) ++ __builtin_abort (); ++ if (f1 (TYPE_MAX) != 0) ++ __builtin_abort (); ++ ++ if (f2 (min_test) != min_test - VALUE) ++ __builtin_abort (); ++ if (f2 (VALUE - 1) != -1) ++ __builtin_abort (); ++ if (f2 (VALUE) != 0) ++ __builtin_abort (); ++ if (f2 (VALUE + 1) != 0) ++ __builtin_abort (); ++ if (f2 (TYPE_MAX) != 0) ++ __builtin_abort (); ++ ++ if (f3 (min_test) != min_test - VALUE + 1) ++ __builtin_abort (); ++ if (f3 (VALUE - 1) != 0) ++ __builtin_abort (); ++ if (f3 (VALUE) != 1) ++ __builtin_abort (); ++ if (f3 (VALUE + 1) != 1) ++ __builtin_abort (); ++ if (f3 (TYPE_MAX) != 1) ++ __builtin_abort (); ++ ++ if (f4 (min_test + 1) != min_test - VALUE) ++ __builtin_abort (); ++ if (f4 (VALUE - 1) != -2) ++ __builtin_abort (); ++ if (f4 (VALUE) != -1) ++ __builtin_abort (); ++ if (f4 (VALUE + 1) != -1) ++ __builtin_abort (); ++ if (f4 (TYPE_MAX) != -1) ++ __builtin_abort (); ++ ++ if (VALUE >= min_test + 2 && f5 (min_test + 2) != min_test - VALUE) ++ __builtin_abort (); ++ if (f5 (VALUE - 
1) != -3) ++ __builtin_abort (); ++ if (f5 (VALUE) != -2) ++ __builtin_abort (); ++ if (f5 (VALUE + 1) != -2) ++ __builtin_abort (); ++ if (f5 (TYPE_MAX) != -2) ++ __builtin_abort (); ++ ++ if (f6 (min_test) != min_test - VALUE + 2) ++ __builtin_abort (); ++ if (f6 (VALUE - 1) != 1) ++ __builtin_abort (); ++ if (f6 (VALUE) != 2) ++ __builtin_abort (); ++ if (f6 (VALUE + 1) != 2) ++ __builtin_abort (); ++ if (f6 (TYPE_MAX) != 2) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_2.c b/gcc/testsuite/gcc.target/aarch64/min_plus_2.c +new file mode 100644 +index 000000000..bc0141b72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_2.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (x[0-9]+), x0, #?4094 ++** csel x0, \1, xzr, l[te] ++** ret ++*/ ++/* ++** f2: ++** subs (x[0-9]+), x0, #?4094 ++** csel x0, \1, xzr, l[te] ++** ret ++*/ ++/* ++** f3: ++** subs (x[0-9]+), x0, #?4093 ++** csinc x0, \1, xzr, le ++** ret ++*/ ++/* ++** f4: ++** subs (x[0-9]+), x0, #?4095 ++** csinv x0, \1, xzr, lt ++** ret ++*/ ++ ++#define TYPE int64_t ++#define TYPE_MIN INT64_MIN ++#define TYPE_MAX INT64_MAX ++#define VALUE 4094 ++ ++#include "min_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_3.c b/gcc/testsuite/gcc.target/aarch64/min_plus_3.c +new file mode 100644 +index 000000000..1808e4b0c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_3.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (w[0-9]+), w0, #?4095 ++** csel w0, \1, wzr, l[te] ++** ret ++*/ ++/* ++** f2: ++** subs (w[0-9]+), w0, #?4095 ++** csel w0, \1, wzr, l[te] ++** ret ++*/ ++/* ++** f3: ++** subs (w[0-9]+), w0, #?4094 ++** csinc w0, \1, wzr, le ++** ret ++*/ ++/* ++** f4: ++** subs (w[0-9]+), 
w0, #?4096 ++** csinv w0, \1, wzr, lt ++** ret ++*/ ++ ++#define TYPE int32_t ++#define TYPE_MIN INT32_MIN ++#define TYPE_MAX INT32_MAX ++#define VALUE 4095 ++ ++#include "min_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_4.c b/gcc/testsuite/gcc.target/aarch64/min_plus_4.c +new file mode 100644 +index 000000000..6c581fed6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_4.c +@@ -0,0 +1,30 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (x[0-9]+), x0, #?4096 ++** csel x0, \1, xzr, l[te] ++** ret ++*/ ++/* ++** f2: ++** subs (x[0-9]+), x0, #?4096 ++** csel x0, \1, xzr, l[te] ++** ret ++*/ ++/* ++** f3: ++** subs (x[0-9]+), x0, #?4095 ++** csinc x0, \1, xzr, le ++** ret ++*/ ++/* f4 out of range */ ++ ++#define TYPE int64_t ++#define TYPE_MIN INT64_MIN ++#define TYPE_MAX INT64_MAX ++#define VALUE 4096 ++ ++#include "min_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_5.c b/gcc/testsuite/gcc.target/aarch64/min_plus_5.c +new file mode 100644 +index 000000000..97542d507 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_5.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (w[0-9]+), w0, #?4095 ++** csel w0, \1, wzr, (cc|ls) ++** ret ++*/ ++/* ++** f2: ++** subs (w[0-9]+), w0, #?4095 ++** csel w0, \1, wzr, (cc|ls) ++** ret ++*/ ++/* ++** f3: ++** subs (w[0-9]+), w0, #?4094 ++** csinc w0, \1, wzr, ls ++** ret ++*/ ++/* ++** f4: ++** subs (w[0-9]+), w0, #?4096 ++** csinv w0, \1, wzr, cc ++** ret ++*/ ++ ++#define TYPE uint32_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT32_MAX ++#define VALUE 4095 ++ ++#include "min_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_6.c b/gcc/testsuite/gcc.target/aarch64/min_plus_6.c +new file mode 100644 +index 000000000..176533cb2 +--- 
/dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_6.c +@@ -0,0 +1,9 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++ ++#define TYPE uint64_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT64_MAX ++#define VALUE 1 ++ ++#include "min_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_7.c b/gcc/testsuite/gcc.target/aarch64/min_plus_7.c +new file mode 100644 +index 000000000..d6a217a51 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_7.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (x[0-9]+), x0, #?2 ++** csel x0, \1, xzr, (cc|ls) ++** ret ++*/ ++/* ++** f2: ++** subs (x[0-9]+), x0, #?2 ++** csel x0, \1, xzr, (cc|ls) ++** ret ++*/ ++/* ++** f3: ++** subs (x[0-9]+), x0, #?1 ++** csinc x0, \1, xzr, ls ++** ret ++*/ ++/* ++** f4: ++** subs (x[0-9]+), x0, #?3 ++** csinv x0, \1, xzr, cc ++** ret ++*/ ++ ++#define TYPE uint64_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT64_MAX ++#define VALUE 2 ++ ++#include "min_plus_1.c" +-- +2.27.0.windows.1 + diff --git a/0071-PHIOPT-Disable-the-match-A-CST1-0-when-the-CST1-is-n.patch b/0071-PHIOPT-Disable-the-match-A-CST1-0-when-the-CST1-is-n.patch new file mode 100644 index 0000000000000000000000000000000000000000..18c2e3bafca773de276e2490932c08d3f777227a --- /dev/null +++ b/0071-PHIOPT-Disable-the-match-A-CST1-0-when-the-CST1-is-n.patch @@ -0,0 +1,31 @@ +From b57c55b282e7a9a7b2cc0d3843e58fd7998685e6 Mon Sep 17 00:00:00 2001 +From: zhongyunde +Date: Fri, 4 Nov 2022 23:19:44 +0800 +Subject: [PATCH 23/35] [PHIOPT] Disable the match A?CST1:0 when the CST1 is + negative value + +Fix the regression of gcc.target/aarch64/sve/vcond_3.c + +gcc: + * match.pd (A?CST1:CST2): Disable the simplifications A? 
(-CST1):0 +--- + gcc/match.pd | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/gcc/match.pd b/gcc/match.pd +index 79a0228d2..fc1a34dd3 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3347,7 +3347,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (if (integer_onep (@1)) + (convert (convert:boolean_type_node @0))) + /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */ +- (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1)) ++ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (TREE_TYPE (@1)) ++ && integer_pow2p (@1)) + (with { + tree shift = build_int_cst (integer_type_node, tree_log2 (@1)); + } +-- +2.27.0.windows.1 + diff --git a/0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch b/0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch new file mode 100644 index 0000000000000000000000000000000000000000..a7c7f74b1791ca77e4bd581cb620ed9c37e79889 --- /dev/null +++ b/0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch @@ -0,0 +1,1770 @@ +From 6a7b9e30955e0da5258d8c4ab8de611c8a5653a5 Mon Sep 17 00:00:00 2001 +From: liyancheng <412998149@qq.com> +Date: Thu, 3 Nov 2022 20:11:18 +0800 +Subject: [PATCH 24/35] [Struct Reorg] Merge struct_layout pass into + struct_reorg + +1. Merge struct_layout pass into struct_reorg +2. Merge srmode into struct_layout_opt_level +3. 
Adapt to all relevant deja tests +--- + gcc/common.opt | 2 +- + gcc/ipa-struct-reorg/ipa-struct-reorg.c | 316 ++++++++---------- + gcc/opts.c | 15 +- + gcc/passes.def | 1 - + gcc/symbol-summary.h | 4 +- + gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c | 2 +- + .../gcc.dg/struct/dfe_ele_minus_verify.c | 2 +- + .../gcc.dg/struct/dfe_extr_board_init.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c | 2 +- + .../gcc.dg/struct/dfe_extr_mv_udc_core.c | 2 +- + .../gcc.dg/struct/dfe_extr_tcp_usrreq.c | 2 +- + .../gcc.dg/struct/dfe_extr_ui_main.c | 2 +- + .../gcc.dg/struct/dfe_mem_ref_offset.c | 2 +- + .../struct/dfe_mul_layer_ptr_record_bug.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c | 2 +- + .../gcc.dg/struct/dfe_ptr_negate_expr.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c | 2 +- + .../struct/rf_DTE_struct_instance_field.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c | 2 +- + .../gcc.dg/struct/rf_check_ptr_layers_bug.c | 2 +- + .../gcc.dg/struct/rf_create_fields_bug.c | 2 +- + .../gcc.dg/struct/rf_create_new_func_bug.c | 2 +- + .../gcc.dg/struct/rf_ele_minus_verify.c | 2 +- + .../gcc.dg/struct/rf_escape_by_base.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c | 2 +- + .../gcc.dg/struct/rf_mem_ref_offset.c | 2 +- + .../struct/rf_mul_layer_ptr_record_bug.c | 2 +- + .../gcc.dg/struct/rf_pass_conflict.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c | 2 +- + .../gcc.dg/struct/rf_ptr_negate_expr.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c | 2 +- + .../gcc.dg/struct/rf_rescusive_type.c | 2 +- + .../struct/rf_rewrite_assign_more_cmp.c | 2 +- + .../gcc.dg/struct/rf_rewrite_cond_bug.c | 2 +- + 
.../gcc.dg/struct/rf_rewrite_cond_more_cmp.c | 2 +- + .../gcc.dg/struct/rf_rewrite_phi_bug.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_visible_func.c | 2 +- + .../gcc.dg/struct/rf_void_ptr_param_func.c | 2 +- + gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c | 2 +- + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 4 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-1.c | 2 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-2.c | 2 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-3.c | 2 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-4.c | 2 +- + .../gcc.dg/struct/w_prof_global_array.c | 2 +- + .../gcc.dg/struct/w_prof_global_var.c | 2 +- + .../gcc.dg/struct/w_prof_local_array.c | 2 +- + .../gcc.dg/struct/w_prof_local_var.c | 2 +- + .../gcc.dg/struct/w_prof_single_str_global.c | 2 +- + gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c | 2 +- + .../gcc.dg/struct/w_ratio_cold_str.c | 2 +- + .../gcc.dg/struct/wo_prof_array_field.c | 2 +- + .../struct/wo_prof_array_through_pointer.c | 2 +- + .../gcc.dg/struct/wo_prof_double_malloc.c | 2 +- + .../gcc.dg/struct/wo_prof_empty_str.c | 2 +- + .../struct/wo_prof_escape_arg_to_local.c | 2 +- + .../struct/wo_prof_escape_substr_array.c | 2 +- + .../gcc.dg/struct/wo_prof_global_array.c | 2 +- + .../gcc.dg/struct/wo_prof_global_var.c | 2 +- + .../gcc.dg/struct/wo_prof_local_array.c | 2 +- + .../gcc.dg/struct/wo_prof_local_var.c | 2 +- + .../gcc.dg/struct/wo_prof_malloc_size_var-1.c | 2 +- + .../gcc.dg/struct/wo_prof_malloc_size_var.c | 2 +- + .../struct/wo_prof_mult_field_peeling.c | 2 +- + .../gcc.dg/struct/wo_prof_single_str_global.c | 2 +- + .../gcc.dg/struct/wo_prof_single_str_local.c | 2 +- + .../gcc.dg/struct/wo_prof_two_strs.c | 2 +- + gcc/timevar.def | 1 - + gcc/tree-pass.h | 1 - + gcc/tree.c | 4 +- + 76 files changed, 222 insertions(+), 260 deletions(-) + +diff --git a/gcc/common.opt b/gcc/common.opt +index be7bfee60..ad147f7a9 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1881,7 +1881,7 @@ Common Ignore + Does nothing. 
Preserved for backward compatibility. + + fipa-reorder-fields +-Common Report Var(flag_ipa_struct_layout) Init(0) Optimization ++Common Report Var(flag_ipa_reorder_fields) Init(0) Optimization + Perform structure fields reorder optimizations. + + fipa-struct-reorg +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +index 54c20ca3f..08cb51fee 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +@@ -232,12 +232,6 @@ is_from_void_ptr_parm (tree ssa_name) + && VOID_POINTER_P (TREE_TYPE (ssa_name))); + } + +-enum srmode +-{ +- NORMAL = 0, +- COMPLETE_STRUCT_RELAYOUT, +- STRUCT_LAYOUT_OPTIMIZE +-}; + + /* Enum the struct layout optimize level, + which should be the same as the option -fstruct-reorg=. */ +@@ -245,16 +239,17 @@ enum srmode + enum struct_layout_opt_level + { + NONE = 0, +- STRUCT_REORG, +- STRUCT_REORDER_FIELDS, +- DEAD_FIELD_ELIMINATION ++ STRUCT_SPLIT = 1 << 0, ++ COMPLETE_STRUCT_RELAYOUT = 1 << 1, ++ STRUCT_REORDER_FIELDS = 1 << 2, ++ DEAD_FIELD_ELIMINATION = 1 << 3 + }; + + static bool is_result_of_mult (tree arg, tree *num, tree struct_size); + bool isptrptr (tree type); + void get_base (tree &base, tree expr); + +-srmode current_mode; ++static unsigned int current_layout_opt_level; + + hash_map replace_type_map; + +@@ -607,7 +602,7 @@ void + srtype::simple_dump (FILE *f) + { + print_generic_expr (f, type); +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + fprintf (f, "(%d)", TYPE_UID (type)); + } +@@ -656,7 +651,7 @@ srfield::create_new_fields (tree newtype[max_split], + tree newfields[max_split], + tree newlast[max_split]) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + create_new_optimized_fields (newtype, newfields, newlast); + return; +@@ -857,7 +852,7 @@ srtype::create_new_type (void) + we are not splitting the struct into two 
clusters, + then just return false and don't change the type. */ + if (!createnewtype && maxclusters == 0 +- && current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ && current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + newtype[0] = type; + return false; +@@ -885,8 +880,7 @@ srtype::create_new_type (void) + sprintf(id, "%d", i); + if (tname) + { +- name = concat (tname, current_mode == STRUCT_LAYOUT_OPTIMIZE +- ? ".slo." : ".reorg.", id, NULL); ++ name = concat (tname, ".reorg.", id, NULL); + TYPE_NAME (newtype[i]) = build_decl (UNKNOWN_LOCATION, TYPE_DECL, + get_identifier (name), newtype[i]); + free (name); +@@ -896,8 +890,7 @@ srtype::create_new_type (void) + for (unsigned i = 0; i < fields.length (); i++) + { + srfield *f = fields[i]; +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION + && !(f->field_access & READ_FIELD)) + continue; + f->create_new_fields (newtype, newfields, newlast); +@@ -921,13 +914,12 @@ srtype::create_new_type (void) + + warn_padded = save_warn_padded; + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && replace_type_map.get (this->newtype[0]) == NULL) + replace_type_map.put (this->newtype[0], this->type); + if (dump_file) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION + && has_dead_field ()) + fprintf (dump_file, "Dead field elimination.\n"); + } +@@ -1046,8 +1038,7 @@ srfunction::create_new_decls (void) + sprintf(id, "%d", j); + if (tname) + { +- name = concat (tname, current_mode == STRUCT_LAYOUT_OPTIMIZE +- ? ".slo." 
: ".reorg.", id, NULL); ++ name = concat (tname, ".reorg.", id, NULL); + new_name = get_identifier (name); + free (name); + } +@@ -1266,7 +1257,7 @@ public: + { + } + +- unsigned execute (enum srmode mode); ++ unsigned execute (unsigned int opt); + void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL); + + // fields +@@ -2796,7 +2787,7 @@ escape_type escape_type_volatile_array_or_ptrptr (tree type) + return escape_volatile; + if (isarraytype (type)) + return escape_array; +- if (isptrptr (type) && (current_mode != STRUCT_LAYOUT_OPTIMIZE)) ++ if (isptrptr (type) && (current_layout_opt_level < STRUCT_REORDER_FIELDS)) + return escape_ptr_ptr; + return does_not_escape; + } +@@ -2817,14 +2808,13 @@ ipa_struct_reorg::record_field_type (tree field, srtype *base_srtype) + field_srfield->type = field_srtype; + field_srtype->add_field_site (field_srfield); + } +- if (field_srtype == base_srtype && current_mode != COMPLETE_STRUCT_RELAYOUT +- && current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ if (field_srtype == base_srtype && current_layout_opt_level == STRUCT_SPLIT) + { + base_srtype->mark_escape (escape_rescusive_type, NULL); + } + /* Types of non-pointer field are difficult to track the correctness + of the rewrite when it used by the escaped type. */ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (field_type) == RECORD_TYPE) + { + field_srtype->mark_escape (escape_instance_field, NULL); +@@ -2859,7 +2849,7 @@ ipa_struct_reorg::record_struct_field_types (tree base_type, + } + /* Types of non-pointer field are difficult to track the correctness + of the rewrite when it used by the escaped type. 
*/ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (field_type) == RECORD_TYPE) + { + base_srtype->mark_escape (escape_instance_field, NULL); +@@ -3043,8 +3033,7 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) + + /* Separate instance is hard to trace in complete struct + relayout optimization. */ +- if ((current_mode == COMPLETE_STRUCT_RELAYOUT +- || current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) + { + e = escape_separate_instance; +@@ -3149,7 +3138,7 @@ ipa_struct_reorg::find_vars (gimple *stmt) + /* Add a safe func mechanism. */ + bool l_find = true; + bool r_find = true; +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + l_find = !(current_function->is_safe_func + && TREE_CODE (lhs) == SSA_NAME +@@ -3195,7 +3184,7 @@ ipa_struct_reorg::find_vars (gimple *stmt) + } + } + } +- else if ((current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ else if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) + && (gimple_assign_rhs_code (stmt) == LE_EXPR + || gimple_assign_rhs_code (stmt) == LT_EXPR + || gimple_assign_rhs_code (stmt) == GE_EXPR +@@ -3206,7 +3195,7 @@ ipa_struct_reorg::find_vars (gimple *stmt) + find_var (gimple_assign_rhs2 (stmt), stmt); + } + /* find void ssa_name from stmt such as: _2 = _1 - old_arcs_1. 
*/ +- else if ((current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ else if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) + && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR + && types_compatible_p ( + TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs1 (stmt))), +@@ -3418,8 +3407,7 @@ ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) + default: + break; + } +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION) ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION) + { + /* Look for loads and stores. */ + walk_stmt_load_store_ops (stmt, this, find_field_p_load, +@@ -3590,11 +3578,12 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + size_def_stmt = SSA_NAME_DEF_STMT (arg); + } + else if (rhs_code == NEGATE_EXPR +- && current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ && current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + return trace_calculate_negate (size_def_stmt, num, struct_size); + } +- else if (rhs_code == NOP_EXPR && current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ else if (rhs_code == NOP_EXPR ++ && current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + return trace_calculate_diff (size_def_stmt, num); + } +@@ -3614,17 +3603,17 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + bool + ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if ((current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) + && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))) + { + return true; + } +- if ((current_mode == COMPLETE_STRUCT_RELAYOUT) ++ if ((current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT) + && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + return true; +- if ((current_mode == NORMAL) ++ if ((current_layout_opt_level == STRUCT_SPLIT) + && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) + || gimple_call_builtin_p 
(stmt, BUILT_IN_MALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) +@@ -3750,7 +3739,7 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple + /* x_1 = y.x_nodes; void *x; + Directly mark the structure pointer type assigned + to the void* variable as escape. */ +- else if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ else if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (side) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (side)) + && SSA_NAME_VAR (side) +@@ -4017,7 +4006,7 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + and doesn't mark escape follow.). */ + /* _1 = MEM[(struct arc_t * *)a_1]. + then base a_1: ssa_name - pointer_type - integer_type. */ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + bool is_int_ptr = POINTER_TYPE_P (TREE_TYPE (base)) + && (TREE_CODE (inner_type (TREE_TYPE (base))) +@@ -4081,7 +4070,7 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + /* Escape the operation of fetching field with pointer offset such as: + *(&(t->right)) = malloc (0); -> MEM[(struct node * *)_1 + 8B] = malloc (0); + */ +- if (current_mode != NORMAL ++ if (current_layout_opt_level > STRUCT_SPLIT + && (TREE_CODE (expr) == MEM_REF) && (offset != 0)) + { + gcc_assert (can_escape); +@@ -4233,7 +4222,7 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) + /* callee_func (_1, _2); + Check the callee func, instead of current func. 
*/ + if (!(free_or_realloc +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && safe_functions.contains ( + node->get_edge (stmt)->callee))) + && VOID_POINTER_P (argtypet)) +@@ -4265,14 +4254,7 @@ ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt) + realpart, imagpart, address, escape_from_base)) + return; + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) +- { +- if (!opt_for_fn (current_function_decl, flag_ipa_struct_layout)) +- { +- type->mark_escape (escape_non_optimize, stmt); +- } +- } +- else ++ if (current_layout_opt_level > NONE) + { + if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) + { +@@ -4379,7 +4361,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, + void + ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + { +- if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if (current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT + && handled_allocation_stmt (stmt)) + { + tree arg0 = gimple_call_arg (stmt, 0); +@@ -4490,7 +4472,7 @@ ipa_struct_reorg::check_definition_call (srdecl *decl, vec &worklist) + check_type_and_push (gimple_call_arg (stmt, 0), decl, worklist, stmt); + } + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + if (!handled_allocation_stmt (stmt)) + { +@@ -4544,7 +4526,8 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) + } + return; + } +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE && SSA_NAME_VAR (ssa_name) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS ++ && SSA_NAME_VAR (ssa_name) + && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (ssa_name)))) + { + type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); +@@ -4631,7 +4614,7 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec< + { + /* In Complete Struct Relayout opti, if lhs type is the same + as rhs type, we could return without any harm. 
*/ +- if (current_mode == COMPLETE_STRUCT_RELAYOUT) ++ if (current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT) + { + return; + } +@@ -4645,7 +4628,7 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec< + if (!get_type_field (other, base, indirect, type1, field, + realpart, imagpart, address, escape_from_base)) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + /* release INTEGER_TYPE cast to struct pointer. */ + bool cast_from_int_ptr = current_function->is_safe_func && base +@@ -4703,7 +4686,8 @@ get_base (tree &base, tree expr) + void + ipa_struct_reorg::check_ptr_layers (tree a_expr, tree b_expr, gimple* stmt) + { +- if (current_mode != STRUCT_LAYOUT_OPTIMIZE || current_function->is_safe_func ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS ++ || current_function->is_safe_func + || !(POINTER_TYPE_P (TREE_TYPE (a_expr))) + || !(POINTER_TYPE_P (TREE_TYPE (b_expr))) + || !handled_type (TREE_TYPE (a_expr)) +@@ -4779,12 +4763,9 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, vec &worklist) + tree rhs2 = gimple_cond_rhs (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_cond_code (stmt); +- if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) +- || (current_mode == COMPLETE_STRUCT_RELAYOUT +- && (code != EQ_EXPR && code != NE_EXPR +- && code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR)) +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if ((current_layout_opt_level == STRUCT_SPLIT ++ && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && (code != EQ_EXPR && code != NE_EXPR + && code != LT_EXPR && code != LE_EXPR + && code != GT_EXPR && code != GE_EXPR))) +@@ -4818,15 +4799,12 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, vec &worklist) + tree rhs2 = gimple_assign_rhs2 (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_assign_rhs_code 
(stmt); +- if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) +- || (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if ((current_layout_opt_level == STRUCT_SPLIT ++ && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && (code != EQ_EXPR && code != NE_EXPR + && code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR)) +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE +- && (code != EQ_EXPR && code != NE_EXPR +- && code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR))) ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -4945,11 +4923,11 @@ ipa_struct_reorg::record_function (cgraph_node *node) + escapes = escape_marked_as_used; + else if (!node->local) + { +- if (current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + escapes = escape_visible_function; + } +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE && node->externally_visible) ++ else if (node->externally_visible) + { + escapes = escape_visible_function; + } +@@ -4959,14 +4937,7 @@ ipa_struct_reorg::record_function (cgraph_node *node) + else if (!tree_versionable_function_p (node->decl)) + escapes = escape_noclonable_function; + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) +- { +- if (!opt_for_fn (node->decl, flag_ipa_struct_layout)) +- { +- escapes = escape_non_optimize; +- } +- } +- else if (current_mode == NORMAL || current_mode == COMPLETE_STRUCT_RELAYOUT) ++ if (current_layout_opt_level > NONE) + { + if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) + { +@@ -4978,10 +4949,10 @@ ipa_struct_reorg::record_function (cgraph_node *node) + gimple_stmt_iterator si; + + /* Add a safe func mechanism. 
*/ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + current_function->is_safe_func = safe_functions.contains (node); +- if (dump_file) ++ if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nfunction %s/%u: is_safe_func = %d\n", + node->name (), node->order, +@@ -5194,7 +5165,7 @@ ipa_struct_reorg::record_accesses (void) + } + + /* Add a safe func mechanism. */ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + record_safe_func_with_void_ptr_parm (); + } +@@ -5392,8 +5363,7 @@ ipa_struct_reorg::propagate_escape_via_empty_with_no_original (void) + void + ipa_struct_reorg::prune_escaped_types (void) + { +- if (current_mode != COMPLETE_STRUCT_RELAYOUT +- && current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level == STRUCT_SPLIT) + { + /* Detect recusive types and mark them as escaping. */ + detect_cycles (); +@@ -5401,7 +5371,7 @@ ipa_struct_reorg::prune_escaped_types (void) + mark them as escaping. */ + propagate_escape (); + } +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + propagate_escape_via_original (); + propagate_escape_via_empty_with_no_original (); +@@ -5461,7 +5431,7 @@ ipa_struct_reorg::prune_escaped_types (void) + if (function->args.is_empty () + && function->decls.is_empty () + && function->globals.is_empty () +- && current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ && current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + delete function; + functions.ordered_remove (i); +@@ -5489,7 +5459,7 @@ ipa_struct_reorg::prune_escaped_types (void) + /* The escape type is not deleted in STRUCT_LAYOUT_OPTIMIZE, + Then the type that contains the escaped type fields + can find complete information. 
*/ +- if (current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < types.length ();) + { +@@ -5539,7 +5509,7 @@ ipa_struct_reorg::create_new_types (void) + for (unsigned i = 0; i < types.length (); i++) + newtypes += types[i]->create_new_type (); + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < types.length (); i++) + { +@@ -5561,14 +5531,31 @@ ipa_struct_reorg::create_new_types (void) + } + } + +- if (dump_file) ++ if (current_layout_opt_level == STRUCT_SPLIT) + { +- if (newtypes) +- fprintf (dump_file, "\nNumber of structures to transform is %d\n", newtypes); +- else +- fprintf (dump_file, "\nNo structures to transform.\n"); ++ if (dump_file) ++ { ++ if (newtypes) ++ fprintf (dump_file, "\nNumber of structures to transform in" ++ " struct split is %d\n", newtypes); ++ else ++ fprintf (dump_file, "\nNo structures to transform in" ++ " struct split.\n"); ++ } ++ } ++ else ++ { ++ if (dump_file) ++ { ++ if (newtypes) ++ fprintf (dump_file, "\nNumber of structures to transform" ++ " is %d\n", newtypes); ++ else ++ fprintf (dump_file, "\nNo structures to transform.\n"); ++ } + } + ++ + return newtypes != 0; + } + +@@ -5663,8 +5650,7 @@ ipa_struct_reorg::create_new_args (cgraph_node *new_node) + char *name = NULL; + if (tname) + { +- name = concat (tname, current_mode == STRUCT_LAYOUT_OPTIMIZE +- ? ".slo.0" : ".reorg.0", NULL); ++ name = concat (tname, ".reorg.0", NULL); + new_name = get_identifier (name); + free (name); + } +@@ -5751,9 +5737,7 @@ ipa_struct_reorg::create_new_functions (void) + } + statistics_counter_event (NULL, "Create new function", 1); + new_node = node->create_version_clone_with_body ( +- vNULL, NULL, NULL, NULL, NULL, +- current_mode == STRUCT_LAYOUT_OPTIMIZE +- ? 
"slo" : "struct_reorg"); ++ vNULL, NULL, NULL, NULL, NULL, "struct_reorg"); + new_node->can_change_signature = node->can_change_signature; + new_node->make_local (); + f->newnode = new_node; +@@ -5871,7 +5855,7 @@ ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_ + newbase1 = build_fold_addr_expr (newbase1); + if (indirect) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + /* Supports the MEM_REF offset. + _1 = MEM[(struct arc *)ap_1 + 72B].flow; +@@ -5927,8 +5911,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + { + bool remove = false; + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION + && remove_dead_field_stmt (gimple_assign_lhs (stmt))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -5964,10 +5947,10 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + return remove; + } + +- if ((current_mode != STRUCT_LAYOUT_OPTIMIZE ++ if ((current_layout_opt_level < STRUCT_REORDER_FIELDS + && (gimple_assign_rhs_code (stmt) == EQ_EXPR + || gimple_assign_rhs_code (stmt) == NE_EXPR)) +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) + == tcc_comparison))) + { +@@ -5977,7 +5960,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + tree newrhs2[max_split]; + tree_code rhs_code = gimple_assign_rhs_code (stmt); + tree_code code = rhs_code == EQ_EXPR ? 
BIT_AND_EXPR : BIT_IOR_EXPR; +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && rhs_code != EQ_EXPR && rhs_code != NE_EXPR) + { + code = rhs_code; +@@ -6024,8 +6007,9 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + _6 = _4 + _5; + _5 = (long unsigned int) _3; + _3 = _1 - old_2. */ +- if (current_mode != STRUCT_LAYOUT_OPTIMIZE +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE && (num != NULL))) ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS ++ && (num != NULL))) + { + num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); + } +@@ -6053,7 +6037,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + } + + /* Support POINTER_DIFF_EXPR rewriting. */ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR) + { + tree rhs1 = gimple_assign_rhs1 (stmt); +@@ -6240,7 +6224,8 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + srfunction *f = find_function (node); + + /* Add a safe func mechanism. */ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE && f && f->is_safe_func) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS ++ && f && f->is_safe_func) + { + tree expr = gimple_call_arg (stmt, 0); + tree newexpr[max_split]; +@@ -6367,9 +6352,9 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) + tree_code rhs_code = gimple_cond_code (stmt); + + /* Handle only equals or not equals conditionals. 
*/ +- if ((current_mode != STRUCT_LAYOUT_OPTIMIZE ++ if ((current_layout_opt_level < STRUCT_REORDER_FIELDS + && (rhs_code != EQ_EXPR && rhs_code != NE_EXPR)) +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE_CLASS (rhs_code) != tcc_comparison)) + return false; + tree lhs = gimple_cond_lhs (stmt); +@@ -6429,7 +6414,7 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) + bool + ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + /* Delete debug gimple now. */ + return true; +@@ -6593,7 +6578,7 @@ ipa_struct_reorg::rewrite_functions (void) + then don't rewrite any accesses. */ + if (!create_new_types ()) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < functions.length (); i++) + { +@@ -6612,7 +6597,7 @@ ipa_struct_reorg::rewrite_functions (void) + return 0; + } + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE && dump_file) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS && dump_file) + { + fprintf (dump_file, "=========== all created newtypes: ===========\n\n"); + dump_newtypes (dump_file); +@@ -6622,13 +6607,13 @@ ipa_struct_reorg::rewrite_functions (void) + { + retval = TODO_remove_functions; + create_new_functions (); +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + prune_escaped_types (); + } + } + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < functions.length (); i++) + { +@@ -6794,13 +6779,13 @@ ipa_struct_reorg::execute_struct_relayout (void) + } + + unsigned int +-ipa_struct_reorg::execute (enum srmode mode) ++ipa_struct_reorg::execute (unsigned int opt) + { + unsigned int ret = 0; + +- if 
(mode == NORMAL || mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (opt != COMPLETE_STRUCT_RELAYOUT) + { +- current_mode = mode; ++ current_layout_opt_level = opt; + /* If there is a top-level inline-asm, + the pass immediately returns. */ + if (symtab->first_asm_symbol ()) +@@ -6809,20 +6794,20 @@ ipa_struct_reorg::execute (enum srmode mode) + } + record_accesses (); + prune_escaped_types (); +- if (current_mode == NORMAL) ++ if (opt == STRUCT_SPLIT) + { + analyze_types (); + } + + ret = rewrite_functions (); + } +- else if (mode == COMPLETE_STRUCT_RELAYOUT) ++ else // do COMPLETE_STRUCT_RELAYOUT + { + if (dump_file) + { + fprintf (dump_file, "\n\nTry Complete Struct Relayout:\n"); + } +- current_mode = COMPLETE_STRUCT_RELAYOUT; ++ current_layout_opt_level = COMPLETE_STRUCT_RELAYOUT; + if (symtab->first_asm_symbol ()) + { + return 0; +@@ -6861,67 +6846,48 @@ public: + virtual unsigned int execute (function *) + { + unsigned int ret = 0; +- ret = ipa_struct_reorg ().execute (NORMAL); +- if (!ret) ++ unsigned int ret_reorg = 0; ++ unsigned int level = 0; ++ switch (struct_layout_optimize_level) + { +- ret = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT); ++ case 3: level |= DEAD_FIELD_ELIMINATION; ++ // FALLTHRU ++ case 2: level |= STRUCT_REORDER_FIELDS; ++ // FALLTHRU ++ case 1: ++ level |= COMPLETE_STRUCT_RELAYOUT; ++ level |= STRUCT_SPLIT; ++ break; ++ case 0: break; ++ default: gcc_unreachable (); + } +- return ret; +- } + +-}; // class pass_ipa_struct_reorg +- +-bool +-pass_ipa_struct_reorg::gate (function *) +-{ +- return (optimize >= 3 +- && flag_ipa_struct_reorg +- /* Don't bother doing anything if the program has errors. */ +- && !seen_error () +- && flag_lto_partition == LTO_PARTITION_ONE +- /* Only enable struct optimizations in C since other +- languages' grammar forbid. */ +- && lang_c_p () +- /* Only enable struct optimizations in lto or whole_program. 
*/ +- && (in_lto_p || flag_whole_program)); +-} ++ /* Preserved for backward compatibility, reorder fields needs run before ++ struct split and complete struct relayout. */ ++ if (flag_ipa_reorder_fields && level < STRUCT_REORDER_FIELDS) ++ ret = ipa_struct_reorg ().execute (STRUCT_REORDER_FIELDS); + +-const pass_data pass_data_ipa_struct_layout = +-{ +- SIMPLE_IPA_PASS, // type +- "struct_layout", // name +- OPTGROUP_NONE, // optinfo_flags +- TV_IPA_STRUCT_LAYOUT, // tv_id +- 0, // properties_required +- 0, // properties_provided +- 0, // properties_destroyed +- 0, // todo_flags_start +- 0, // todo_flags_finish +-}; ++ if (level >= STRUCT_REORDER_FIELDS) ++ ret = ipa_struct_reorg ().execute (level); + +-class pass_ipa_struct_layout : public simple_ipa_opt_pass +-{ +-public: +- pass_ipa_struct_layout (gcc::context *ctxt) +- : simple_ipa_opt_pass (pass_data_ipa_struct_layout, ctxt) +- {} ++ if (level >= COMPLETE_STRUCT_RELAYOUT) ++ { ++ /* Preserved for backward compatibility. */ ++ ret_reorg = ipa_struct_reorg ().execute (STRUCT_SPLIT); ++ if (!ret_reorg) ++ ret_reorg = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT); ++ } + +- /* opt_pass methods: */ +- virtual bool gate (function *); +- virtual unsigned int execute (function *) +- { +- unsigned int ret = 0; +- ret = ipa_struct_reorg ().execute (STRUCT_LAYOUT_OPTIMIZE); +- return ret; ++ return ret | ret_reorg; + } + +-}; // class pass_ipa_struct_layout ++}; // class pass_ipa_struct_reorg + + bool +-pass_ipa_struct_layout::gate (function *) ++pass_ipa_struct_reorg::gate (function *) + { + return (optimize >= 3 +- && flag_ipa_struct_layout ++ && flag_ipa_struct_reorg + /* Don't bother doing anything if the program has errors. 
*/ + && !seen_error () + && flag_lto_partition == LTO_PARTITION_ONE +@@ -6939,9 +6905,3 @@ make_pass_ipa_struct_reorg (gcc::context *ctxt) + { + return new pass_ipa_struct_reorg (ctxt); + } +- +-simple_ipa_opt_pass * +-make_pass_ipa_struct_layout (gcc::context *ctxt) +-{ +- return new pass_ipa_struct_layout (ctxt); +-} +diff --git a/gcc/opts.c b/gcc/opts.c +index c3877c24e..f12b13599 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -2696,15 +2696,20 @@ common_handle_option (struct gcc_options *opts, + break; + + case OPT_fipa_struct_reorg_: +- opts->x_struct_layout_optimize_level = value; +- if (value > 1) +- { +- SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_struct_layout, value); +- } + /* No break here - do -fipa-struct-reorg processing. */ + /* FALLTHRU. */ + case OPT_fipa_struct_reorg: + opts->x_flag_ipa_struct_reorg = value; ++ if (value && !opts->x_struct_layout_optimize_level) ++ { ++ /* Using the -fipa-struct-reorg option is equivalent to using ++ -fipa-struct-reorg=1. */ ++ opts->x_struct_layout_optimize_level = 1; ++ } ++ break; ++ ++ case OPT_fipa_reorder_fields: ++ SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_struct_reorg, value); + break; + + case OPT_fprofile_generate_: +diff --git a/gcc/passes.def b/gcc/passes.def +index 94554cc1d..f3b6048d8 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -175,7 +175,6 @@ along with GCC; see the file COPYING3. If not see + INSERT_PASSES_AFTER (all_late_ipa_passes) + NEXT_PASS (pass_materialize_all_clones); + NEXT_PASS (pass_ipa_pta); +- NEXT_PASS (pass_ipa_struct_layout); + /* FIXME: this should a normal IP pass */ + NEXT_PASS (pass_ipa_struct_reorg); + NEXT_PASS (pass_omp_simd_clone); +diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h +index f62222a96..1e7341b24 100644 +--- a/gcc/symbol-summary.h ++++ b/gcc/symbol-summary.h +@@ -61,7 +61,7 @@ protected: + { + /* In structure optimizatons, we call new to ensure that + the allocated memory is initialized to 0. 
*/ +- if (flag_ipa_struct_layout || flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg) + return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T () + : new T (); + /* Call gcc_internal_because we do not want to call finalizer for +@@ -77,7 +77,7 @@ protected: + ggc_delete (item); + else + { +- if (flag_ipa_struct_layout || flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg) + delete item; + else + m_allocator.remove (item); +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +index 4261d2352..afa181e07 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +@@ -83,4 +83,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +index 42d38c63a..c87db2aba 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +@@ -57,4 +57,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c +index 4e52564b6..d217f7bd8 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c +@@ -74,4 +74,4 @@ LBF_DFU_If_Needed (void) + } + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c +index 
894e9f460..e56bf467b 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c +@@ -74,4 +74,4 @@ claw_snd_conn_req (struct net_device *dev, __u8 link) + return rc; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +index 13a226ee8..c86c4bb3c 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +@@ -53,4 +53,4 @@ dtrace_bcmp (const void *s1, const void *s2, size_t len) + return (0); + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +index 1fff2cb9d..8484d29d2 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +@@ -159,4 +159,4 @@ gc_gray_mark (mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) + return children; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +index 0f577667c..300b2dac4 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +@@ -123,4 +123,4 @@ hpsa_cmd_dev_match (struct ctlr_info *h, struct CommandList *c, + return match; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git 
a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c +index 9801f87f1..9397b98ea 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c +@@ -79,4 +79,4 @@ ep0_reset (struct mv_udc *udc) + } + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +index 5570c762e..0ae75e13e 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +@@ -55,4 +55,4 @@ tcp_usr_listen (struct socket *so, struct proc *p) + COMMON_END (PRU_LISTEN); + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +index 50ab9cc24..512fb37a7 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +@@ -58,4 +58,4 @@ UI_LoadMods () + } + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +index 53583fe82..0dea5517c 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +@@ -55,4 +55,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git 
a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +index fd675ec2e..00bd911c1 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +@@ -27,4 +27,4 @@ main() { + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +index 600e7908b..0cfa6554e 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +@@ -68,4 +68,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +index f411364a7..4a7069244 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +index a4e723763..b91efe10f 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c 
b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +index 882a695b0..1b6a462e2 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +@@ -72,4 +72,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +index 20ecee545..346c71264 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +@@ -91,4 +91,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c +index ad879fc11..8eb16c8d6 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c +@@ -21,4 +21,4 @@ main() + { + g(); + } +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +index f0c9d8f39..7d7641f01 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +@@ -79,4 +79,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +index fa5e6c2d0..63fb3f828 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +@@ -53,4 +53,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +index 2966869e7..8c431e15f 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +@@ -57,4 +57,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c +index b74b9e5e9..efc95a4cd 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c 
+@@ -80,4 +80,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +index cf85c6109..75fc10575 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +@@ -69,4 +69,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +index 61fd9f755..9fb06877b 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +@@ -55,4 +55,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c +index 2c115da02..e8eb0eaa0 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c +@@ -27,4 +27,4 @@ main() { + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git 
a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +index c7646d8b7..bd535afd0 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +@@ -106,4 +106,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c +index 01c000375..11393a197 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c +@@ -84,4 +84,4 @@ main () + return cnt; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c +index f962163fe..d601fae64 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c +@@ -68,4 +68,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c +index 6558b1797..4d5f25aa1 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c +index 6d528ed5b..b3891fde9 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c +@@ -31,4 +31,4 @@ main () + printf (" Tree.\n"); + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c +index e95cf2e5d..4df79e4f0 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c +index cb4054522..49d2106d1 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c +@@ -55,4 +55,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c +index 38bddbae5..f71c7894f 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c +@@ -54,4 +54,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c +index 86034f042..721cee2c6 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c +@@ -62,4 +62,4 @@ main () + return 0; + } 
+ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c +index aae7c4bc9..3871d3d99 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c +@@ -69,4 +69,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c +index 8672e7552..5ad206433 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c +@@ -55,4 +55,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c +index 2d67434a0..a002f9889 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c +@@ -78,4 +78,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */ +\ No newline at end of file +diff --git 
a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c +index a8cf2b63c..f77a062bd 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c +@@ -89,4 +89,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c +index b6cba3c34..cba6225a5 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c +@@ -51,4 +51,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c +index fb135ef0b..e3d219fe1 100644 +--- a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c ++++ b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c +@@ -22,4 +22,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +index ac5585813..2eebef768 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -64,8 +64,6 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout + "" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program" + + # -fipa-struct-reorg=2 +-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \ +- "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] \ + "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] \ +@@ -80,6 +78,8 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \ + "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout.c]] \ + "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \ ++ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" + + # -fipa-struct-reorg=3 + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +index 23444fe8b..a73ff8e7e 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +@@ -27,4 +27,4 @@ int main() + return g (); + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c +index 44babd35b..d7ab7d21c 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c +@@ -26,4 +26,4 @@ int main() + assert (f(1, 2) == 3); + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 2" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +index 2d1f95c99..9e5b192eb 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +@@ -25,4 +25,4 @@ int main() + f (NULL, NULL, 1); + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c +index e5a8a6c84..27b4b56e0 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c +@@ -56,4 +56,4 @@ main (void) + return 0; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c +index 733413a94..9e0f84da8 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c +@@ -26,4 +26,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c +index 0ef686e74..c868347e3 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c +@@ -39,4 +39,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c +index 23a53be53..185ff3125 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c +@@ -34,4 +34,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c +index 0cbb172f2..6294fb2a2 100644 +--- 
a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c +@@ -37,4 +37,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c +index f900b1349..3ca4e0e71 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c +@@ -28,4 +28,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c +index 13b4cdc70..ac99b9e62 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c +@@ -61,4 +61,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 2" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c +index dcc545964..afa145a57 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c ++++ b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c +@@ -40,4 +40,4 @@ main () + + /*--------------------------------------------------------------------------*/ + /* Arrays are not handled. 
*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c +index 6d6375fc1..7fa6ae275 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c +@@ -23,4 +23,4 @@ int main() + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c +index 9d3213408..b3bde5836 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c +@@ -35,4 +35,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c +index d79992a53..f2bb82b94 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c +@@ -26,4 +26,4 @@ int main() + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of 
structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c +index ee9b0d765..0685cf8fe 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c +@@ -41,4 +41,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c +index 9ebb2b4cc..1a0a5a9c6 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c +@@ -40,5 +40,5 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ + +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c +index 60d2466e1..9533538c4 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c +@@ -30,4 +30,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c 
b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c +index 1c5a3aa15..100a93868 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c +@@ -29,4 +29,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c +index a0d1467fe..669d0b886 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c +@@ -42,4 +42,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c +index 6c24e1c8b..ce6c1544c 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c +@@ -37,4 +37,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c +index 8f2f8143f..eca2ebf32 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c ++++ 
b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c +@@ -40,4 +40,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c +index 98bf01a6d..6f8f94d7d 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c +@@ -44,4 +44,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c +index 66b0f967c..2ca729d1f 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c +@@ -44,4 +44,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c +index d28bcfb02..6000b2919 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c +@@ -39,4 +39,4 @@ main () + + /*--------------------------------------------------------------------------*/ + /* Two more 
fields structure is not splitted. */ +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c +index 37a6a43a8..f4a103409 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c +@@ -31,4 +31,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c +index ca9a8efcf..0c97173eb 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c +@@ -31,4 +31,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c +index cba92e995..bc8eacc77 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c +@@ -64,4 +64,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 2" "struct_reorg" } } */ +diff --git 
a/gcc/timevar.def b/gcc/timevar.def +index e9866ebf0..2814b14f2 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -80,7 +80,6 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") +-DEFTIMEVAR (TV_IPA_STRUCT_LAYOUT , "ipa struct layout optimization") + DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 09dd9b289..2b4864b89 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -510,7 +510,6 @@ extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); +-extern simple_ipa_opt_pass *make_pass_ipa_struct_layout (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context + *ctxt); +diff --git a/gcc/tree.c b/gcc/tree.c +index 84a440b35..8bbd54e0d 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -5222,7 +5222,7 @@ fld_simplified_type_name (tree type) + /* Simplify type will cause that struct A and struct A within + struct B are different type pointers, so skip it in structure + optimizations. 
*/ +- if ((flag_ipa_struct_layout || flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg + && lang_c_p () + && flag_lto_partition == LTO_PARTITION_ONE + && (in_lto_p || flag_whole_program)) +@@ -5469,7 +5469,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld) + /* Simplify type will cause that struct A and struct A within + struct B are different type pointers, so skip it in structure + optimizations. */ +- if ((flag_ipa_struct_layout || flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg + && lang_c_p () + && flag_lto_partition == LTO_PARTITION_ONE + && (in_lto_p || flag_whole_program)) +-- +2.27.0.windows.1 + diff --git a/0073-PHIOPT-Add-A-B-op-CST-B-match-and-simplify-optimizat.patch b/0073-PHIOPT-Add-A-B-op-CST-B-match-and-simplify-optimizat.patch new file mode 100644 index 0000000000000000000000000000000000000000..805753234229c7a0ea3cb37fba223f4113a43f48 --- /dev/null +++ b/0073-PHIOPT-Add-A-B-op-CST-B-match-and-simplify-optimizat.patch @@ -0,0 +1,89 @@ +From 9af03694082c462bee86c167c78717089a93a188 Mon Sep 17 00:00:00 2001 +From: zhongyunde +Date: Sat, 5 Nov 2022 13:22:33 +0800 +Subject: [PATCH 25/35] [PHIOPT] Add A ? B op CST : B match and simplify + optimizations + + Refer to commit b6bdd7a4, use pattern match to simple + A ? B op CST : B (where CST is power of 2) simplifications. + Fixes the 1st issue of https://gitee.com/openeuler/gcc/issues/I5TSG0?from=project-issue. + + gcc/ + * match.pd (A ? B op CST : B): Add simplifcations for A ? B op POW2 : B + + gcc/testsuite/ + * gcc.dg/pr107190.c: New test. 
+--- + gcc/match.pd | 21 +++++++++++++++++++++ + gcc/testsuite/gcc.dg/pr107190.c | 27 +++++++++++++++++++++++++++ + 2 files changed, 48 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/pr107190.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index fc1a34dd3..5c5b5f89e 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3383,6 +3383,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++(if (canonicalize_math_p ()) ++/* These patterns are mostly used by PHIOPT to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. */ ++/* a ? x op C : x -> x op (a << log2(C)) when C is power of 2. */ ++ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate) ++ (simplify ++ (cond @0 (op:s @1 integer_pow2p@2) @1) ++ /* powerof2cst */ ++ (if (INTEGRAL_TYPE_P (type)) ++ (with { ++ tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); ++ } ++ (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; }))) ++ ) ++ ) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. 
*/ + /* This pattern implements two kinds simplification: +diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c +new file mode 100644 +index 000000000..235b2761a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr107190.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */ ++ ++# define BN_BITS4 32 ++# define BN_MASK2 (0xffffffffffffffffL) ++# define BN_MASK2l (0xffffffffL) ++# define BN_MASK2h (0xffffffff00000000L) ++# define BN_MASK2h1 (0xffffffff80000000L) ++# define LBITS(a) ((a)&BN_MASK2l) ++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) ++# define L2HBITS(a) (((a)< +Date: Wed, 9 Nov 2022 17:04:13 +0800 +Subject: [PATCH 26/35] [FORWPROP] Fold series of instructions into mul + + Merge the low part of series instructions into mul + + gcc/ + * match.pd: Add simplifcations for low part of mul + * common.opt: Add new option fmerge-mull enable with -O2 + * opts.c: default_options_table + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: New test. +--- + gcc/common.opt | 4 +++ + gcc/match.pd | 27 ++++++++++++++++++++ + gcc/opts.c | 1 + + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++ + 4 files changed, 66 insertions(+) + create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C + +diff --git a/gcc/common.opt b/gcc/common.opt +index ad147f7a9..6a7f66624 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2069,6 +2069,10 @@ fmerge-debug-strings + Common Report Var(flag_merge_debug_strings) Init(1) + Attempt to merge identical debug strings across compilation units. + ++fmerge-mull ++Common Report Var(flag_merge_mull) Init(0) Optimization ++Attempt to merge series instructions into mul. ++ + fmessage-length= + Common RejectNegative Joined UInteger + -fmessage-length= Limit diagnostics to characters per line. 0 suppresses line-wrapping. 
+diff --git a/gcc/match.pd b/gcc/match.pd +index 5c5b5f89e..f6c5befd7 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3404,6 +3404,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP1 to fold some operations into more ++ simple IR. The following scenario should be matched: ++ In0Lo = In0(D) & 4294967295; ++ In0Hi = In0(D) >> 32; ++ In1Lo = In1(D) & 4294967295; ++ In1Hi = In1(D) >> 32; ++ Addc = In0Lo * In1Hi + In0Hi * In1Lo; ++ addc32 = Addc << 32; ++ ResLo = In0Lo * In1Lo + addc32 */ ++(simplify ++ (plus:c (mult @4 @5) ++ (lshift ++ (plus:c ++ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) ++ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) ++ INTEGER_CST@3 ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) ++ && TYPE_PRECISION (type) == 64) ++ (mult (convert:type @0) (convert:type @1)) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. */ + /* This pattern implements two kinds simplification: +diff --git a/gcc/opts.c b/gcc/opts.c +index f12b13599..751965e46 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -511,6 +511,7 @@ static const struct default_options default_options_table[] = + { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP }, + { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, ++ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 }, + + /* -O2 and above optimizations, but not -Os or -Og. 
*/ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +new file mode 100644 +index 000000000..2a3b74604 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++ ++# define BN_BITS4 32 ++# define BN_MASK2 (0xffffffffffffffffL) ++# define BN_MASK2l (0xffffffffL) ++# define BN_MASK2h (0xffffffff00000000L) ++# define BN_MASK2h1 (0xffffffff80000000L) ++# define LBITS(a) ((a)&BN_MASK2l) ++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) ++# define L2HBITS(a) (((a)< +Date: Fri, 11 Nov 2022 11:30:37 +0800 +Subject: [PATCH 27/35] [FORWPROP] Fold series of instructions into umulh + + Merge the high part of series instructions into umulh + + gcc/ + * match.pd: Add simplifcations for high part of umulh + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4 +--- + gcc/match.pd | 56 ++++++++++++++++++++++++++ + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++- + 2 files changed, 59 insertions(+), 2 deletions(-) + +diff --git a/gcc/match.pd b/gcc/match.pd +index f6c5befd7..433682afb 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3404,6 +3404,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP4 to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. */ ++/* Mul64 is defined as a multiplication algorithm which compute two 64-bit ++ integers to one 128-bit integer. Try to match the high part of mul pattern ++ after the low part of mul pattern is simplified. 
The following scenario ++ should be matched: ++ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { ++ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2 ++ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3 ++ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2 ++ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3 ++ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6 ++ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult (@4 @7) @8 ++ AddH = (Addc >> 32) + In0Hi * In1Hi -- (plus@11 (rshift @9 @3) (mult @5 @7)) ++ addc32 = Addc << 32; -- lshift@10 @9 @3 ++ ResLo = In0(D) * In1(D); -- mult @0 @1 ++ ResHi = ((long unsigned int) (addc32 > ResLo)) + ++ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH; ++ } */ ++(simplify ++ (plus:c ++ (plus:c ++ (convert ++ (gt (lshift@10 @9 @3) ++ (mult:c @0 @1))) ++ (lshift ++ (convert ++ (gt @8 @9)) ++ @3)) ++ (plus:c@11 ++ (rshift ++ (plus:c@9 ++ (mult:c (bit_and@4 SSA_NAME@0 @2) @7) ++ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) ++ @3) ++ (mult:c (rshift@5 SSA_NAME@0 @3) ++ (rshift@7 SSA_NAME@1 INTEGER_CST@3)) ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) ++ && TYPE_PRECISION (type) == 64) ++ (with { ++ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); ++ tree shift = build_int_cst (integer_type_node, 64); ++ } ++ (convert:type (rshift ++ (mult (convert:i128_type @0) ++ (convert:i128_type @1)) ++ { shift; }))) ++ ) ++) ++#endif ++ + #if GIMPLE + /* These patterns are mostly used by FORWPROP1 to fold some operations into more + simple IR. 
The following scenario should be matched: +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +index 2a3b74604..f61cf5e6f 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */ + + # define BN_BITS4 32 + # define BN_MASK2 (0xffffffffffffffffL) +@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1, + retHi = m11; + } + +-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */ ++/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */ ++/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */ +-- +2.27.0.windows.1 + diff --git a/0076-Struct-Reorg-Fix-speccpu2006-462-double-free-I60YUV.patch b/0076-Struct-Reorg-Fix-speccpu2006-462-double-free-I60YUV.patch new file mode 100644 index 0000000000000000000000000000000000000000..88613ba97716c1adf4051a67cf5494f4ff0b24e9 --- /dev/null +++ b/0076-Struct-Reorg-Fix-speccpu2006-462-double-free-I60YUV.patch @@ -0,0 +1,38 @@ +From b669b4512e8425f4d752ef76bf61097cf40d9b35 Mon Sep 17 00:00:00 2001 +From: zgat <1071107108@qq.com> +Date: Thu, 17 Nov 2022 02:55:48 +0000 +Subject: [PATCH 28/35] [Struct Reorg] Fix speccpu2006 462 double free #I60YUV + modify gcc/tree.c. Normal operation speccpu 462 after modifed + +Signed-off-by: zgat <1071107108@qq.com> +--- + gcc/tree.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/gcc/tree.c b/gcc/tree.c +index 2a532d15a..a61788651 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -5224,8 +5224,7 @@ fld_simplified_type_name (tree type) + optimizations. 
*/ + if (flag_ipa_struct_reorg + && lang_c_p () +- && flag_lto_partition == LTO_PARTITION_ONE +- && (in_lto_p || flag_whole_program)) ++ && flag_lto_partition == LTO_PARTITION_ONE) + return TYPE_NAME (type); + + if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL) +@@ -5471,8 +5470,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld) + optimizations. */ + if (flag_ipa_struct_reorg + && lang_c_p () +- && flag_lto_partition == LTO_PARTITION_ONE +- && (in_lto_p || flag_whole_program)) ++ && flag_lto_partition == LTO_PARTITION_ONE) + return t; + if (POINTER_TYPE_P (t)) + return fld_incomplete_type_of (t, fld); +-- +2.27.0.windows.1 + diff --git a/0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch b/0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch new file mode 100644 index 0000000000000000000000000000000000000000..c804ea622fb4dcfe4f2480a2437382dee9fa9060 --- /dev/null +++ b/0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch @@ -0,0 +1,1193 @@ +From 0445301c09926a20d5e02809b2cd35bddc9fa50e Mon Sep 17 00:00:00 2001 +From: liyancheng <412998149@qq.com> +Date: Wed, 9 Nov 2022 21:00:04 +0800 +Subject: [PATCH 29/35] [Struct Reorg] Add Safe Structure Pointer Compression + +Safe structure pointer compression allows safely compressing pointers +stored in structure to reduce the size of structure. +Add flag -fipa-struct-reorg=4 to enable safe structure pointer compression. +--- + gcc/common.opt | 5 +- + gcc/ipa-struct-reorg/ipa-struct-reorg.c | 905 +++++++++++++++++++++++- + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 4 + + gcc/params.opt | 4 + + 4 files changed, 877 insertions(+), 41 deletions(-) + +diff --git a/gcc/common.opt b/gcc/common.opt +index 6a7f66624..c9b099817 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1889,8 +1889,9 @@ Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization + Perform structure layout optimizations. 
+ + fipa-struct-reorg= +-Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 3) +--fipa-struct-reorg=[0,1,2,3] adding none, struct-reorg, reorder-fields, dfe optimizations. ++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 4) ++-fipa-struct-reorg=[0,1,2,3,4] adding none, struct-reorg, reorder-fields, ++dfe, safe-pointer-compression optimizations. + + fipa-extend-auto-profile + Common Report Var(flag_ipa_extend_auto_profile) +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +index 08cb51fee..3550411dc 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +@@ -83,6 +83,7 @@ along with GCC; see the file COPYING3. If not see + #include "gimple-iterator.h" + #include "gimple-walk.h" + #include "cfg.h" ++#include "cfghooks.h" /* For split_block. */ + #include "ssa.h" + #include "tree-dfa.h" + #include "fold-const.h" +@@ -145,7 +146,27 @@ namespace { + using namespace struct_reorg; + using namespace struct_relayout; + +-/* Return true iff TYPE is stdarg va_list type. */ ++static void ++set_var_attributes (tree var) ++{ ++ if (!var) ++ return; ++ gcc_assert (TREE_CODE (var) == VAR_DECL); ++ ++ DECL_ARTIFICIAL (var) = 1; ++ DECL_EXTERNAL (var) = 0; ++ TREE_STATIC (var) = 1; ++ TREE_PUBLIC (var) = 0; ++ TREE_USED (var) = 1; ++ DECL_CONTEXT (var) = NULL_TREE; ++ TREE_THIS_VOLATILE (var) = 0; ++ TREE_ADDRESSABLE (var) = 0; ++ TREE_READONLY (var) = 0; ++ if (is_global_var (var)) ++ set_decl_tls_model (var, TLS_MODEL_NONE); ++} ++ ++/* Return true if TYPE is stdarg va_list type. 
*/ + + static inline bool + is_va_list_type (tree type) +@@ -242,9 +263,15 @@ enum struct_layout_opt_level + STRUCT_SPLIT = 1 << 0, + COMPLETE_STRUCT_RELAYOUT = 1 << 1, + STRUCT_REORDER_FIELDS = 1 << 2, +- DEAD_FIELD_ELIMINATION = 1 << 3 ++ DEAD_FIELD_ELIMINATION = 1 << 3, ++ POINTER_COMPRESSION_SAFE = 1 << 4 + }; + ++/* Defines the target pointer size of compressed pointer, which should be 8, ++ 16, 32. */ ++ ++static int compressed_size = 32; ++ + static bool is_result_of_mult (tree arg, tree *num, tree struct_size); + bool isptrptr (tree type); + void get_base (tree &base, tree expr); +@@ -366,7 +393,10 @@ srtype::srtype (tree type) + : type (type), + chain_type (false), + escapes (does_not_escape), ++ pc_gptr (NULL_TREE), + visited (false), ++ pc_candidate (false), ++ has_legal_alloc_num (false), + has_alloc_array (0) + { + for (int i = 0; i < max_split; i++) +@@ -447,6 +477,31 @@ srtype::mark_escape (escape_type e, gimple *stmt) + } + } + ++/* Create a global header for compressed struct. */ ++ ++void ++srtype::create_global_ptr_for_pc () ++{ ++ if (!pc_candidate || pc_gptr != NULL_TREE) ++ return; ++ ++ const char *type_name = get_type_name (type); ++ gcc_assert (type_name != NULL); ++ ++ char *gptr_name = concat (type_name, "_pc", NULL); ++ tree new_name = get_identifier (gptr_name); ++ tree new_type = build_pointer_type (newtype[0]); ++ tree new_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, new_name, new_type); ++ set_var_attributes (new_var); ++ pc_gptr = new_var; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nType: %s has create global header for pointer" ++ " compression: %s\n", type_name, gptr_name); ++ ++ free (gptr_name); ++} ++ + /* Add FIELD to the list of fields that use this type. 
*/ + + void +@@ -790,20 +845,31 @@ srfield::create_new_optimized_fields (tree newtype[max_split], + fields.safe_push (field); + } + +- DECL_NAME (field) = DECL_NAME (fielddecl); + if (type == NULL) + { ++ DECL_NAME (field) = DECL_NAME (fielddecl); + /* Common members do not need to reconstruct. + Otherwise, int* -> int** or void* -> void**. */ + TREE_TYPE (field) = nt; ++ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); ++ } ++ else if (type->pc_candidate) ++ { ++ const char *old_name = IDENTIFIER_POINTER (DECL_NAME (fielddecl)); ++ char *new_name = concat (old_name, "_pc", NULL); ++ DECL_NAME (field) = get_identifier (new_name); ++ free (new_name); ++ TREE_TYPE (field) = make_unsigned_type (compressed_size); ++ SET_DECL_ALIGN (field, compressed_size); + } + else + { +- TREE_TYPE (field) +- = reconstruct_complex_type (TREE_TYPE (fielddecl), nt); ++ DECL_NAME (field) = DECL_NAME (fielddecl); ++ TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt); ++ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); + } ++ + DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl); +- SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); + DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl); + TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl); + DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl); +@@ -923,6 +989,10 @@ srtype::create_new_type (void) + && has_dead_field ()) + fprintf (dump_file, "Dead field elimination.\n"); + } ++ ++ if (pc_candidate && pc_gptr == NULL_TREE) ++ create_global_ptr_for_pc (); ++ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Created %d types:\n", maxclusters); +@@ -1341,6 +1411,30 @@ public: + void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); + unsigned execute_struct_relayout (void); + bool remove_dead_field_stmt (tree lhs); ++ ++ // Pointer compression methods: ++ void check_and_prune_struct_for_pointer_compression (void); ++ void 
try_rewrite_with_pointer_compression (gassign *, gimple_stmt_iterator *, ++ tree, tree, tree &, tree &); ++ bool safe_void_cmp_p (tree, srtype *); ++ bool pc_candidate_st_type_p (tree); ++ bool pc_candidate_tree_p (tree); ++ bool pc_type_conversion_candidate_p (tree); ++ bool pc_direct_rewrite_chance_p (tree, tree &); ++ bool compress_candidate_with_check (gimple_stmt_iterator *, tree, tree &); ++ bool compress_candidate (gassign *, gimple_stmt_iterator *, tree, tree &); ++ bool decompress_candidate_with_check (gimple_stmt_iterator *, tree, tree &); ++ bool decompress_candidate (gimple_stmt_iterator *, tree, tree, tree &, ++ tree &); ++ srtype *get_compression_candidate_type (tree); ++ tree compress_ptr_to_offset (tree, srtype *, gimple_stmt_iterator *); ++ tree decompress_offset_to_ptr (tree, srtype *, gimple_stmt_iterator *); ++ basic_block create_bb_for_compress_candidate (basic_block, tree, srtype *, ++ tree &); ++ basic_block create_bb_for_decompress_candidate (basic_block, tree, srtype *, ++ tree &); ++ basic_block create_bb_for_compress_nullptr (basic_block, tree &); ++ basic_block create_bb_for_decompress_nullptr (basic_block, tree, tree &); + }; + + struct ipa_struct_relayout +@@ -1391,29 +1485,6 @@ namespace { + + /* Methods for ipa_struct_relayout. */ + +-static void +-set_var_attributes (tree var) +-{ +- if (!var) +- { +- return; +- } +- gcc_assert (TREE_CODE (var) == VAR_DECL); +- +- DECL_ARTIFICIAL (var) = 1; +- DECL_EXTERNAL (var) = 0; +- TREE_STATIC (var) = 1; +- TREE_PUBLIC (var) = 0; +- TREE_USED (var) = 1; +- DECL_CONTEXT (var) = NULL; +- TREE_THIS_VOLATILE (var) = 0; +- TREE_ADDRESSABLE (var) = 0; +- TREE_READONLY (var) = 0; +- if (is_global_var (var)) +- { +- set_decl_tls_model (var, TLS_MODEL_NONE); +- } +-} + + tree + ipa_struct_relayout::create_new_vars (tree type, const char *name) +@@ -3135,6 +3206,19 @@ ipa_struct_reorg::find_vars (gimple *stmt) + records the right value _1 declaration. 
*/ + find_var (gimple_assign_rhs1 (stmt), stmt); + ++ /* Pointer types from non-zero pointer need to be escaped in pointer ++ compression and complete relayout. ++ e.g _1->t = (struct *) 0x400000. */ ++ if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT ++ && TREE_CODE (lhs) == COMPONENT_REF ++ && TREE_CODE (TREE_TYPE (lhs)) == POINTER_TYPE ++ && TREE_CODE (rhs) == INTEGER_CST ++ && !integer_zerop (rhs)) ++ { ++ mark_type_as_escape (inner_type (TREE_TYPE (lhs)), ++ escape_cast_int, stmt); ++ } ++ + /* Add a safe func mechanism. */ + bool l_find = true; + bool r_find = true; +@@ -3603,14 +3687,15 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + bool + ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) ++ if ((current_layout_opt_level & STRUCT_REORDER_FIELDS) + && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))) + { + return true; + } +- if ((current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT) ++ if ((current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT ++ || current_layout_opt_level & POINTER_COMPRESSION_SAFE) + && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + return true; + if ((current_layout_opt_level == STRUCT_SPLIT) +@@ -3737,15 +3822,20 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple + } + } + /* x_1 = y.x_nodes; void *x; +- Directly mark the structure pointer type assigned +- to the void* variable as escape. */ ++ Mark the structure pointer type assigned ++ to the void* variable as escape. Unless the void* is only used to compare ++ with variables of the same type. 
*/ + else if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (side) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (side)) + && SSA_NAME_VAR (side) + && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (side)))) + { +- mark_type_as_escape (TREE_TYPE (other), escape_cast_void, stmt); ++ if (current_layout_opt_level < POINTER_COMPRESSION_SAFE ++ || !safe_void_cmp_p (side, type)) ++ { ++ mark_type_as_escape (TREE_TYPE (other), escape_cast_void, stmt); ++ } + } + + check_ptr_layers (side, other, stmt); +@@ -4361,7 +4451,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, + void + ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + { +- if (current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT ++ if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && handled_allocation_stmt (stmt)) + { + tree arg0 = gimple_call_arg (stmt, 0); +@@ -4388,6 +4478,22 @@ ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + ? type->has_alloc_array + : type->has_alloc_array + 1; + } ++ if (current_layout_opt_level & POINTER_COMPRESSION_SAFE ++ && TREE_CODE (arg0) == INTEGER_CST) ++ { ++ /* Only known size during compilation can be optimized ++ at this level. 
*/ ++ unsigned HOST_WIDE_INT max_alloc_size = 0; ++ switch (compressed_size) ++ { ++ case 8: max_alloc_size = 0xff; break; // max of uint8 ++ case 16: max_alloc_size = 0xffff; break; // max of uint16 ++ case 32: max_alloc_size = 0xffffffff; break; // max of uint32 ++ default: gcc_unreachable (); break; ++ } ++ if (tree_to_uhwi (arg0) < max_alloc_size) ++ type->has_legal_alloc_num = true; ++ } + } + } + +@@ -4530,7 +4636,11 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) + && SSA_NAME_VAR (ssa_name) + && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (ssa_name)))) + { +- type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); ++ if (current_layout_opt_level < POINTER_COMPRESSION_SAFE ++ || !safe_void_cmp_p (ssa_name, type)) ++ { ++ type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); ++ } + } + gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); + +@@ -5509,6 +5619,8 @@ ipa_struct_reorg::create_new_types (void) + for (unsigned i = 0; i < types.length (); i++) + newtypes += types[i]->create_new_type (); + ++ /* Some new types may not have been created at create_new_type (), so ++ recreate new type for all struct fields. 
*/ + if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < types.length (); i++) +@@ -5519,9 +5631,18 @@ ipa_struct_reorg::create_new_types (void) + for (unsigned j = 0; j < fields->length (); j++) + { + tree field = (*fields)[j]; +- TREE_TYPE (field) +- = reconstruct_complex_type (TREE_TYPE (field), +- types[i]->newtype[0]); ++ if (types[i]->pc_candidate) ++ { ++ TREE_TYPE (field) ++ = make_unsigned_type (compressed_size); ++ SET_DECL_ALIGN (field, compressed_size); ++ } ++ else ++ { ++ TREE_TYPE (field) ++ = reconstruct_complex_type (TREE_TYPE (field), ++ types[i]->newtype[0]); ++ } + } + } + } +@@ -5906,6 +6027,556 @@ ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_ + return true; + } + ++/* Emit a series of gimples to compress the pointer to the index relative to ++ the global header. The basic blocks where gsi is located must have at least ++ one stmt. */ ++ ++tree ++ipa_struct_reorg::compress_ptr_to_offset (tree xhs, srtype *type, ++ gimple_stmt_iterator *gsi) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nCompress candidate pointer:\n"); ++ print_generic_expr (dump_file, xhs); ++ fprintf (dump_file, "\nto offset:\n"); ++ } ++ ++ /* Emit gimple _X1 = ptr - gptr. */ ++ tree pointer_addr = fold_convert (long_unsigned_type_node, xhs); ++ tree gptr_addr = fold_convert (long_unsigned_type_node, type->pc_gptr); ++ tree step1 = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, ++ pointer_addr, gptr_addr); ++ ++ /* Emit gimple _X2 = _X1 / sizeof (struct). */ ++ tree step2 = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_unsigned_type_node, ++ step1, TYPE_SIZE_UNIT (type->newtype[0])); ++ ++ /* Emit gimple _X3 = _X2 + 1. */ ++ tree step3 = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, ++ step2, build_one_cst (long_unsigned_type_node)); ++ ++ /* Emit _X4 = (compressed_size) _X3. 
*/ ++ tree step4 = gimplify_build1 (gsi, NOP_EXPR, ++ make_unsigned_type (compressed_size), step3); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_generic_expr (dump_file, step3); ++ fprintf (dump_file, "\n"); ++ } ++ return step4; ++} ++ ++/* Emit a series of gimples to decompress the index into the original ++ pointer. The basic blocks where gsi is located must have at least ++ one stmt. */ ++ ++tree ++ipa_struct_reorg::decompress_offset_to_ptr (tree xhs, srtype *type, ++ gimple_stmt_iterator *gsi) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nDecompress candidate offset:\n"); ++ print_generic_expr (dump_file, xhs); ++ fprintf (dump_file, "\nto pointer:\n"); ++ } ++ ++ /* Emit _X1 = xhs - 1. */ ++ tree offset = fold_convert (long_unsigned_type_node, xhs); ++ tree step1 = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, ++ offset, ++ build_one_cst (long_unsigned_type_node)); ++ ++ /* Emit _X2 = _X1 * sizeof (struct). */ ++ tree step2 = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, ++ step1, TYPE_SIZE_UNIT (type->newtype[0])); ++ ++ /* Emit _X3 = phead + _X2. */ ++ tree gptr_addr = fold_convert (long_unsigned_type_node, type->pc_gptr); ++ tree step3 = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, ++ gptr_addr, step2); ++ ++ /* Emit _X4 = (struct *) _X3. */ ++ tree step4 = gimplify_build1 (gsi, NOP_EXPR, TREE_TYPE (type->pc_gptr), ++ step3); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_generic_expr (dump_file, step3); ++ fprintf (dump_file, "\n"); ++ } ++ return step4; ++} ++ ++/* Return the compression candidate srtype of SSA_NAME or COMPONENT_REF. 
*/ ++ ++srtype * ++ipa_struct_reorg::get_compression_candidate_type (tree xhs) ++{ ++ if (xhs == NULL_TREE) ++ return NULL; ++ ++ if (TREE_CODE (xhs) == SSA_NAME || TREE_CODE (xhs) == COMPONENT_REF) ++ { ++ srtype *access_type = find_type (inner_type (TREE_TYPE (xhs))); ++ if (access_type != NULL && access_type->pc_candidate) ++ return access_type; ++ } ++ return NULL; ++} ++ ++/* True if the input type is the candidate type for pointer compression. */ ++ ++bool ++ipa_struct_reorg::pc_candidate_st_type_p (tree type) ++{ ++ if (type == NULL_TREE) ++ return false; ++ ++ if (TREE_CODE (type) == POINTER_TYPE) ++ { ++ if (TREE_CODE (TREE_TYPE (type)) == RECORD_TYPE) ++ { ++ srtype *access_type = find_type (TREE_TYPE (type)); ++ if (access_type != NULL && access_type->pc_candidate) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* True if the input xhs is a candidate for pointer compression. */ ++ ++bool ++ipa_struct_reorg::pc_candidate_tree_p (tree xhs) ++{ ++ if (xhs == NULL_TREE) ++ return false; ++ ++ if (TREE_CODE (xhs) == COMPONENT_REF) ++ { ++ srtype *base_type = find_type (TREE_TYPE (TREE_OPERAND (xhs, 0))); ++ if (base_type == NULL || base_type->has_escaped ()) ++ return false; ++ ++ return pc_candidate_st_type_p (TREE_TYPE (xhs)); ++ } ++ return false; ++} ++ ++/* True if xhs is a component_ref that base has escaped but uses a compression ++ candidate type. */ ++ ++bool ++ipa_struct_reorg::pc_type_conversion_candidate_p (tree xhs) ++{ ++ if (xhs == NULL_TREE) ++ return false; ++ ++ if (TREE_CODE (xhs) == COMPONENT_REF) ++ { ++ srtype *base_type = find_type (TREE_TYPE (TREE_OPERAND (xhs, 0))); ++ if (base_type != NULL && base_type->has_escaped ()) ++ return pc_candidate_st_type_p (TREE_TYPE (xhs)); ++ ++ } ++ return false; ++} ++ ++/* Creates a new basic block with zero for compressed null pointers. 
*/ ++ ++basic_block ++ipa_struct_reorg::create_bb_for_compress_nullptr (basic_block last_bb, ++ tree &phi) ++{ ++ basic_block new_bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (new_bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ ++ /* Emit phi = 0. */ ++ gimple_stmt_iterator gsi = gsi_last_bb (new_bb); ++ phi = make_ssa_name (make_unsigned_type (compressed_size)); ++ tree rhs = build_int_cst (make_unsigned_type (compressed_size), 0); ++ gimple *new_stmt = gimple_build_assign (phi, rhs); ++ gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nCreate bb %d for compress nullptr:\n", ++ new_bb->index); ++ gimple_dump_bb (dump_file, new_bb, 0, dump_flags); ++ } ++ return new_bb; ++} ++ ++/* Create a new basic block to compress the pointer to the index relative to ++ the allocated memory pool header. */ ++ ++basic_block ++ipa_struct_reorg::create_bb_for_compress_candidate (basic_block last_bb, ++ tree new_rhs, srtype *type, ++ tree &phi) ++{ ++ basic_block new_bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (new_bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ ++ gimple_stmt_iterator gsi = gsi_last_bb (new_bb); ++ /* compress_ptr_to_offset () needs at least one stmt in target bb. */ ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ phi = compress_ptr_to_offset (new_rhs, type, &gsi); ++ /* Remove the NOP created above. */ ++ gsi_remove (&gsi, true); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nCreate bb %d for compress candidate:\n", ++ new_bb->index); ++ gimple_dump_bb (dump_file, new_bb, 0, dump_flags); ++ } ++ return new_bb; ++} ++ ++/* Compression can be simplified by these following cases: ++ 1. if rhs is NULL, uses zero to represent it. ++ 2. 
if new_rhs has been converted into INTEGER_TYPE in the previous stmt, ++ just use it here. For example: ++ _1 = t->s ++ -> tt->s = _1. */ ++ ++bool ++ipa_struct_reorg::pc_direct_rewrite_chance_p (tree rhs, tree &new_rhs) ++{ ++ if (integer_zerop (rhs)) ++ { ++ new_rhs = build_int_cst (make_unsigned_type (compressed_size), 0); ++ return true; ++ } ++ else if (new_rhs && TREE_CODE (TREE_TYPE (new_rhs)) == INTEGER_TYPE) ++ { ++ return true; ++ } ++ return false; ++} ++ ++/* Perform pointer compression with check. The conversion will be as shown in ++ the following example: ++ Orig bb: ++ bb <1>: ++ _1->t = _2 ++ ++ will be transformed to: ++ bb <1>: ++ _3 = _2 ++ if (_2 == NULL) ++ goto bb <2> ++ else ++ goto bb <3> ++ ++ bb <2>: ++ _3 = 0 ++ goto bb <4> ++ ++ bb <3>: ++ ... ++ _4 = compress (_2) ++ goto bb <4> ++ ++ bb <4>: ++ _5 = PHI (_3, _4) ++ _1->t = _5 ++ The gsi will move to the beginning of split dst bb <4>, _1->t = _5 will be ++ emitted by rewrite_assign (). */ ++ ++bool ++ipa_struct_reorg::compress_candidate_with_check (gimple_stmt_iterator *gsi, ++ tree rhs, tree &new_rhs) ++{ ++ tree cond_lhs = make_ssa_name (TREE_TYPE (new_rhs)); ++ gimple *assign_stmt = gimple_build_assign (cond_lhs, new_rhs); ++ gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); ++ ++ /* Insert cond stmt. */ ++ tree rhs_pointer_type = build_pointer_type (TREE_TYPE (new_rhs)); ++ gcond *cond = gimple_build_cond (EQ_EXPR, cond_lhs, ++ build_int_cst (rhs_pointer_type, 0), ++ NULL_TREE, NULL_TREE); ++ gimple_set_location (cond, UNKNOWN_LOCATION); ++ gsi_insert_before (gsi, cond, GSI_SAME_STMT); ++ ++ gimple* cur_stmt = as_a (cond); ++ edge e = split_block (cur_stmt->bb, cur_stmt); ++ basic_block split_src_bb = e->src; ++ basic_block split_dst_bb = e->dest; ++ ++ /* Create bb for nullptr. */ ++ tree phi1 = NULL_TREE; ++ basic_block true_bb = create_bb_for_compress_nullptr (split_src_bb, phi1); ++ ++ /* Create bb for compression. 
*/ ++ srtype *type = get_compression_candidate_type (rhs); ++ gcc_assert (type != NULL); ++ tree phi2 = NULL_TREE; ++ basic_block false_bb = create_bb_for_compress_candidate (true_bb, new_rhs, ++ type, phi2); ++ ++ /* Rebuild and reset cfg. */ ++ remove_edge_raw (e); ++ ++ edge etrue = make_edge (split_src_bb, true_bb, EDGE_TRUE_VALUE); ++ etrue->probability = profile_probability::unlikely (); ++ true_bb->count = etrue->count (); ++ ++ edge efalse = make_edge (split_src_bb, false_bb, EDGE_FALSE_VALUE); ++ efalse->probability = profile_probability::likely (); ++ false_bb->count = efalse->count (); ++ ++ edge e1 = make_single_succ_edge (true_bb, split_dst_bb, EDGE_FALLTHRU); ++ edge e2 = make_single_succ_edge (false_bb, split_dst_bb, EDGE_FALLTHRU); ++ ++ tree phi = make_ssa_name (make_unsigned_type (compressed_size)); ++ gphi *phi_node = create_phi_node (phi, split_dst_bb); ++ add_phi_arg (phi_node, phi1, e1, UNKNOWN_LOCATION); ++ add_phi_arg (phi_node, phi2, e2, UNKNOWN_LOCATION); ++ ++ if (dom_info_available_p (CDI_DOMINATORS)) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, split_dst_bb, split_src_bb); ++ set_immediate_dominator (CDI_DOMINATORS, true_bb, split_src_bb); ++ set_immediate_dominator (CDI_DOMINATORS, false_bb, split_src_bb); ++ } ++ *gsi = gsi_start_bb (split_dst_bb); ++ new_rhs = phi; ++ return true; ++} ++ ++/* If there is a direct rewrite chance or simplification opportunity, perform ++ the simplified compression rewrite. Otherwise, create a cond expression and ++ two basic blocks to implement pointer compression. */ ++ ++bool ++ipa_struct_reorg::compress_candidate (gassign *stmt, gimple_stmt_iterator *gsi, ++ tree rhs, tree &new_rhs) ++{ ++ if (pc_direct_rewrite_chance_p (rhs, new_rhs)) ++ return true; ++ ++ return compress_candidate_with_check (gsi, rhs, new_rhs); ++} ++ ++/* Create a new basic block to decompress the index to null pointer. 
*/ ++ ++basic_block ++ipa_struct_reorg::create_bb_for_decompress_nullptr (basic_block last_bb, ++ tree new_rhs, ++ tree &phi_node) ++{ ++ basic_block new_bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (new_bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator gsi = gsi_last_bb (new_bb); ++ tree rhs_pointer_type = build_pointer_type (TREE_TYPE (new_rhs)); ++ phi_node = make_ssa_name (rhs_pointer_type); ++ gimple *new_stmt = gimple_build_assign (phi_node, ++ build_int_cst (rhs_pointer_type, 0)); ++ gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nCreate bb %d for decompress nullptr:\n", ++ new_bb->index); ++ gimple_dump_bb (dump_file, new_bb, 0, dump_flags); ++ } ++ return new_bb; ++} ++ ++/* Create a new basic block to decompress the index into original pointer. */ ++ ++basic_block ++ipa_struct_reorg::create_bb_for_decompress_candidate (basic_block last_bb, ++ tree lhs, srtype *type, ++ tree &phi_node) ++{ ++ basic_block new_bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (new_bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator gsi = gsi_last_bb (new_bb); ++ /* decompress_offset_to_ptr () needs at least one stmt in target bb. */ ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ phi_node = decompress_offset_to_ptr (lhs, type, &gsi); ++ /* Remove the NOP created above. */ ++ gsi_remove (&gsi, true); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nCreate bb %d for decompress candidate:\n", ++ new_bb->index); ++ gimple_dump_bb (dump_file, new_bb, 0, dump_flags); ++ } ++ return new_bb; ++} ++ ++/* Perform pointer decompression with check. 
The conversion will be as shown ++ in the following example: ++ Orig bb: ++ bb <1>: ++ _1 = _2->t ++ ++ will be transformed to: ++ bb <1>: ++ _3 = _2->t ++ if (_3 == 0) ++ goto bb <2> ++ else ++ goto bb <3> ++ ++ bb <2>: ++ _4 = NULL ++ goto bb <4> ++ ++ bb <3>: ++ ... ++ _5 = decompress (_3) ++ goto bb <4> ++ ++ bb <4>: ++ _6 = PHI (_4, _5) ++ _1 = _6 ++ The gsi will move to the beginning of split dst bb <4>, _1 = _6 will be ++ emitted by rewrite_assign (). */ ++ ++bool ++ipa_struct_reorg::decompress_candidate_with_check (gimple_stmt_iterator *gsi, ++ tree rhs, tree &new_rhs) ++{ ++ /* Insert cond stmt. */ ++ tree cond_lhs = make_ssa_name (TREE_TYPE (new_rhs)); ++ gassign *cond_assign = gimple_build_assign (cond_lhs, new_rhs); ++ gsi_insert_before (gsi, cond_assign, GSI_SAME_STMT); ++ ++ tree pc_type = make_unsigned_type (compressed_size); ++ gcond *cond = gimple_build_cond (EQ_EXPR, cond_lhs, ++ build_int_cst (pc_type, 0), ++ NULL_TREE, NULL_TREE); ++ gimple_set_location (cond, UNKNOWN_LOCATION); ++ gsi_insert_before (gsi, cond, GSI_SAME_STMT); ++ ++ /* Split bb. */ ++ gimple* cur_stmt = as_a (cond); ++ edge e = split_block (cur_stmt->bb, cur_stmt); ++ basic_block split_src_bb = e->src; ++ basic_block split_dst_bb = e->dest; ++ ++ /* Create bb for decompress nullptr. */ ++ tree phi1 = NULL_TREE; ++ basic_block true_bb = create_bb_for_decompress_nullptr (split_src_bb, ++ new_rhs, phi1); ++ ++ /* Create bb for decompression candidate. */ ++ tree phi2 = NULL_TREE; ++ srtype *type = get_compression_candidate_type (rhs); ++ gcc_assert (type != NULL); ++ basic_block false_bb = create_bb_for_decompress_candidate (true_bb, cond_lhs, ++ type, phi2); ++ ++ /* Refresh and reset cfg. 
*/ ++ remove_edge_raw (e); ++ ++ edge etrue = make_edge (split_src_bb, true_bb, EDGE_TRUE_VALUE); ++ etrue->probability = profile_probability::unlikely (); ++ true_bb->count = etrue->count (); ++ ++ edge efalse = make_edge (split_src_bb, false_bb, EDGE_FALSE_VALUE); ++ efalse->probability = profile_probability::likely (); ++ false_bb->count = efalse->count (); ++ ++ edge e1 = make_single_succ_edge (true_bb, split_dst_bb, EDGE_FALLTHRU); ++ edge e2 = make_single_succ_edge (false_bb, split_dst_bb, EDGE_FALLTHRU); ++ ++ tree phi = make_ssa_name (build_pointer_type (TREE_TYPE (cond_lhs))); ++ gphi *phi_node = create_phi_node (phi, split_dst_bb); ++ add_phi_arg (phi_node, phi1, e1, UNKNOWN_LOCATION); ++ add_phi_arg (phi_node, phi2, e2, UNKNOWN_LOCATION); ++ ++ if (dom_info_available_p (CDI_DOMINATORS)) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, split_dst_bb, split_src_bb); ++ set_immediate_dominator (CDI_DOMINATORS, true_bb, split_src_bb); ++ set_immediate_dominator (CDI_DOMINATORS, false_bb, split_src_bb); ++ } ++ *gsi = gsi_start_bb (split_dst_bb); ++ new_rhs = phi; ++ return true; ++} ++ ++/* If there is a simplification opportunity, perform the simplified ++ decompression rewrite. Otherwise, create a cond expression and two basic ++ blocks to implement pointer decompression. */ ++ ++bool ++ipa_struct_reorg::decompress_candidate (gimple_stmt_iterator *gsi, ++ tree lhs, tree rhs, tree &new_lhs, ++ tree &new_rhs) ++{ ++ // TODO: simplify check and rewrite will be pushed in next PR. ++ return decompress_candidate_with_check (gsi, rhs, new_rhs); ++} ++ ++/* Try to perform pointer compression and decompression. */ ++ ++void ++ipa_struct_reorg::try_rewrite_with_pointer_compression (gassign *stmt, ++ gimple_stmt_iterator ++ *gsi, tree lhs, ++ tree rhs, tree &new_lhs, ++ tree &new_rhs) ++{ ++ bool l = pc_candidate_tree_p (lhs); ++ bool r = pc_candidate_tree_p (rhs); ++ if (!l && !r) ++ { ++ tree tmp_rhs = new_rhs == NULL_TREE ? 
rhs : new_rhs; ++ if (pc_type_conversion_candidate_p (lhs)) ++ { ++ /* Transform MEM[(struct *)_1].files = _4; ++ to MEM[(struct *)_1].files = (struct *)_4; */ ++ new_rhs = fold_convert (TREE_TYPE (lhs), tmp_rhs); ++ } ++ else if (pc_type_conversion_candidate_p (rhs)) ++ { ++ /* Transform _4 = MEM[(struct *)_1].nodes; ++ to _4 = (new_struct *) MEM[(struct *)_1].nodes; */ ++ new_rhs = fold_convert (TREE_TYPE (new_lhs), tmp_rhs); ++ } ++ } ++ else if (l && r) ++ gcc_unreachable (); ++ else if (l) ++ { ++ if (!compress_candidate (stmt, gsi, rhs, new_rhs)) ++ gcc_unreachable (); ++ } ++ else if (r) ++ { ++ if (!decompress_candidate (gsi, lhs, rhs, new_lhs, new_rhs)) ++ gcc_unreachable (); ++ } ++} ++ + bool + ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + { +@@ -6109,6 +6780,9 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + fprintf (dump_file, "replaced with:\n"); + for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++) + { ++ if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE) ++ try_rewrite_with_pointer_compression (stmt, gsi, lhs, rhs, ++ newlhs[i], newrhs[i]); + gimple *newstmt = gimple_build_assign (newlhs[i] ? newlhs[i] : lhs, newrhs[i] ? newrhs[i] : rhs); + if (dump_file && (dump_flags & TDF_DETAILS)) + { +@@ -6183,6 +6857,13 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + gcc_assert (false); + gimple_call_set_lhs (g, decl->newdecl[i]); + gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ if (type->pc_candidate) ++ { ++ /* Init global header for pointer compression. 
*/ ++ gassign *gptr ++ = gimple_build_assign (type->pc_gptr, decl->newdecl[i]); ++ gsi_insert_before (gsi, gptr, GSI_SAME_STMT); ++ } + } + return true; + } +@@ -6649,6 +7330,12 @@ ipa_struct_reorg::rewrite_functions (void) + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + current_function = f; + ++ if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE) ++ { ++ calculate_dominance_info (CDI_DOMINATORS); ++ loop_optimizer_init (0); ++ } ++ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nBefore rewrite: %dth_%s\n", +@@ -6724,6 +7411,9 @@ ipa_struct_reorg::rewrite_functions (void) + + free_dominance_info (CDI_DOMINATORS); + ++ if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE) ++ loop_optimizer_finalize (); ++ + if (dump_file) + { + fprintf (dump_file, "\nAfter rewrite: %dth_%s\n", +@@ -6758,6 +7448,10 @@ ipa_struct_reorg::execute_struct_relayout (void) + { + continue; + } ++ if (get_type_name (types[i]->type) == NULL) ++ { ++ continue; ++ } + retval |= ipa_struct_relayout (type, this).execute (); + } + +@@ -6778,6 +7472,132 @@ ipa_struct_reorg::execute_struct_relayout (void) + return retval; + } + ++ ++/* True if the var with void type is only used to compare with the same ++ target type. */ ++ ++bool ++ipa_struct_reorg::safe_void_cmp_p (tree var, srtype *type) ++{ ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ if (is_gimple_debug (use_stmt)) ++ continue; ++ ++ if (gimple_code (use_stmt) == GIMPLE_COND) ++ { ++ tree lhs = gimple_cond_lhs (use_stmt); ++ tree rhs = gimple_cond_rhs (use_stmt); ++ tree xhs = lhs == var ? rhs : lhs; ++ if (types_compatible_p (inner_type (TREE_TYPE (xhs)), type->type)) ++ continue; ++ ++ } ++ return false; ++ } ++ return true; ++} ++ ++/* Mark the structure that should perform pointer compression. 
*/ ++ ++void ++ipa_struct_reorg::check_and_prune_struct_for_pointer_compression (void) ++{ ++ unsigned pc_transform_num = 0; ++ ++ if (dump_file) ++ fprintf (dump_file, "\nMark the structure that should perform pointer" ++ " compression:\n"); ++ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ srtype *type = types[i]; ++ if (dump_file) ++ print_generic_expr (dump_file, type->type); ++ ++ if (type->has_escaped ()) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has escaped by %s, skip compression.\n", ++ type->escape_reason ()); ++ continue; ++ } ++ if (TYPE_FIELDS (type->type) == NULL) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has zero field, skip compression.\n"); ++ continue; ++ } ++ if (type->chain_type) ++ { ++ if (dump_file) ++ fprintf (dump_file, " is chain_type, skip compression.\n"); ++ continue; ++ } ++ if (type->has_alloc_array != 1) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has alloc number: %d, skip compression.\n", ++ type->has_alloc_array); ++ continue; ++ } ++ if (get_type_name (type->type) == NULL) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has empty struct name," ++ " skip compression.\n"); ++ continue; ++ } ++ if ((current_layout_opt_level & POINTER_COMPRESSION_SAFE) ++ && !type->has_legal_alloc_num) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has illegal struct array size," ++ " skip compression.\n"); ++ continue; ++ } ++ pc_transform_num++; ++ type->pc_candidate = true; ++ if (dump_file) ++ fprintf (dump_file, " attemps to do pointer compression.\n"); ++ } ++ ++ if (dump_file) ++ { ++ if (pc_transform_num) ++ fprintf (dump_file, "\nNumber of structures to transform in " ++ "pointer compression is %d\n", pc_transform_num); ++ else ++ fprintf (dump_file, "\nNo structures to transform in " ++ "pointer compression.\n"); ++ } ++} ++ ++/* Init pointer size from parameter param_pointer_compression_size. 
*/ ++ ++static void ++init_pointer_size_for_pointer_compression (void) ++{ ++ switch (param_pointer_compression_size) ++ { ++ case 8: ++ compressed_size = 8; // sizeof (uint8) ++ break; ++ case 16: ++ compressed_size = 16; // sizeof (uint16) ++ break; ++ case 32: ++ compressed_size = 32; // sizeof (uint32) ++ break; ++ default: ++ error ("Invalid pointer compression size, using the following param: " ++ "\"--param pointer-compression-size=[8,16,32]\""); ++ } ++} ++ + unsigned int + ipa_struct_reorg::execute (unsigned int opt) + { +@@ -6798,6 +7618,8 @@ ipa_struct_reorg::execute (unsigned int opt) + { + analyze_types (); + } ++ if (opt >= POINTER_COMPRESSION_SAFE) ++ check_and_prune_struct_for_pointer_compression (); + + ret = rewrite_functions (); + } +@@ -6850,6 +7672,8 @@ public: + unsigned int level = 0; + switch (struct_layout_optimize_level) + { ++ case 4: level |= POINTER_COMPRESSION_SAFE; ++ // FALLTHRU + case 3: level |= DEAD_FIELD_ELIMINATION; + // FALLTHRU + case 2: level |= STRUCT_REORDER_FIELDS; +@@ -6862,6 +7686,9 @@ public: + default: gcc_unreachable (); + } + ++ if (level & POINTER_COMPRESSION_SAFE) ++ init_pointer_size_for_pointer_compression (); ++ + /* Preserved for backward compatibility, reorder fields needs run before + struct split and complete struct relayout. 
*/ + if (flag_ipa_reorder_fields && level < STRUCT_REORDER_FIELDS) +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +index 936c0fa6f..d88799982 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -122,7 +122,10 @@ private: + public: + + tree newtype[max_split]; ++ tree pc_gptr; + bool visited; ++ bool pc_candidate; ++ bool has_legal_alloc_num; + int has_alloc_array; + + // Constructors +@@ -144,6 +147,7 @@ public: + void analyze (void); + bool has_dead_field (void); + void mark_escape (escape_type, gimple *stmt); ++ void create_global_ptr_for_pc (); + bool has_escaped (void) + { + return escapes != does_not_escape; +diff --git a/gcc/params.opt b/gcc/params.opt +index 9d1faa7ab..1d355819c 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -984,4 +984,8 @@ High execution rate loops to be analyzed in prefetch (in%). + Common Joined UInteger Var(param_prefetch_func_counts_threshold) Init(100) Param Optimization + Threshold functions of cache miss counts to be analyzed in prefetching. + ++-param=compressed-pointer-size= ++Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization ++Target size of compressed pointer, which should be 8, 16 or 32. ++ + ; This comment is to ensure we retain the blank line above. 
+-- +2.27.0.windows.1 + diff --git a/0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch b/0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch new file mode 100644 index 0000000000000000000000000000000000000000..baeff9009d33177bdaebed3813a1d92365addf53 --- /dev/null +++ b/0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch @@ -0,0 +1,1007 @@ +From d334ec1579fb0668da5e23ced3b782d7f6f35d77 Mon Sep 17 00:00:00 2001 +From: benniaobufeijiushiji +Date: Mon, 17 Oct 2022 17:21:57 +0800 +Subject: [PATCH 30/35] [Loop-distribution] Add isomorphic stmts analysis + +Use option -ftree-slp-transpose-vectorize + +Check if loop is vectorizable before analysis. For unvectorizable +loops, try to find isomorphic stmts from grouped load as new seed stmts +for distribution. +--- + gcc/tree-loop-distribution.c | 858 +++++++++++++++++++++++++++++++++++ + gcc/tree-vect-loop.c | 37 +- + gcc/tree-vectorizer.h | 3 +- + 3 files changed, 894 insertions(+), 4 deletions(-) + +diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c +index 888af4894..c08af6562 100644 +--- a/gcc/tree-loop-distribution.c ++++ b/gcc/tree-loop-distribution.c +@@ -90,6 +90,8 @@ along with GCC; see the file COPYING3. If not see + data reuse. */ + + #include "config.h" ++#define INCLUDE_MAP ++#define INCLUDE_ALGORITHM + #include "system.h" + #include "coretypes.h" + #include "backend.h" +@@ -115,6 +117,7 @@ along with GCC; see the file COPYING3. If not see + #include "tree-vectorizer.h" + #include "tree-eh.h" + #include "gimple-fold.h" ++#include "optabs-tree.h" + + + #define MAX_DATAREFS_NUM \ +@@ -183,6 +186,52 @@ struct rdg_vertex + #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I])) + #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I])) + ++/* Results of isomorphic group analysis. 
*/ ++#define UNINITIALIZED (0) ++#define ISOMORPHIC (1) ++#define HETEROGENEOUS (1 << 1) ++#define UNCERTAIN (1 << 2) ++ ++/* Information of a stmt while analyzing isomorphic use in group. */ ++ ++typedef struct _group_info ++{ ++ gimple *stmt; ++ ++ /* True if stmt can be a cut point. */ ++ bool cut_point; ++ ++ /* For use_stmt with two rhses, one of which is the lhs of stmt. ++ If the other is unknown to be isomorphic, mark it uncertain. */ ++ bool uncertain; ++ ++ /* Searching of isomorphic stmt reaches heterogeneous groups or reaches ++ MEM stmts. */ ++ bool done; ++ ++ _group_info () ++ { ++ stmt = NULL; ++ cut_point = false; ++ uncertain = false; ++ done = false; ++ } ++} *group_info; ++ ++/* PAIR of cut points and corresponding profit. */ ++typedef std::pair *, int> stmts_profit; ++ ++/* MAP of vector factor VF and corresponding stmts_profit PAIR. */ ++typedef std::map vf_stmts_profit_map; ++ ++/* PAIR of group_num and iteration_num. We consider rhses from the same ++ group and iteration are isomorphic. */ ++typedef std::pair group_iteration; ++ ++/* An isomorphic stmt is determined by lhs of use_stmt, group_num and ++ the iteration_num when we insert this stmt to this map. */ ++typedef std::map isomer_stmt_lhs; ++ + /* Data dependence type. */ + + enum rdg_dep_type +@@ -640,6 +689,18 @@ class loop_distribution + void finalize_partitions (class loop *loop, vec + *partitions, vec *alias_ddrs); + ++ /* Analyze loop form and if it's vectorizable to decide if we need to ++ insert temp arrays to distribute it. */ ++ bool may_insert_temp_arrays (loop_p loop, struct graph *&rdg, ++ control_dependences *cd); ++ ++ /* Reset gimple_uid of GIMPLE_DEBUG and GIMPLE_LABEL to -1. 
*/ ++ void reset_gimple_uid (loop_p loop); ++ ++ bool check_loop_vectorizable (loop_p loop); ++ ++ inline void rebuild_rdg (loop_p loop, struct graph *&rdg, ++ control_dependences *cd); + /* Distributes the code from LOOP in such a way that producer statements + are placed before consumer statements. Tries to separate only the + statements from STMTS into separate loops. Returns the number of +@@ -2900,6 +2961,803 @@ loop_distribution::finalize_partitions (class loop *loop, + fuse_memset_builtins (partitions); + } + ++/* Gimple uids of GIMPLE_DEBUG and GIMPLE_LABEL were changed during function ++ vect_analyze_loop, reset them to -1. */ ++ ++void ++loop_distribution::reset_gimple_uid (loop_p loop) ++{ ++ basic_block *bbs = get_loop_body_in_custom_order (loop, this, ++ bb_top_order_cmp_r); ++ for (int i = 0; i < int (loop->num_nodes); i++) ++ { ++ basic_block bb = bbs[i]; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (is_gimple_debug (stmt) || gimple_code (stmt) == GIMPLE_LABEL) ++ gimple_set_uid (stmt, -1); ++ } ++ } ++ free (bbs); ++} ++ ++bool ++loop_distribution::check_loop_vectorizable (loop_p loop) ++{ ++ vec_info_shared shared; ++ vect_analyze_loop (loop, &shared, true); ++ loop_vec_info vinfo = loop_vec_info_for_loop (loop); ++ reset_gimple_uid (loop); ++ if (vinfo == NULL) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, ++ "Loop %d no temp array insertion: bad data access pattern," ++ " unable to generate loop_vinfo.\n", loop->num); ++ return false; ++ } ++ if (vinfo->vectorizable) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d no temp array insertion: original loop" ++ " can be vectorized without distribution.\n", ++ loop->num); ++ delete vinfo; ++ loop->aux = NULL; ++ return false; ++ } ++ if (vinfo->grouped_loads.length () == 0) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf 
(dump_file, "Loop %d no temp array insertion: original loop" ++ " has no grouped loads.\n" , loop->num); ++ delete vinfo; ++ loop->aux = NULL; ++ return false; ++ } ++ return true; ++} ++ ++inline void ++loop_distribution::rebuild_rdg (loop_p loop, struct graph *&rdg, ++ control_dependences *cd) ++{ ++ free_rdg (rdg); ++ rdg = build_rdg (loop, cd); ++ gcc_checking_assert (rdg != NULL); ++} ++ ++bool ++loop_distribution::may_insert_temp_arrays (loop_p loop, struct graph *&rdg, ++ control_dependences *cd) ++{ ++ if (!(flag_tree_slp_transpose_vectorize && flag_tree_loop_vectorize)) ++ return false; ++ ++ /* Only loops with two basic blocks HEADER and LATCH are supported. HEADER ++ is the main body of a LOOP and LATCH is the basic block that controls the ++ LOOP execution. Size of temp array is determined by loop execution time, ++ so it must be a const. */ ++ tree loop_extent = number_of_latch_executions (loop); ++ if (loop->inner != NULL || loop->num_nodes > 2 ++ || rdg->n_vertices > param_slp_max_insns_in_bb ++ || TREE_CODE (loop_extent) != INTEGER_CST) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d: no temp array insertion: bad loop" ++ " form.\n", loop->num); ++ return false; ++ } ++ ++ if (loop->dont_vectorize) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d: no temp array insertion: this loop" ++ " should never be vectorized.\n", ++ loop->num); ++ return false; ++ } ++ ++ /* Do not distribute a LOOP that is able to be vectorized without ++ distribution. */ ++ if (!check_loop_vectorizable (loop)) ++ { ++ rebuild_rdg (loop, rdg, cd); ++ return false; ++ } ++ ++ rebuild_rdg (loop, rdg, cd); ++ return true; ++} ++ ++/* Return max grouped loads' length if all groupes length satisfy len = 2 ^ n. ++ Otherwise, return 0. 
*/ ++ ++static unsigned ++get_max_vf (loop_vec_info vinfo) ++{ ++ unsigned size = 0; ++ unsigned max = 0; ++ stmt_vec_info stmt_info; ++ unsigned i = 0; ++ FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info) ++ { ++ size = stmt_info->size; ++ if (!pow2p_hwi (size)) ++ return 0; ++ max = size > max ? size : max; ++ } ++ return max; ++} ++ ++/* Convert grouped_loads from linked list to vector with length vf. Init ++ group_info of each stmt in the same group and put them into a vector. And ++ these vectors constitute WORKLISTS. We will re-analyze a group if it is ++ uncertain, so we regard WORKLISTS as a circular queue. */ ++ ++static unsigned ++build_queue (loop_vec_info vinfo, unsigned vf, ++ vec *> &worklists) ++{ ++ stmt_vec_info stmt_info; ++ unsigned i = 0; ++ group_info ginfo = NULL; ++ vec *worklist = NULL; ++ FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info) ++ { ++ unsigned group_size = stmt_info->size; ++ stmt_vec_info c_stmt_info = stmt_info; ++ while (group_size >= vf) ++ { ++ vec_alloc (worklist, vf); ++ for (unsigned j = 0; j < vf; ++j) ++ { ++ ginfo = new _group_info (); ++ ginfo->stmt = c_stmt_info->stmt; ++ worklist->safe_push (ginfo); ++ c_stmt_info = c_stmt_info->next_element; ++ } ++ worklists.safe_push (worklist); ++ group_size -= vf; ++ } ++ } ++ return worklists.length (); ++} ++ ++static bool ++check_same_oprand_type (tree op1, tree op2) ++{ ++ tree type1 = TREE_TYPE (op1); ++ tree type2 = TREE_TYPE (op2); ++ if (TREE_CODE (type1) != INTEGER_TYPE && TREE_CODE (type1) != REAL_TYPE) ++ return false; ++ ++ return (TREE_CODE (type1) == TREE_CODE (type2) ++ && TYPE_UNSIGNED (type1) == TYPE_UNSIGNED (type2) ++ && TYPE_PRECISION (type1) == TYPE_PRECISION (type2)); ++} ++ ++static bool ++bit_field_p (gimple *stmt) ++{ ++ unsigned i = 0; ++ auto_vec datarefs_vec; ++ data_reference_p dr; ++ if (!find_data_references_in_stmt (NULL, stmt, &datarefs_vec)) ++ return true; ++ ++ FOR_EACH_VEC_ELT (datarefs_vec, i, dr) ++ { ++ if (TREE_CODE (DR_REF (dr)) == 
COMPONENT_REF ++ && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) ++ return true; ++ } ++ return false; ++} ++ ++static inline bool ++shift_operation (enum tree_code op) ++{ ++ return op == LSHIFT_EXPR || op == RSHIFT_EXPR || op == LROTATE_EXPR ++ || op == RROTATE_EXPR; ++} ++ ++/* Return relationship between USE_STMT and the first use_stmt of the group. ++ RHS1 is the lhs of stmt recorded in group_info. If another rhs of use_stmt ++ is not a constant, return UNCERTAIN and re-check it later. */ ++ ++static unsigned ++check_isomorphic (gimple *use_stmt, gimple *first, ++ tree rhs1, vec &hetero_lhs) ++{ ++ /* Check same operation. */ ++ enum tree_code rhs_code_first = gimple_assign_rhs_code (first); ++ enum tree_code rhs_code_current = gimple_assign_rhs_code (use_stmt); ++ if (rhs_code_first != rhs_code_current) ++ return HETEROGENEOUS; ++ ++ /* For shift operations, operands should be equal. */ ++ if (shift_operation (rhs_code_current)) ++ { ++ tree shift_op_first = gimple_assign_rhs2 (first); ++ tree shift_op_current = gimple_assign_rhs2 (use_stmt); ++ if (!operand_equal_p (shift_op_first, shift_op_current, 0) ++ || !TREE_CONSTANT (shift_op_first)) ++ return HETEROGENEOUS; ++ ++ return ISOMORPHIC; ++ } ++ /* Type conversion expr or assignment. */ ++ if (gimple_num_ops (first) == 2) ++ return (rhs_code_first == NOP_EXPR || rhs_code_first == CONVERT_EXPR ++ || rhs_code_first == SSA_NAME) ? ISOMORPHIC : HETEROGENEOUS; ++ ++ /* We find USE_STMT from lhs of a stmt, denote it as rhs1 of USE_STMT and ++ the other one as rhs2. Check if define-stmt of current rhs2 is isomorphic ++ with define-stmt of rhs2 in the first USE_STMT at this group. */ ++ tree rhs2_first = gimple_assign_rhs1 (use_stmt) == rhs1 ++ ? gimple_assign_rhs2 (first) : gimple_assign_rhs1 (first); ++ tree rhs2_curr = gimple_assign_rhs1 (use_stmt) == rhs1 ++ ? 
gimple_assign_rhs2 (use_stmt) : gimple_assign_rhs1 (use_stmt); ++ ++ if (check_same_oprand_type (rhs2_first, rhs2_curr)) ++ { ++ if (TREE_CONSTANT (rhs2_curr)) ++ return ISOMORPHIC; ++ else if (hetero_lhs.contains (rhs2_curr)) ++ return HETEROGENEOUS; ++ ++ /* Provisionally set the stmt as uncertain and analyze the whole group ++ in function CHECK_UNCERTAIN later if all use_stmts are uncertain. */ ++ return UNCERTAIN; ++ } ++ return HETEROGENEOUS; ++} ++ ++static bool ++unsupported_operations (gimple *stmt) ++{ ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ return code == COND_EXPR; ++} ++ ++/* Check if the single use_stmt of STMT is isomorphic with the first one's ++ use_stmt in current group. */ ++ ++static unsigned ++check_use_stmt (group_info elmt, gimple *&first, ++ vec &tmp_stmts, vec &hetero_lhs) ++{ ++ if (gimple_code (elmt->stmt) != GIMPLE_ASSIGN) ++ return HETEROGENEOUS; ++ use_operand_p dummy; ++ tree lhs = gimple_assign_lhs (elmt->stmt); ++ gimple *use_stmt = NULL; ++ single_imm_use (lhs, &dummy, &use_stmt); ++ /* STMTs with three rhs are not supported, e.g., COND_EXPR. */ ++ if (use_stmt == NULL || gimple_code (use_stmt) != GIMPLE_ASSIGN ++ || unsupported_operations (use_stmt) || bit_field_p (use_stmt)) ++ return HETEROGENEOUS; ++ tmp_stmts.safe_push (use_stmt); ++ if (first == NULL) ++ { ++ first = use_stmt; ++ return UNINITIALIZED; ++ } ++ /* Check if current use_stmt and the first member's use_stmt in the group ++ are of the same type. */ ++ tree first_lhs = gimple_assign_lhs (first); ++ tree curr_lhs = gimple_assign_lhs (use_stmt); ++ if (!check_same_oprand_type (first_lhs, curr_lhs)) ++ return HETEROGENEOUS; ++ return check_isomorphic (use_stmt, first, lhs, hetero_lhs); ++} ++ ++/* Replace stmt field in group with stmts in TMP_STMTS, and insert their ++ lhs_info to ISOMER_LHS. 
*/ ++ ++static void ++update_isomer_lhs (vec *group, unsigned group_num, ++ unsigned iteration, isomer_stmt_lhs &isomer_lhs, ++ vec tmp_stmts, int &profit, ++ vec &merged_groups) ++{ ++ group_info elmt = NULL; ++ /* Do not insert temp array if isomorphic stmts from grouped load have ++ only casting operations. Once isomorphic calculation has 3 operands, ++ such as plus operation, this group can be regarded as cut point. */ ++ bool operated = (gimple_num_ops (tmp_stmts[0]) == 3); ++ /* Do not insert temp arrays if search of isomorphic stmts reaches ++ MEM stmts. */ ++ bool has_vdef = gimple_vdef (tmp_stmts[0]) != NULL; ++ bool merge = false; ++ for (unsigned i = 0; i < group->length (); i++) ++ { ++ elmt = (*group)[i]; ++ elmt->stmt = has_vdef ? NULL : tmp_stmts[i]; ++ elmt->cut_point = has_vdef ? false : (elmt->cut_point || operated); ++ elmt->uncertain = false; ++ elmt->done = has_vdef; ++ tree lhs = gimple_assign_lhs (tmp_stmts[i]); ++ if (isomer_lhs.find (lhs) != isomer_lhs.end ()) ++ { ++ merge = true; ++ continue; ++ } ++ isomer_lhs[lhs] = std::make_pair (group_num, iteration); ++ } ++ if (merge) ++ { ++ merged_groups.safe_push (group_num); ++ profit = 0; ++ return; ++ } ++ enum vect_cost_for_stmt kind = scalar_stmt; ++ int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); ++ profit = (tmp_stmts.length () - 1) * scalar_cost; ++} ++ ++/* Try to find rhs2 in ISOMER_LHS, if all rhs2 were found and their group_num ++ and iteration are same, GROUP is isomorphic. */ ++ ++static unsigned ++check_isomorphic_rhs (vec *group, vec &tmp_stmts, ++ isomer_stmt_lhs &isomer_lhs) ++{ ++ group_info elmt = NULL; ++ gimple *stmt = NULL; ++ unsigned j = 0; ++ unsigned group_num = -1u; ++ unsigned iteration = -1u; ++ tree rhs1 = NULL; ++ tree rhs2 = NULL; ++ unsigned status = UNINITIALIZED; ++ FOR_EACH_VEC_ELT (*group, j, elmt) ++ { ++ rhs1 = gimple_assign_lhs (elmt->stmt); ++ stmt = tmp_stmts[j]; ++ rhs2 = (rhs1 == gimple_assign_rhs1 (stmt)) ++ ? 
gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt); ++ isomer_stmt_lhs::iterator iter = isomer_lhs.find (rhs2); ++ if (iter != isomer_lhs.end ()) ++ { ++ if (group_num == -1u) ++ { ++ group_num = iter->second.first; ++ iteration = iter->second.second; ++ status |= ISOMORPHIC; ++ continue; ++ } ++ if (iter->second.first == group_num ++ && iter->second.second == iteration) ++ { ++ status |= ISOMORPHIC; ++ continue; ++ } ++ return HETEROGENEOUS; ++ } ++ else ++ status |= UNCERTAIN; ++ } ++ return status; ++} ++ ++/* Mark every member of GROUP as uncertain and not done. */ ++ ++static void ++update_uncertain_stmts (vec *group, unsigned group_num, ++ unsigned iteration, vec &tmp_stmts) ++{ ++ unsigned j = 0; ++ group_info elmt = NULL; ++ FOR_EACH_VEC_ELT (*group, j, elmt) ++ { ++ elmt->uncertain = true; ++ elmt->done = false; ++ } ++} ++ ++/* Mark GROUP as done and push lhs of stmts in TMP_STMTS into HETERO_LHS. */ ++ ++static void ++set_hetero (vec *group, vec &hetero_lhs, ++ vec &tmp_stmts) ++{ ++ group_info elmt = NULL; ++ unsigned i = 0; ++ for (i = 0; i < group->length (); i++) ++ { ++ elmt = (*group)[i]; ++ elmt->uncertain = false; ++ elmt->done = true; ++ } ++ gimple *stmt = NULL; ++ FOR_EACH_VEC_ELT (tmp_stmts, i, stmt) ++ if (stmt != NULL) ++ hetero_lhs.safe_push (gimple_assign_lhs (stmt)); ++} ++ ++/* Given an uncertain group, TMP_STMTS are use_stmts of stmts in GROUP. ++ Rhs1 is the lhs of stmt in GROUP, rhs2 is the other rhs of USE_STMT. ++ ++ Try to find rhs2 in ISOMER_LHS, if all found rhs2 have same group_num ++ and iteration, this uncertain group is isomorphic. ++ ++ If no rhs matched, this GROUP remains uncertain and update group_info. ++ ++ Otherwise, this GROUP is heterogeneous and return true to end analysis ++ for this group. 
*/ ++ ++static bool ++check_uncertain (vec *group, unsigned group_num, ++ unsigned iteration, int &profit, ++ vec &tmp_stmts, isomer_stmt_lhs &isomer_lhs, ++ vec &hetero_lhs, vec &merged_groups) ++{ ++ unsigned status = check_isomorphic_rhs (group, tmp_stmts, isomer_lhs); ++ bool done = false; ++ switch (status) ++ { ++ case UNCERTAIN: ++ update_uncertain_stmts (group, group_num, iteration, tmp_stmts); ++ break; ++ case ISOMORPHIC: ++ update_isomer_lhs (group, group_num, iteration, isomer_lhs, ++ tmp_stmts, profit, merged_groups); ++ break; ++ default: ++ set_hetero (group, hetero_lhs, tmp_stmts); ++ done = true; ++ } ++ return done; ++} ++ ++/* Return false if analysis of this group is not finished, e.g., isomorphic or ++ uncertain. Calculate the profit if vectorized. */ ++ ++static bool ++check_group (vec *group, unsigned group_num, unsigned iteration, ++ int &profit, vec &merged_groups, ++ isomer_stmt_lhs &isomer_lhs, vec &hetero_lhs) ++{ ++ unsigned j = 0; ++ group_info elmt = NULL; ++ gimple *first = NULL; ++ unsigned res = 0; ++ /* Record single use stmts in TMP_STMTS and decide whether to replace stmts ++ in ginfo in succeeding processes. */ ++ auto_vec tmp_stmts; ++ FOR_EACH_VEC_ELT (*group, j, elmt) ++ { ++ if (merged_groups.contains (group_num)) ++ return true; ++ res |= check_use_stmt (elmt, first, tmp_stmts, hetero_lhs); ++ } ++ ++ /* Update each group member according to RES. */ ++ switch (res) ++ { ++ case ISOMORPHIC: ++ update_isomer_lhs (group, group_num, iteration, isomer_lhs, ++ tmp_stmts, profit, merged_groups); ++ return false; ++ case UNCERTAIN: ++ return check_uncertain (group, group_num, iteration, profit, ++ tmp_stmts, isomer_lhs, hetero_lhs, ++ merged_groups); ++ default: ++ set_hetero (group, hetero_lhs, tmp_stmts); ++ return true; ++ } ++} ++ ++/* Return true if all analyses are done except uncertain groups. 
*/ ++ ++static bool ++end_of_search (vec *> &circular_queue, ++ vec &merged_groups) ++{ ++ unsigned i = 0; ++ vec *group = NULL; ++ group_info elmt = NULL; ++ FOR_EACH_VEC_ELT (circular_queue, i, group) ++ { ++ if (merged_groups.contains (i)) ++ continue; ++ elmt = (*group)[0]; ++ /* If there are any isomorphic use_stmts, continue analysis of isomorphic ++ use_stmts. */ ++ if (!elmt->done && !elmt->uncertain) ++ return false; ++ } ++ return true; ++} ++ ++/* Push valid stmts to STMTS as cutpoints. Return true if any was pushed. */ ++ ++static bool ++check_any_cutpoints (vec *> &circular_queue, ++ vec *&stmts, vec &merged_groups) ++{ ++ unsigned front = 0; ++ vec *group = NULL; ++ group_info elmt = NULL; ++ unsigned max = circular_queue.length () * circular_queue[0]->length (); ++ vec_alloc (stmts, max); ++ while (front < circular_queue.length ()) ++ { ++ unsigned i = 0; ++ if (merged_groups.contains (front)) ++ { ++ front++; ++ continue; ++ } ++ group = circular_queue[front++]; ++ FOR_EACH_VEC_ELT (*group, i, elmt) ++ if (elmt->stmt != NULL && elmt->done && elmt->cut_point) ++ stmts->safe_push (elmt->stmt); ++ } ++ return stmts->length () != 0; ++} ++ ++/* Grouped loads are isomorphic. Make pair for group number and iteration, ++ map load stmt to this pair. We set iteration 0 here. */ ++ ++static void ++init_isomer_lhs (vec *> &groups, isomer_stmt_lhs &isomer_lhs) ++{ ++ vec *group = NULL; ++ group_info elmt = NULL; ++ unsigned i = 0; ++ FOR_EACH_VEC_ELT (groups, i, group) ++ { ++ unsigned j = 0; ++ FOR_EACH_VEC_ELT (*group, j, elmt) ++ isomer_lhs[gimple_assign_lhs (elmt->stmt)] = std::make_pair (i, 0); ++ } ++} ++ ++/* It's not a strict analysis of load/store profit. Assume scalar and vector ++ load/store are of the same cost. The result PROFIT equals profit form ++ vectorizing of scalar loads/stores minus cost of a vectorized load/store. 
*/ ++ ++static int ++load_store_profit (unsigned scalar_mem_ops, unsigned vf, unsigned new_mem_ops) ++{ ++ int profit = 0; ++ enum vect_cost_for_stmt kind = scalar_load; ++ int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); ++ profit += (scalar_mem_ops - (scalar_mem_ops / vf)) * scalar_cost; ++ profit -= new_mem_ops / vf * scalar_cost; ++ kind = scalar_store; ++ scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); ++ profit -= new_mem_ops / vf * scalar_cost; ++ return profit; ++} ++ ++/* Breadth first search the graph consisting of define-use chain starting from ++ the circular queue initialized by function BUILD_QUEUE. Find single use of ++ each stmt in group and check if they are isomorphic. Isomorphic is defined ++ as same rhs type, same operator, and isomorphic calculation of each rhs ++ starting from load. If another rhs is uncertain to be isomorphic, put it ++ at the end of circular queue and re-analyze it during the next iteration. ++ If a group shares the same use_stmt with another group, skip one of them in ++ succeeding processes as merged. Iterate the circular queue until all ++ remaining groups are heterogeneous or reach MEM stmts. If all other groups ++ have finished the analysis, and the remaining groups are uncertain, ++ return false to avoid endless loop. */ ++ ++bool ++bfs_find_isomer_stmts (vec *> &circular_queue, ++ stmts_profit &profit_pair, unsigned vf, ++ bool &reach_vdef) ++{ ++ isomer_stmt_lhs isomer_lhs; ++ auto_vec hetero_lhs; ++ auto_vec merged_groups; ++ vec *group = NULL; ++ /* True if analysis finishes. */ ++ bool done = false; ++ int profit_sum = 0; ++ vec *stmts = NULL; ++ init_isomer_lhs (circular_queue, isomer_lhs); ++ for (unsigned i = 1; !done; ++i) ++ { ++ unsigned front = 0; ++ /* Re-initialize DONE to TRUE while a new iteration begins. 
*/ ++ done = true; ++ while (front < circular_queue.length ()) ++ { ++ int profit = 0; ++ group = circular_queue[front]; ++ done &= check_group (group, front, i, profit, merged_groups, ++ isomer_lhs, hetero_lhs); ++ profit_sum += profit; ++ if (profit != 0 && (*group)[0]->stmt == NULL) ++ { ++ reach_vdef = true; ++ return false; ++ } ++ ++front; ++ } ++ /* Uncertain result, return. */ ++ if (!done && end_of_search (circular_queue, merged_groups)) ++ return false; ++ } ++ if (check_any_cutpoints (circular_queue, stmts, merged_groups)) ++ { ++ profit_pair.first = stmts; ++ unsigned loads = circular_queue.length () * circular_queue[0]->length (); ++ profit_pair.second = profit_sum + load_store_profit (loads, vf, ++ stmts->length ()); ++ if (profit_pair.second > 0) ++ return true; ++ } ++ return false; ++} ++ ++/* Delete each ginfo in WORKLISTS; the vectors themselves are not freed. */ ++ ++static void ++free_ginfos (vec *> &worklists) ++{ ++ vec *worklist; ++ unsigned i = 0; ++ while (i < worklists.length ()) ++ { ++ worklist = worklists[i++]; ++ group_info ginfo; ++ unsigned j = 0; ++ FOR_EACH_VEC_ELT (*worklist, j, ginfo) ++ delete ginfo; ++ } ++} ++ ++static void ++release_tmp_stmts (vf_stmts_profit_map &candi_stmts) ++{ ++ vf_stmts_profit_map::iterator iter; ++ for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter) ++ iter->second.first->release (); ++} ++ ++/* Choose the group of stmt with maximun profit. 
*/ ++ ++static bool ++decide_stmts_by_profit (vf_stmts_profit_map &candi_stmts, vec &stmts) ++{ ++ vf_stmts_profit_map::iterator iter; ++ int profit = 0; ++ int max = 0; ++ vec *tmp = NULL; ++ for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter) ++ { ++ profit = iter->second.second; ++ if (profit > max) ++ { ++ tmp = iter->second.first; ++ max = profit; ++ } ++ } ++ if (max == 0) ++ { ++ release_tmp_stmts (candi_stmts); ++ return false; ++ } ++ unsigned i = 0; ++ gimple *stmt = NULL; ++ FOR_EACH_VEC_ELT (*tmp, i, stmt) ++ stmts.safe_push (stmt); ++ release_tmp_stmts (candi_stmts); ++ return stmts.length () != 0; ++} ++ ++/* Find isomorphic stmts from grouped loads with vector factor VF. ++ ++ Given source code as follows and ignore casting. ++ ++ a0 = (a[0] + b[0]) + ((a[4] - b[4]) << 16); ++ a1 = (a[1] + b[1]) + ((a[5] - b[5]) << 16); ++ a2 = (a[2] + b[2]) + ((a[6] - b[6]) << 16); ++ a3 = (a[3] + b[3]) + ((a[7] - b[7]) << 16); ++ ++ We get grouped loads in VINFO as ++ ++ GROUP_1 GROUP_2 ++ _1 = *a _11 = *b ++ _2 = *(a + 1) _12 = *(b + 1) ++ _3 = *(a + 2) _13 = *(b + 2) ++ _4 = *(a + 3) _14 = *(b + 3) ++ _5 = *(a + 4) _15 = *(b + 4) ++ _6 = *(a + 5) _16 = *(b + 5) ++ _7 = *(a + 6) _17 = *(b + 6) ++ _8 = *(a + 7) _18 = *(b + 7) ++ ++ First we try VF = 8, we get two worklists ++ ++ WORKLIST_1 WORKLIST_2 ++ _1 = *a _11 = *b ++ _2 = *(a + 1) _12 = *(b + 1) ++ _3 = *(a + 2) _13 = *(b + 2) ++ _4 = *(a + 3) _14 = *(b + 3) ++ _5 = *(a + 4) _15 = *(b + 4) ++ _6 = *(a + 5) _16 = *(b + 5) ++ _7 = *(a + 6) _17 = *(b + 6) ++ _8 = *(a + 7) _18 = *(b + 7) ++ ++ We find _111 = _1 + _11 and _115 = _5 - _15 are not isomorphic, ++ so we try VF = VF / 2. 
++ ++ GROUP_1 GROUP_2 ++ _1 = *a _5 = *(a + 4) ++ _2 = *(a + 1) _6 = *(a + 5) ++ _3 = *(a + 2) _7 = *(a + 6) ++ _4 = *(a + 3) _8 = *(a + 7) ++ ++ GROUP_3 GROUP_4 ++ _11 = *b _15 = *(b + 4) ++ _12 = *(b + 1) _16 = *(b + 5) ++ _13 = *(b + 2) _17 = *(b + 6) ++ _14 = *(b + 3) _18 = *(b + 7) ++ ++ We first analyze group_1, and find all operations are isomorphic, then ++ replace stmts in group_1 with their use_stmts. Group_2 as well. ++ ++ GROUP_1 GROUP_2 ++ _111 = _1 + _11 _115 = _5 - _15 ++ _112 = _2 + _12 _116 = _6 - _16 ++ _113 = _3 + _13 _117 = _7 - _17 ++ _114 = _4 + _14 _118 = _8 - _18 ++ ++ When analyzing group_3 and group_4, we find their use_stmts are the same ++ as group_1 and group_2. So group_3 is regarded as being merged to group_1 ++ and group_4 being merged to group_2. In future procedures, we will skip ++ group_3 and group_4. ++ ++ We repeat such processing until operations are not isomorphic or searching ++ reaches MEM stmts. In our given case, searching ends up at a0, a1, a2 and ++ a3. */ ++ ++static bool ++find_isomorphic_stmts (loop_vec_info vinfo, vec &stmts) ++{ ++ unsigned vf = get_max_vf (vinfo); ++ if (vf == 0) ++ return false; ++ auto_vec *> circular_queue; ++ /* Pair of cut-point stmts and the corresponding vectorizing profit. */ ++ stmts_profit profit_map; ++ /* Map from vector factor to its cut_points and profit. 
*/ ++ vf_stmts_profit_map candi_stmts; ++ bool reach_vdef = false; ++ while (vf > 2) ++ { ++ if (build_queue (vinfo, vf, circular_queue) == 0) ++ return false; ++ if (!bfs_find_isomer_stmts (circular_queue, profit_map, vf, reach_vdef)) ++ { ++ if (reach_vdef) ++ { ++ release_tmp_stmts (candi_stmts); ++ free_ginfos (circular_queue); ++ circular_queue.release (); ++ return false; ++ } ++ vf /= 2; ++ free_ginfos (circular_queue); ++ circular_queue.release (); ++ continue; ++ } ++ candi_stmts[vf] = profit_map; ++ free_ginfos (circular_queue); ++ vf /= 2; ++ circular_queue.release (); ++ } ++ return decide_stmts_by_profit (candi_stmts, stmts); ++} ++ + /* Distributes the code from LOOP in such a way that producer statements + are placed before consumer statements. Tries to separate only the + statements from STMTS into separate loops. Returns the number of +diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +index 7990e31de..1e332d3c5 100644 +--- a/gcc/tree-vect-loop.c ++++ b/gcc/tree-vect-loop.c +@@ -2516,9 +2516,11 @@ vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int *n_stmts) + + Apply a set of analyses on LOOP, and create a loop_vec_info struct + for it. The different analyses will record information in the +- loop_vec_info struct. */ ++ loop_vec_info struct. When RESULT_ONLY_P is true, quit analysis ++ if loop is vectorizable, otherwise, do not delete vinfo.*/ + opt_loop_vec_info +-vect_analyze_loop (class loop *loop, vec_info_shared *shared) ++vect_analyze_loop (class loop *loop, vec_info_shared *shared, ++ bool result_only_p) + { + auto_vector_modes vector_modes; + +@@ -2545,6 +2547,8 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) + unsigned n_stmts = 0; + machine_mode autodetected_vector_mode = VOIDmode; + opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); ++ /* Loop_vinfo for loop-distribution pass. 
*/ ++ opt_loop_vec_info fail_loop_vinfo = opt_loop_vec_info::success (NULL); + machine_mode next_vector_mode = VOIDmode; + poly_uint64 lowest_th = 0; + unsigned vectorized_loops = 0; +@@ -2633,6 +2637,13 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) + if (res) + { + LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; ++ /* In loop-distribution pass, we only need to get loop_vinfo, do not ++ conduct further operations. */ ++ if (result_only_p) ++ { ++ loop->aux = (loop_vec_info) loop_vinfo; ++ return loop_vinfo; ++ } + vectorized_loops++; + + /* Once we hit the desired simdlen for the first time, +@@ -2724,7 +2735,19 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) + } + else + { +- delete loop_vinfo; ++ /* If current analysis shows LOOP is unable to vectorize, loop_vinfo ++ will be deleted. If LOOP is under ldist analysis, backup it before ++ it is deleted and return it if all modes are analyzed and still ++ fail to vectorize. */ ++ if (result_only_p && (mode_i == vector_modes.length () ++ || autodetected_vector_mode == VOIDmode)) ++ { ++ fail_loop_vinfo = loop_vinfo; ++ } ++ else ++ { ++ delete loop_vinfo; ++ } + if (fatal) + { + gcc_checking_assert (first_loop_vinfo == NULL); +@@ -2773,6 +2796,14 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) + return first_loop_vinfo; + } + ++ /* Return loop_vinfo for ldist if loop is unvectorizable. 
*/ ++ if (result_only_p && (mode_i == vector_modes.length () ++ || autodetected_vector_mode == VOIDmode)) ++ { ++ loop->aux = (loop_vec_info) fail_loop_vinfo; ++ return fail_loop_vinfo; ++ } ++ + return opt_loop_vec_info::propagate_failure (res); + } + +diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +index 1c4a6c421..dc8175f00 100644 +--- a/gcc/tree-vectorizer.h ++++ b/gcc/tree-vectorizer.h +@@ -1896,7 +1896,8 @@ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, + enum tree_code); + extern bool needs_fold_left_reduction_p (tree, tree_code); + /* Drive for loop analysis stage. */ +-extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *); ++extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *, ++ bool result_only_p = false); + extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); + extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, + tree *, bool); +-- +2.27.0.windows.1 + diff --git a/0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch b/0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch new file mode 100644 index 0000000000000000000000000000000000000000..c004eacda34c3d397829bfec9f003a6b91891914 --- /dev/null +++ b/0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch @@ -0,0 +1,267 @@ +From 013544d0b477647c8835a8806c75e7b09155b8ed Mon Sep 17 00:00:00 2001 +From: benniaobufeijiushiji +Date: Mon, 8 Aug 2022 09:13:53 +0800 +Subject: [PATCH 31/35] [loop-vect] Transfer arrays using registers between + loops For vectorized stores in loop, if all succeed loops immediately use the + data, transfer data using registers instead of load store to prevent overhead + from memory access. 
+ +--- + gcc/testsuite/gcc.dg/vect/vect-perm-1.c | 45 ++++++ + gcc/tree-vect-stmts.c | 181 ++++++++++++++++++++++++ + 2 files changed, 226 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/vect/vect-perm-1.c + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-perm-1.c b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c +new file mode 100644 +index 000000000..d8b29fbd5 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-options "-O3 -fdump-tree-vect-all-details -save-temps" } */ ++ ++#include ++#include ++ ++static unsigned inline abs2 (unsigned a) ++{ ++ unsigned s = ((a>>15)&0x10001)*0xffff; ++ return (a+s)^s; ++} ++ ++int foo (unsigned *a00, unsigned *a11, unsigned *a22, unsigned *a33) ++{ ++ unsigned tmp[4][4]; ++ unsigned a0, a1, a2, a3; ++ int sum = 0; ++ for (int i = 0; i < 4; i++) ++ { ++ int t0 = a00[i] + a11[i]; ++ int t1 = a00[i] - a11[i]; ++ int t2 = a22[i] + a33[i]; ++ int t3 = a22[i] - a33[i]; ++ tmp[i][0] = t0 + t2; ++ tmp[i][2] = t0 - t2; ++ tmp[i][1] = t1 + t3; ++ tmp[i][3] = t1 - t3; ++ } ++ for (int i = 0; i < 4; i++) ++ { ++ int t0 = tmp[0][i] + tmp[1][i]; ++ int t1 = tmp[0][i] - tmp[1][i]; ++ int t2 = tmp[2][i] + tmp[3][i]; ++ int t3 = tmp[2][i] - tmp[3][i]; ++ a0 = t0 + t2; ++ a2 = t0 - t2; ++ a1 = t1 + t3; ++ a3 = t1 - t3; ++ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3); ++ } ++ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 16 "vect" } } */ +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index 2c2197022..98b233718 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -2276,6 +2276,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) + return NULL_TREE; + } + ++/* Check succeedor BB, BB without load is regarded as empty BB. 
Ignore empty ++ BB in DFS. */ ++ ++static unsigned ++mem_refs_in_bb (basic_block bb, vec &stmts) ++{ ++ unsigned num = 0; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); ++ !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (is_gimple_debug (stmt)) ++ continue; ++ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt) ++ && !gimple_has_volatile_ops (stmt)) ++ { ++ if (gimple_assign_rhs_code (stmt) == MEM_REF ++ || gimple_assign_rhs_code (stmt) == ARRAY_REF) ++ { ++ stmts.safe_push (stmt); ++ num++; ++ } ++ else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF ++ || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF) ++ num++; ++ } ++ } ++ return num; ++} ++ ++static bool ++check_same_base (vec *datarefs, data_reference_p dr) ++{ ++ for (unsigned ui = 0; ui < datarefs->length (); ui++) ++ { ++ tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0); ++ tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0); ++ if (TREE_CODE (op1) != TREE_CODE (op2)) ++ continue; ++ if (TREE_CODE (op1) == ADDR_EXPR) ++ { ++ op1 = TREE_OPERAND (op1, 0); ++ op2 = TREE_OPERAND (op2, 0); ++ } ++ enum tree_code code = TREE_CODE (op1); ++ switch (code) ++ { ++ case VAR_DECL: ++ if (DECL_NAME (op1) == DECL_NAME (op2) ++ && DR_IS_READ ((*datarefs)[ui])) ++ return true; ++ break; ++ case SSA_NAME: ++ if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2) ++ && DR_IS_READ ((*datarefs)[ui])) ++ return true; ++ break; ++ default: ++ break; ++ } ++ } ++ return false; ++} ++ ++/* Iterate all load STMTS, if satisfying same base vectorized stmt, then return, ++ Otherwise, set SUCCESS to false. 
*/ ++ ++static void ++check_vec_use (loop_vec_info loop_vinfo, vec &stmts, ++ stmt_vec_info stmt_info, bool &success) ++{ ++ if (stmt_info == NULL) ++ { ++ success = false; ++ return; ++ } ++ if (DR_IS_READ (stmt_info->dr_aux.dr)) ++ { ++ success = false; ++ return; ++ } ++ unsigned ui = 0; ++ gimple *candidate = NULL; ++ FOR_EACH_VEC_ELT (stmts, ui, candidate) ++ { ++ if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE) ++ continue; ++ ++ if (candidate->bb != candidate->bb->loop_father->header) ++ { ++ success = false; ++ return; ++ } ++ auto_vec datarefs; ++ tree res = find_data_references_in_bb (candidate->bb->loop_father, ++ candidate->bb, &datarefs); ++ if (res == chrec_dont_know) ++ { ++ success = false; ++ return; ++ } ++ if (check_same_base (&datarefs, stmt_info->dr_aux.dr)) ++ return; ++ } ++ success = false; ++} ++ ++/* Depth first search from present BB. If successor has load STMTS, ++ stop further searching. */ ++ ++static void ++dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info, ++ bool &success, vec &visited_bbs) ++{ ++ if (bb == cfun->cfg->x_exit_block_ptr) ++ { ++ success = false; ++ return; ++ } ++ if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch) ++ return; ++ ++ visited_bbs.safe_push (bb); ++ auto_vec stmts; ++ unsigned num = mem_refs_in_bb (bb, stmts); ++ /* Empty BB. */ ++ if (num == 0) ++ { ++ edge e; ++ edge_iterator ei; ++ FOR_EACH_EDGE (e, ei, bb->succs) ++ { ++ dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs); ++ if (!success) ++ return; ++ } ++ return; ++ } ++ /* Non-empty BB. */ ++ check_vec_use (loop_vinfo, stmts, stmt_info, success); ++} ++ ++/* For grouped store, check if all successors of present BB have vectorized ++ load from same base of store. If so, set memory_access_type using ++ VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES. 
*/ ++ ++static bool ++conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo) ++{ ++ gimple *stmt = stmt_vinfo->stmt; ++ if (gimple_code (stmt) != GIMPLE_ASSIGN) ++ return false; ++ ++ if (DR_IS_READ (stmt_vinfo->dr_aux.dr)) ++ return false; ++ ++ basic_block bb = stmt->bb; ++ bool success = true; ++ auto_vec visited_bbs; ++ visited_bbs.safe_push (bb); ++ edge e; ++ edge_iterator ei; ++ FOR_EACH_EDGE (e, ei, bb->succs) ++ dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs); ++ return success; ++} ++ + /* A subroutine of get_load_store_type, with a subset of the same + arguments. Handle the case where STMT_INFO is part of a grouped load + or store. +@@ -2434,6 +2601,20 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, + *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; + overrun_p = would_overrun_p; + } ++ ++ if (*memory_access_type == VMAT_LOAD_STORE_LANES ++ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST ++ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters), ++ loop_vinfo->vectorization_factor) ++ && conti_perm (stmt_info, loop_vinfo) ++ && (vls_type == VLS_LOAD ++ ? vect_grouped_load_supported (vectype, single_element_p, ++ group_size) ++ : vect_grouped_store_supported (vectype, group_size))) ++ { ++ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; ++ overrun_p = would_overrun_p; ++ } + } + + /* As a last resort, trying using a gather load or scatter store. 
+-- +2.27.0.windows.1 + diff --git a/0080-Struct-Reorg-Add-Unsafe-Structure-Pointer-Compressio.patch b/0080-Struct-Reorg-Add-Unsafe-Structure-Pointer-Compressio.patch new file mode 100644 index 0000000000000000000000000000000000000000..ee51cb3e5fbc1f1c8906e813530b3777316abbf8 --- /dev/null +++ b/0080-Struct-Reorg-Add-Unsafe-Structure-Pointer-Compressio.patch @@ -0,0 +1,1061 @@ +From 7dc6940ba0f463137ff6cf98032d1e98edecde54 Mon Sep 17 00:00:00 2001 +From: liyancheng <412998149@qq.com> +Date: Fri, 25 Nov 2022 19:36:59 +0800 +Subject: [PATCH 32/35] [Struct Reorg] Add Unsafe Structure Pointer Compression + +Unsafe structure pointer compression allows some danger conversions to +achieve faster performance. +Add flag -fipa-struct-reorg=5 to enable unsafe structure pointer compression. +--- + gcc/common.opt | 6 +- + gcc/ipa-struct-reorg/ipa-struct-reorg.c | 309 +++++++++++++++--- + .../gcc.dg/struct/csr_skip_void_struct_name.c | 53 +++ + gcc/testsuite/gcc.dg/struct/pc_cast_int.c | 91 ++++++ + .../gcc.dg/struct/pc_compress_and_decomress.c | 90 +++++ + gcc/testsuite/gcc.dg/struct/pc_ptr2void.c | 87 +++++ + .../gcc.dg/struct/pc_simple_rewrite_pc.c | 112 +++++++ + .../gcc.dg/struct/pc_skip_void_struct_name.c | 53 +++ + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 8 + + 9 files changed, 757 insertions(+), 52 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_skip_void_struct_name.c + create mode 100644 gcc/testsuite/gcc.dg/struct/pc_cast_int.c + create mode 100644 gcc/testsuite/gcc.dg/struct/pc_compress_and_decomress.c + create mode 100644 gcc/testsuite/gcc.dg/struct/pc_ptr2void.c + create mode 100644 gcc/testsuite/gcc.dg/struct/pc_simple_rewrite_pc.c + create mode 100644 gcc/testsuite/gcc.dg/struct/pc_skip_void_struct_name.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index c9b099817..384595f16 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1889,9 +1889,9 @@ Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization + Perform structure layout 
optimizations. + + fipa-struct-reorg= +-Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 4) +--fipa-struct-reorg=[0,1,2,3,4] adding none, struct-reorg, reorder-fields, +-dfe, safe-pointer-compression optimizations. ++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 5) ++-fipa-struct-reorg=[0,1,2,3,4,5] adding none, struct-reorg, reorder-fields, ++dfe, safe-pointer-compression, unsafe-pointer-compression optimizations. + + fipa-extend-auto-profile + Common Report Var(flag_ipa_extend_auto_profile) +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +index 3550411dc..ee4893dfb 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +@@ -264,7 +264,8 @@ enum struct_layout_opt_level + COMPLETE_STRUCT_RELAYOUT = 1 << 1, + STRUCT_REORDER_FIELDS = 1 << 2, + DEAD_FIELD_ELIMINATION = 1 << 3, +- POINTER_COMPRESSION_SAFE = 1 << 4 ++ POINTER_COMPRESSION_SAFE = 1 << 4, ++ POINTER_COMPRESSION_UNSAFE = 1 << 5 + }; + + /* Defines the target pointer size of compressed pointer, which should be 8, +@@ -1266,12 +1267,12 @@ csrtype::init_type_info (void) + + /* Close enough to pad to improve performance. + 33~63 should pad to 64 but 33~48 (first half) are too far away, and +- 65~127 should pad to 128 but 65~80 (first half) are too far away. */ ++ 65~127 should pad to 128 but 65~70 (first half) are too far away. 
*/ + if (old_size > 48 && old_size < 64) + { + new_size = 64; + } +- if (old_size > 80 && old_size < 128) ++ if (old_size > 70 && old_size < 128) + { + new_size = 128; + } +@@ -1421,8 +1422,12 @@ public: + bool pc_candidate_tree_p (tree); + bool pc_type_conversion_candidate_p (tree); + bool pc_direct_rewrite_chance_p (tree, tree &); ++ bool pc_simplify_chance_for_compress_p (gassign *, tree); ++ bool compress_candidate_without_check (gimple_stmt_iterator *, tree, tree &); + bool compress_candidate_with_check (gimple_stmt_iterator *, tree, tree &); + bool compress_candidate (gassign *, gimple_stmt_iterator *, tree, tree &); ++ bool decompress_candidate_without_check (gimple_stmt_iterator *, ++ tree, tree, tree &, tree &); + bool decompress_candidate_with_check (gimple_stmt_iterator *, tree, tree &); + bool decompress_candidate (gimple_stmt_iterator *, tree, tree, tree &, + tree &); +@@ -1996,27 +2001,95 @@ ipa_struct_relayout::maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi, + { + return false; + } +- gsi_next (gsi); +- gimple *stmt2 = gsi_stmt (*gsi); +- +- if (gimple_code (stmt2) == GIMPLE_ASSIGN +- && gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR) ++ // Check uses. 
++ imm_use_iterator imm_iter_lhs; ++ use_operand_p use_p_lhs; ++ FOR_EACH_IMM_USE_FAST (use_p_lhs, imm_iter_lhs, gimple_assign_lhs (stmt)) + { +- tree lhs = gimple_assign_lhs (stmt2); +- tree rhs1 = gimple_assign_rhs1 (stmt2); +- if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type) +- || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type)) ++ gimple *stmt2 = USE_STMT (use_p_lhs); ++ if (gimple_code (stmt2) != GIMPLE_ASSIGN) ++ continue; ++ if (gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt2); ++ tree rhs1 = gimple_assign_rhs1 (stmt2); ++ if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type) ++ || types_compatible_p (inner_type (TREE_TYPE (lhs)), ++ ctype.type)) ++ { ++ tree num = NULL; ++ if (is_result_of_mult (cst, &num, ++ TYPE_SIZE_UNIT (ctype.type))) ++ { ++ times = TREE_INT_CST_LOW (num); ++ return true; ++ } ++ } ++ } ++ // For pointer compression. ++ else if (gimple_assign_rhs_code (stmt2) == PLUS_EXPR) + { +- tree num = NULL; +- if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type))) ++ // Check uses. ++ imm_use_iterator imm_iter_cast; ++ use_operand_p use_p_cast; ++ FOR_EACH_IMM_USE_FAST (use_p_cast, imm_iter_cast, ++ gimple_assign_lhs (stmt2)) + { +- times = TREE_INT_CST_LOW (num); +- ret = true; ++ gimple *stmt_cast = USE_STMT (use_p_cast); ++ if (gimple_code (stmt_cast) != GIMPLE_ASSIGN) ++ continue; ++ if (gimple_assign_cast_p (stmt_cast)) ++ { ++ tree lhs_type = inner_type (TREE_TYPE ( ++ gimple_assign_lhs (stmt_cast))); ++ if (types_compatible_p (lhs_type, ctype.type)) ++ { ++ tree num = NULL; ++ if (is_result_of_mult (cst, &num, ++ TYPE_SIZE_UNIT (ctype.type))) ++ { ++ times = TREE_INT_CST_LOW (num); ++ return true; ++ } ++ } ++ } ++ } ++ } ++ } ++ } ++ // For pointer compression. 
++ if (gimple_assign_rhs_code (stmt) == TRUNC_DIV_EXPR) ++ { ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ tree lhs = gimple_assign_lhs (stmt); ++ if (lhs == NULL_TREE) ++ return false; ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ if (is_gimple_debug (use_stmt)) ++ continue; ++ if (gimple_code (use_stmt) != GIMPLE_ASSIGN) ++ continue; ++ if (gimple_assign_cast_p (use_stmt)) ++ { ++ tree lhs_type = inner_type (TREE_TYPE ( ++ gimple_assign_lhs (use_stmt))); ++ if (TYPE_UNSIGNED (lhs_type) ++ && TREE_CODE (lhs_type) == INTEGER_TYPE ++ && TYPE_PRECISION (lhs_type) == compressed_size) ++ { ++ tree num = NULL; ++ if (is_result_of_mult (cst, &num, ++ TYPE_SIZE_UNIT (ctype.type))) ++ { ++ times = TREE_INT_CST_LOW (num); ++ return true; ++ } + } + } + } +- gsi_prev (gsi); +- return ret; + } + return false; + } +@@ -3110,7 +3183,9 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) + e = escape_separate_instance; + } + +- if (e != does_not_escape) ++ if (e != does_not_escape ++ && (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT ++ || replace_type_map.get (type->type) == NULL)) + type->mark_escape (e, NULL); + } + +@@ -3793,7 +3868,9 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple + if (TREE_CODE (side) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (side))) + return; +- d->type->mark_escape (escape_cast_another_ptr, stmt); ++ if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT ++ || replace_type_map.get (d->type->type) == NULL) ++ d->type->mark_escape (escape_cast_another_ptr, stmt); + return; + } + +@@ -3810,7 +3887,9 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple + else + { + /* *_1 = &MEM[(void *)&x + 8B]. 
*/ +- type->mark_escape (escape_cast_another_ptr, stmt); ++ if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT ++ || replace_type_map.get (type->type) == NULL) ++ type->mark_escape (escape_cast_another_ptr, stmt); + } + } + else if (type != d->type) +@@ -4550,7 +4629,9 @@ ipa_struct_reorg::check_definition_assign (srdecl *decl, vec &worklist) + /* Casts between pointers and integer are escaping. */ + if (gimple_assign_cast_p (stmt)) + { +- type->mark_escape (escape_cast_int, stmt); ++ if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT ++ || replace_type_map.get (type->type) == NULL) ++ type->mark_escape (escape_cast_int, stmt); + return; + } + +@@ -4897,7 +4978,9 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, vec &worklist) + /* Casts between pointers and integer are escaping. */ + if (gimple_assign_cast_p (stmt)) + { +- type->mark_escape (escape_cast_int, stmt); ++ if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT ++ || replace_type_map.get (type->type) == NULL) ++ type->mark_escape (escape_cast_int, stmt); + return; + } + +@@ -5566,9 +5649,9 @@ ipa_struct_reorg::prune_escaped_types (void) + + /* Prune types that escape, all references to those types + will have been removed in the above loops. */ +- /* The escape type is not deleted in STRUCT_LAYOUT_OPTIMIZE, +- Then the type that contains the escaped type fields +- can find complete information. */ ++ /* The escape type is not deleted in current_layout_opt_level after ++ STRUCT_REORDER_FIELDS, then the type that contains the ++ escaped type fields can find complete information. */ + if (current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < types.length ();) +@@ -6052,17 +6135,17 @@ ipa_struct_reorg::compress_ptr_to_offset (tree xhs, srtype *type, + tree step2 = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_unsigned_type_node, + step1, TYPE_SIZE_UNIT (type->newtype[0])); + +- /* Emit gimple _X3 = _X2 + 1. 
*/ +- tree step3 = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, +- step2, build_one_cst (long_unsigned_type_node)); ++ /* Emit _X3 = (compressed_size) _X2. */ ++ tree pc_type = make_unsigned_type (compressed_size); ++ tree step3 = gimplify_build1 (gsi, NOP_EXPR, pc_type, step2); + +- /* Emit _X4 = (compressed_size) _X3. */ +- tree step4 = gimplify_build1 (gsi, NOP_EXPR, +- make_unsigned_type (compressed_size), step3); ++ /* Emit gimple _X4 = _X3 + 1. */ ++ tree step4 = gimplify_build2 (gsi, PLUS_EXPR, pc_type, step3, ++ build_one_cst (pc_type)); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- print_generic_expr (dump_file, step3); ++ print_generic_expr (dump_file, step4); + fprintf (dump_file, "\n"); + } + return step4; +@@ -6104,7 +6187,7 @@ ipa_struct_reorg::decompress_offset_to_ptr (tree xhs, srtype *type, + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- print_generic_expr (dump_file, step3); ++ print_generic_expr (dump_file, step4); + fprintf (dump_file, "\n"); + } + return step4; +@@ -6267,6 +6350,49 @@ ipa_struct_reorg::pc_direct_rewrite_chance_p (tree rhs, tree &new_rhs) + return false; + } + ++/* The following cases can simplify the checking of null pointer: ++ 1. rhs defined from POINTER_PLUS_EXPR. ++ 2. rhs used as COMPONENT_REF in this basic block. 
*/ ++ ++bool ++ipa_struct_reorg::pc_simplify_chance_for_compress_p (gassign *stmt, ++ tree rhs) ++{ ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); ++ ++ if (def_stmt && is_gimple_assign (def_stmt) ++ && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR) ++ return true; ++ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, rhs) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ if (use_stmt->bb != stmt->bb || !is_gimple_assign (use_stmt)) ++ continue; ++ ++ tree use_rhs = gimple_assign_rhs1 (use_stmt); ++ if (TREE_CODE (use_rhs) == COMPONENT_REF ++ && TREE_OPERAND (TREE_OPERAND (use_rhs, 0), 0) == rhs) ++ return true; ++ } ++ return false; ++} ++ ++/* Perform compression directly without checking null pointer. */ ++ ++bool ++ipa_struct_reorg::compress_candidate_without_check (gimple_stmt_iterator *gsi, ++ tree rhs, ++ tree &new_rhs) ++{ ++ srtype *type = get_compression_candidate_type (rhs); ++ gcc_assert (type != NULL); ++ new_rhs = compress_ptr_to_offset (new_rhs, type, gsi); ++ return true; ++} ++ + /* Perform pointer compression with check. The conversion will be as shown in + the following example: + Orig bb: +@@ -6368,6 +6494,9 @@ ipa_struct_reorg::compress_candidate (gassign *stmt, gimple_stmt_iterator *gsi, + { + if (pc_direct_rewrite_chance_p (rhs, new_rhs)) + return true; ++ else if (current_layout_opt_level & POINTER_COMPRESSION_UNSAFE ++ && pc_simplify_chance_for_compress_p (stmt, rhs)) ++ return compress_candidate_without_check (gsi, rhs, new_rhs); + + return compress_candidate_with_check (gsi, rhs, new_rhs); + } +@@ -6430,6 +6559,79 @@ ipa_struct_reorg::create_bb_for_decompress_candidate (basic_block last_bb, + return new_bb; + } + ++/* Try decompress candidate without check. 
*/ ++ ++bool ++ipa_struct_reorg::decompress_candidate_without_check (gimple_stmt_iterator *gsi, ++ tree lhs, tree rhs, ++ tree &new_lhs, ++ tree &new_rhs) ++{ ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ bool processed = false; ++ ++ if (!gsi_one_before_end_p (*gsi)) ++ { ++ gsi_next (gsi); ++ gimple *next_stmt = gsi_stmt (*gsi); ++ if (gimple_assign_rhs_class (next_stmt) == GIMPLE_SINGLE_RHS) ++ { ++ tree next_rhs = gimple_assign_rhs1 (next_stmt); ++ /* If current lhs is used as rhs in the next stmt: ++ -> _1 = t->s ++ tt->s = _1. */ ++ if (lhs == next_rhs) ++ { ++ /* Check whether: ++ 1. the lhs is only used in the next stmt. ++ 2. the next lhs is candidate type. */ ++ if (has_single_use (lhs) ++ && pc_candidate_tree_p (gimple_assign_lhs (next_stmt))) ++ { ++ processed = true; ++ /* Copy directly without conversion after update type. */ ++ TREE_TYPE (new_lhs) ++ = make_unsigned_type (compressed_size); ++ } ++ } ++ /* -> _1 = t->s ++ _2 = _1->s ++ In this case, _1 might not be nullptr, so decompress it without ++ check. */ ++ else if (TREE_CODE (next_rhs) == COMPONENT_REF) ++ { ++ tree use_base = TREE_OPERAND (TREE_OPERAND (next_rhs, 0), 0); ++ if (use_base == lhs) ++ { ++ srtype *type = get_compression_candidate_type (rhs); ++ gcc_assert (type != NULL); ++ gsi_prev (gsi); ++ tree new_ref = NULL_TREE; ++ if (TREE_CODE (new_rhs) == MEM_REF) ++ new_ref = new_rhs; ++ else ++ { ++ tree base = TREE_OPERAND (TREE_OPERAND (new_rhs, 0), 0); ++ tree new_mem_ref = build_simple_mem_ref (base); ++ new_ref = build3 (COMPONENT_REF, ++ TREE_TYPE (new_rhs), ++ new_mem_ref, ++ TREE_OPERAND (new_rhs, 1), ++ NULL_TREE); ++ } ++ new_rhs = decompress_offset_to_ptr (new_ref, type, gsi); ++ processed = true; ++ gsi_next (gsi); ++ } ++ } ++ } ++ gsi_prev (gsi); ++ return processed; ++ } ++ return false; ++} ++ + /* Perform pointer decompression with check. 
The conversion will be as shown + in the following example: + Orig bb: +@@ -6532,7 +6734,10 @@ ipa_struct_reorg::decompress_candidate (gimple_stmt_iterator *gsi, + tree lhs, tree rhs, tree &new_lhs, + tree &new_rhs) + { +- // TODO: simplifiy check and rewrite will be pushed in next PR. ++ if (current_layout_opt_level & POINTER_COMPRESSION_UNSAFE ++ && decompress_candidate_without_check (gsi, lhs, rhs, new_lhs, new_rhs)) ++ return true; ++ + return decompress_candidate_with_check (gsi, rhs, new_rhs); + } + +@@ -7551,18 +7756,26 @@ ipa_struct_reorg::check_and_prune_struct_for_pointer_compression (void) + " skip compression.\n"); + continue; + } +- if ((current_layout_opt_level & POINTER_COMPRESSION_SAFE) +- && !type->has_legal_alloc_num) ++ if (!type->has_legal_alloc_num) + { +- if (dump_file) +- fprintf (dump_file, " has illegal struct array size," +- " skip compression.\n"); +- continue; ++ if (current_layout_opt_level & POINTER_COMPRESSION_UNSAFE) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has unknown alloc size, but" ++ " in unsafe mode, so"); ++ } ++ else ++ { ++ if (dump_file) ++ fprintf (dump_file, " has illegal struct array size," ++ " skip compression.\n"); ++ continue; ++ } + } + pc_transform_num++; + type->pc_candidate = true; + if (dump_file) +- fprintf (dump_file, " attemps to do pointer compression.\n"); ++ fprintf (dump_file, " attempts to do pointer compression.\n"); + } + + if (dump_file) +@@ -7584,14 +7797,10 @@ init_pointer_size_for_pointer_compression (void) + switch (param_pointer_compression_size) + { + case 8: +- compressed_size = 8; // sizeof (uint8) +- break; ++ // FALLTHRU + case 16: +- compressed_size = 16; // sizeof (uint16) +- break; +- case 32: +- compressed_size = 32; // sizeof (uint32) +- break; ++ // FALLTHRU ++ case 32: compressed_size = param_pointer_compression_size; break; + default: + error ("Invalid pointer compression size, using the following param: " + "\"--param pointer-compression-size=[8,16,32]\""); +@@ -7672,6 
+7881,8 @@ public: + unsigned int level = 0; + switch (struct_layout_optimize_level) + { ++ case 5: level |= POINTER_COMPRESSION_UNSAFE; ++ // FALLTHRU + case 4: level |= POINTER_COMPRESSION_SAFE; + // FALLTHRU + case 3: level |= DEAD_FIELD_ELIMINATION; +diff --git a/gcc/testsuite/gcc.dg/struct/csr_skip_void_struct_name.c b/gcc/testsuite/gcc.dg/struct/csr_skip_void_struct_name.c +new file mode 100644 +index 000000000..c5e4968d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_skip_void_struct_name.c +@@ -0,0 +1,53 @@ ++// Structures without names should not be optimized ++/* { dg-do compile } */ ++#include ++#include ++ ++typedef struct ++{ ++ int a; ++ float b; ++ double s1; ++ double s2; ++ double s3; ++ double s4; ++ double s5; ++ double s6; ++ double s7; ++ double s8; ++} str_t1; ++ ++#define N 1000 ++ ++int num; ++ ++int ++main () ++{ ++ int i, r; ++ ++ r = rand (); ++ num = r > N ? N : r; ++ str_t1 *p1 = calloc (num, sizeof (str_t1)); ++ ++ if (p1 == NULL) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ p1[i].a = 1; ++ ++ for (i = 0; i < num; i++) ++ p1[i].b = 2; ++ ++ for (i = 0; i < num; i++) ++ if (p1[i].a != 1) ++ abort (); ++ ++ for (i = 0; i < num; i++) ++ if (fabsf (p1[i].b - 2) > 0.0001) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." 
"struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/pc_cast_int.c b/gcc/testsuite/gcc.dg/struct/pc_cast_int.c +new file mode 100644 +index 000000000..6f67fc556 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/pc_cast_int.c +@@ -0,0 +1,91 @@ ++// Escape cast int for pointer compression ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++}; ++ ++ ++const int MAX = 100; ++network_t* net; ++node_p node; ++ ++int ++main () ++{ ++ net = (network_t*) calloc (1, sizeof(network_t)); ++ net->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ net->sorted_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ net->nodes = (node_p) calloc (MAX, sizeof (node_t)); ++ net->arcs->id = 100; ++ ++ node = net->nodes; ++ node_p n1 = (node_p) 0x123456; ++ ++ for (unsigned i = 0; i < MAX; i++) ++ { ++ node->pred = n1; ++ node = node + 1; ++ } ++ ++ node = net->nodes; ++ ++ for (unsigned i = 0; i < MAX; i++) ++ { ++ if (node->pred != n1) ++ { ++ abort (); ++ } ++ node = node + 1; ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in pointer compression" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/pc_compress_and_decomress.c 
b/gcc/testsuite/gcc.dg/struct/pc_compress_and_decomress.c +new file mode 100644 +index 000000000..d0b8d1afa +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/pc_compress_and_decomress.c +@@ -0,0 +1,90 @@ ++// Support basic pointer compression and decompression ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++}; ++ ++ ++const int MAX = 100; ++network_t* net; ++node_p node; ++ ++int ++main () ++{ ++ net = (network_t*) calloc (1, sizeof(network_t)); ++ net->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ net->sorted_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ net->nodes = (node_p) calloc (MAX, sizeof (node_t)); ++ net->arcs->id = 100; ++ ++ node = net->nodes; ++ ++ for (unsigned i = 0; i < MAX; i++) ++ { ++ node->pred = node; ++ node = node + 1; ++ } ++ ++ node = net->nodes; ++ ++ for (unsigned i = 0; i < MAX; i++) ++ { ++ if (node->pred != node) ++ { ++ abort (); ++ } ++ node = node + 1; ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in pointer compression is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/pc_ptr2void.c b/gcc/testsuite/gcc.dg/struct/pc_ptr2void.c +new file mode 100644 +index 000000000..5022c1967 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.dg/struct/pc_ptr2void.c +@@ -0,0 +1,87 @@ ++// Partially support escape_cast_void for pointer compression. ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs, sorted_arcs; ++ int x; ++ node_p nodes, stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++network_t* net = NULL; ++int cnt = 0; ++ ++__attribute__((noinline)) int ++primal_feasible (network_t *net) ++{ ++ void* stop; ++ node_t *node; ++ ++ node = net->nodes; ++ stop = (void *)net->stop_nodes; ++ for( node++; node < (node_t *)stop; node++ ) ++ { ++ net->x = 1; ++ printf( "PRIMAL NETWORK SIMPLEX: "); ++ } ++ return 0; ++} ++ ++int ++main () ++{ ++ net = (network_t*) calloc (1, 20); ++ net->nodes = calloc (MAX, sizeof (node_t)); ++ net->stop_nodes = net->nodes + MAX - 1; ++ cnt = primal_feasible( net ); ++ ++ net = (network_t*) calloc (1, 20); ++ if( !(net->arcs) ) ++ { ++ return -1; ++ } ++ return cnt; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in pointer compression is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/pc_simple_rewrite_pc.c b/gcc/testsuite/gcc.dg/struct/pc_simple_rewrite_pc.c +new file mode 100644 +index 000000000..98943c9b8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/pc_simple_rewrite_pc.c +@@ -0,0 +1,112 @@ ++// Check simplify rewrite chance for pointer compression and 
decompression ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++}; ++ ++ ++const int MAX = 100; ++network_t* net; ++node_p node; ++arc_p arc; ++ ++int ++main () ++{ ++ net = (network_t*) calloc (1, sizeof(network_t)); ++ net->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ net->sorted_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ net->nodes = (node_p) calloc (MAX, sizeof (node_t)); ++ net->arcs->id = 100; ++ ++ node = net->nodes; ++ arc = net->arcs; ++ ++ for (unsigned i = 0; i < MAX; i++) ++ { ++ arc->head = node; ++ arc->head->child = node; ++ node->potential = i + 1; ++ arc->cost = arc->head->potential; ++ arc->tail = node->sibling; ++ if (i % 2) ++ node->pred = net->nodes + i; ++ else ++ node->pred = NULL; ++ ++ if (node->pred && node->pred->child != NULL) ++ node->number = 0; ++ else ++ node->number = 1; ++ ++ node = node + 1; ++ arc = arc + 1; ++ } ++ ++ node = net->nodes; ++ arc = net->arcs; ++ ++ for (unsigned i = 0; i < MAX; i++) ++ { ++ node_p t = i % 2 ? node : NULL; ++ int tt = i % 2 ? 
0 : 1; ++ if (arc->head->pred != t || arc->cost == 0 ++ || arc->tail != node->sibling || node->number != tt) ++ { ++ abort (); ++ } ++ arc = arc + 1; ++ node = node + 1; ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in pointer compression is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/pc_skip_void_struct_name.c b/gcc/testsuite/gcc.dg/struct/pc_skip_void_struct_name.c +new file mode 100644 +index 000000000..a0e191267 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/pc_skip_void_struct_name.c +@@ -0,0 +1,53 @@ ++// Structures without names should not be optimized ++/* { dg-do compile } */ ++#include ++#include ++ ++typedef struct ++{ ++ int a; ++ float b; ++ double s1; ++ double s2; ++ double s3; ++ double s4; ++ double s5; ++ double s6; ++ double s7; ++ double s8; ++} str_t1; ++ ++#define N 1000 ++ ++int num; ++ ++int ++main () ++{ ++ int i, r; ++ ++ r = rand (); ++ num = r > N ? N : r; ++ str_t1 *p1 = calloc (num, sizeof (str_t1)); ++ ++ if (p1 == NULL) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ p1[i].a = 1; ++ ++ for (i = 0; i < num; i++) ++ p1[i].b = 2; ++ ++ for (i = 0; i < num; i++) ++ if (p1[i].a != 1) ++ abort (); ++ ++ for (i = 0; i < num; i++) ++ if (fabsf (p1[i].b - 2) > 0.0001) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in pointer compression" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +index 2eebef768..d7367ed96 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -85,6 +85,14 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \ + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \ + "" "-fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program" + ++# -fipa-struct-reorg=4 
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pc*.c]] \ ++ "" "-fipa-struct-reorg=4 -fdump-ipa-all -flto-partition=one -fwhole-program" ++ ++# -fipa-struct-reorg=5 ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pc*.c]] \ ++ "" "-fipa-struct-reorg=5 -fdump-ipa-all -flto-partition=one -fwhole-program" ++ + # All done. + torture-finish + dg-finish +-- +2.27.0.windows.1 + diff --git a/0081-Loop-distribution-Insert-temp-arrays-built-from-isom.patch b/0081-Loop-distribution-Insert-temp-arrays-built-from-isom.patch new file mode 100644 index 0000000000000000000000000000000000000000..2197b2fc467525adead4d2def3eebd2be9f80f5e --- /dev/null +++ b/0081-Loop-distribution-Insert-temp-arrays-built-from-isom.patch @@ -0,0 +1,826 @@ +From ca2a541ed3425bec64f97fe277c6c02bf4f20049 Mon Sep 17 00:00:00 2001 +From: benniaobufeijiushiji +Date: Thu, 27 Oct 2022 10:26:34 +0800 +Subject: [PATCH 33/35] [Loop-distribution] Insert temp arrays built from + isomorphic stmts Use option -ftree-slp-transpose-vectorize Build temp arrays + for isomorphic stmt and regard them as new seed_stmts for loop distribution. 
+ +--- + gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c | 67 +++ + gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c | 17 + + gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c | 19 + + gcc/tree-loop-distribution.c | 577 +++++++++++++++++++- + 4 files changed, 663 insertions(+), 17 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c +new file mode 100644 +index 000000000..649463647 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c +@@ -0,0 +1,67 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-do run { target { aarch64*-*-linux* } } } */ ++/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details -save-temps" } */ ++ ++#include ++#include ++ ++static unsigned inline abs2 (unsigned a) ++{ ++ unsigned s = ((a>>15)&0x10001)*0xffff; ++ return (a+s)^s; ++} ++ ++int foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib) ++{ ++ unsigned tmp[4][4]; ++ unsigned a0, a1, a2, a3; ++ int sum = 0; ++ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib) ++ { ++ a0 = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16); ++ a1 = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16); ++ a2 = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16); ++ a3 = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16); ++ int t0 = a0 + a1; ++ int t1 = a0 - a1; ++ int t2 = a2 + a3; ++ int t3 = a2 - a3; ++ tmp[i][0] = t0 + t2; ++ tmp[i][2] = t0 - t2; ++ tmp[i][1] = t1 + t3; ++ tmp[i][3] = t1 - t3; ++ } ++ for (int i = 0; i < 4; i++) ++ { ++ int t0 = tmp[0][i] + tmp[1][i]; ++ int t1 = tmp[0][i] - tmp[1][i]; ++ int t2 = tmp[2][i] + tmp[3][i]; ++ int t3 = tmp[2][i] - tmp[3][i]; ++ a0 = t0 + t2; ++ a2 = t0 - t2; ++ a1 = t1 + t3; ++ a3 = t1 - t3; ++ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3); ++ } ++ return 
(((unsigned short) sum) + ((unsigned) sum >>16)) >> 1; ++} ++ ++int main () ++{ ++ unsigned char oxa[128] = {0}; ++ unsigned char oxb[128] = {0}; ++ for (int i = 0; i < 128; i++) ++ { ++ oxa[i] += i * 3; ++ oxb[i] = i * 2; ++ } ++ int sum = foo (oxa, 16, oxb, 32); ++ if (sum != 736) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */ ++/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c +new file mode 100644 +index 000000000..1b50fd27d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */ ++ ++unsigned a0[4], a1[4], a2[4], a3[4]; ++ ++void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib) ++{ ++ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib) ++ { ++ a0[i] = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16); ++ a1[i] = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16); ++ a2[i] = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16); ++ a3[i] = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16); ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 
1 "ldist" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c +new file mode 100644 +index 000000000..94b992b05 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */ ++ ++unsigned a0[4], a1[4], a2[4], a3[4]; ++ ++void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib) ++{ ++ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib) ++ { ++ a0[i] = ((oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16)) + 1; ++ a1[i] = ((oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16)) - 2; ++ a2[i] = ((oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16)) * 3; ++ a3[i] = ((oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16)) / 4; ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */ ++/* { dg-final { scan-tree-dump-times "Insertion removed" 1 "ldist" } } */ ++/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */ +\ No newline at end of file +diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c +index c08af6562..88b56379c 100644 +--- a/gcc/tree-loop-distribution.c ++++ b/gcc/tree-loop-distribution.c +@@ -36,6 +36,47 @@ along with GCC; see the file COPYING3. If not see + | D(I) = A(I-1)*E + |ENDDO + ++ If an unvectorizable loop has grouped loads, and calculations from grouped ++ loads are isomorphic, build temp arrays using stmts where isomorphic ++ calculations end. After distribution, the partition built from temp ++ arrays can be vectorized in pass SLP after loop unrolling.
For example, ++ ++ |DO I = 1, N ++ | A = FOO (ARG_1); ++ | B = FOO (ARG_2); ++ | C = BAR_0 (A); ++ | D = BAR_1 (B); ++ |ENDDO ++ ++ is transformed to ++ ++ |DO I = 1, N ++ | J = FOO (ARG_1); ++ | K = FOO (ARG_2); ++ | X[I] = J; ++ | Y[I] = K; ++ | A = X[I]; ++ | B = Y[I]; ++ | C = BAR_0 (A); ++ | D = BAR_1 (B); ++ |ENDDO ++ ++ and is then distributed to ++ ++ |DO I = 1, N ++ | J = FOO (ARG_1); ++ | K = FOO (ARG_2); ++ | X[I] = J; ++ | Y[I] = K; ++ |ENDDO ++ ++ |DO I = 1, N ++ | A = X[I]; ++ | B = Y[I]; ++ | C = BAR_0 (A); ++ | D = BAR_1 (B); ++ |ENDDO ++ + Loop distribution is the dual of loop fusion. It separates statements + of a loop (or loop nest) into multiple loops (or loop nests) with the + same loop header. The major goal is to separate statements which may +@@ -44,7 +85,9 @@ along with GCC; see the file COPYING3. If not see + + 1) Seed partitions with specific type statements. For now we support + two types seed statements: statement defining variable used outside +- of loop; statement storing to memory. ++ of loop; statement storing to memory. Moreover, for unvectorizable ++ loops, we try to find isomorphic stmts from grouped load and build ++ temp arrays as new seed statements. + 2) Build reduced dependence graph (RDG) for loop to be distributed. + The vertices (RDG:V) model all statements in the loop and the edges + (RDG:E) model flow and control dependencies between statements. +@@ -643,7 +686,8 @@ class loop_distribution + /* Returns true when PARTITION1 and PARTITION2 access the same memory + object in RDG. */ + bool share_memory_accesses (struct graph *rdg, +- partition *partition1, partition *partition2); ++ partition *partition1, partition *partition2, ++ hash_set *excluded_arrays); + + /* For each seed statement in STARTING_STMTS, this function builds + partition for it by adding depended statements according to RDG. +@@ -686,8 +730,9 @@ class loop_distribution + + /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution. 
+ ALIAS_DDRS contains ddrs which need runtime alias check. */ +- void finalize_partitions (class loop *loop, vec +- *partitions, vec *alias_ddrs); ++ void finalize_partitions (class loop *loop, ++ vec *partitions, ++ vec *alias_ddrs, bitmap producers); + + /* Analyze loop form and if it's vectorizable to decide if we need to + insert temp arrays to distribute it. */ +@@ -701,6 +746,28 @@ class loop_distribution + + inline void rebuild_rdg (loop_p loop, struct graph *&rdg, + control_dependences *cd); ++ ++ /* If loop is not distributed, remove inserted temp arrays. */ ++ void remove_insertion (loop_p loop, struct graph *flow_only_rdg, ++ bitmap producers, struct partition *partition); ++ ++ /* Insert temp arrays if isomorphic computation exists. Temp arrays will be ++ regarded as SEED_STMTS for building partitions in succeeding processes. */ ++ bool insert_temp_arrays (loop_p loop, vec seed_stmts, ++ hash_set *tmp_array_vars, bitmap producers); ++ ++ void build_producers (loop_p loop, bitmap producers, ++ vec &transformed); ++ ++ void do_insertion (loop_p loop, struct graph *flow_only_rdg, tree iv, ++ bitmap cut_points, hash_set *tmp_array_vars, ++ bitmap producers); ++ ++ /* Fuse PARTITIONS built from inserted temp arrays into one partition, ++ fuse the rest into another. */ ++ void merge_remaining_partitions (vec *partitions, ++ bitmap producers); ++ + /* Distributes the code from LOOP in such a way that producer statements + are placed before consumer statements. Tries to separate only the + statements from STMTS into separate loops. 
Returns the number of +@@ -1913,7 +1980,8 @@ loop_distribution::classify_partition (loop_p loop, + + bool + loop_distribution::share_memory_accesses (struct graph *rdg, +- partition *partition1, partition *partition2) ++ partition *partition1, partition *partition2, ++ hash_set *excluded_arrays) + { + unsigned i, j; + bitmap_iterator bi, bj; +@@ -1947,7 +2015,10 @@ loop_distribution::share_memory_accesses (struct graph *rdg, + if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0) + && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0) + && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0) +- && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0)) ++ && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0) ++ /* An exception, if PARTITION1 and PARTITION2 contain the ++ temp array we inserted, do not merge them. */ ++ && !excluded_arrays->contains (DR_REF (dr1))) + return true; + } + } +@@ -2909,13 +2980,47 @@ fuse_memset_builtins (vec *partitions) + } + } + ++void ++loop_distribution::merge_remaining_partitions ++ (vec *partitions, ++ bitmap producers) ++{ ++ struct partition *partition = NULL; ++ struct partition *p1 = NULL, *p2 = NULL; ++ for (unsigned i = 0; partitions->iterate (i, &partition); i++) ++ { ++ if (bitmap_intersect_p (producers, partition->stmts)) ++ { ++ if (p1 == NULL) ++ { ++ p1 = partition; ++ continue; ++ } ++ partition_merge_into (NULL, p1, partition, FUSE_FINALIZE); ++ } ++ else ++ { ++ if (p2 == NULL) ++ { ++ p2 = partition; ++ continue; ++ } ++ partition_merge_into (NULL, p2, partition, FUSE_FINALIZE); ++ } ++ partitions->unordered_remove (i); ++ partition_free (partition); ++ i--; ++ } ++} ++ + void + loop_distribution::finalize_partitions (class loop *loop, + vec *partitions, +- vec *alias_ddrs) ++ vec *alias_ddrs, ++ bitmap producers) + { + unsigned i; +- struct partition *partition, *a; ++ struct partition *partition; + + if (partitions->length () == 1 + || alias_ddrs->length () > 0) +@@ -2947,13 +3052,7 @@ 
loop_distribution::finalize_partitions (class loop *loop, + || (loop->inner == NULL + && i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin)) + { +- a = (*partitions)[0]; +- for (i = 1; partitions->iterate (i, &partition); ++i) +- { +- partition_merge_into (NULL, a, partition, FUSE_FINALIZE); +- partition_free (partition); +- } +- partitions->truncate (1); ++ merge_remaining_partitions (partitions, producers); + } + + /* Fuse memset builtins if possible. */ +@@ -3758,6 +3857,404 @@ find_isomorphic_stmts (loop_vec_info vinfo, vec &stmts) + return decide_stmts_by_profit (candi_stmts, stmts); + } + ++/* Get iv from SEED_STMTS and make sure each seed_stmt has only one iv as index ++ and all indices are the same. */ ++ ++static tree ++find_index (vec seed_stmts) ++{ ++ if (seed_stmts.length () == 0) ++ return NULL; ++ bool found_index = false; ++ tree index = NULL; ++ unsigned ui = 0; ++ for (ui = 0; ui < seed_stmts.length (); ui++) ++ { ++ if (!gimple_vdef (seed_stmts[ui])) ++ return NULL; ++ tree lhs = gimple_assign_lhs (seed_stmts[ui]); ++ unsigned num_index = 0; ++ while (TREE_CODE (lhs) == ARRAY_REF) ++ { ++ if (TREE_CODE (TREE_OPERAND (lhs, 1)) == SSA_NAME) ++ { ++ num_index++; ++ if (num_index > 1) ++ return NULL; ++ if (index == NULL) ++ { ++ index = TREE_OPERAND (lhs, 1); ++ found_index = true; ++ } ++ else if (index != TREE_OPERAND (lhs, 1)) ++ return NULL; ++ } ++ lhs = TREE_OPERAND (lhs, 0); ++ } ++ if (!found_index) ++ return NULL; ++ } ++ return index; ++} ++ ++/* Check if expression of phi is an increment of a const.
*/ ++ ++static void ++check_phi_inc (struct vertex *v_phi, struct graph *rdg, bool &found_inc) ++{ ++ struct graph_edge *e_phi; ++ for (e_phi = v_phi->succ; e_phi; e_phi = e_phi->succ_next) ++ { ++ struct vertex *v_inc = &(rdg->vertices[e_phi->dest]); ++ if (!is_gimple_assign (RDGV_STMT (v_inc)) ++ || gimple_expr_code (RDGV_STMT (v_inc)) != PLUS_EXPR) ++ continue; ++ tree rhs1 = gimple_assign_rhs1 (RDGV_STMT (v_inc)); ++ tree rhs2 = gimple_assign_rhs2 (RDGV_STMT (v_inc)); ++ if (!(integer_onep (rhs1) || integer_onep (rhs2))) ++ continue; ++ struct graph_edge *e_inc; ++ /* find cycle with only two vertices inc and phi: inc <--> phi. */ ++ bool found_cycle = false; ++ for (e_inc = v_inc->succ; e_inc; e_inc = e_inc->succ_next) ++ { ++ if (e_inc->dest == e_phi->src) ++ { ++ found_cycle = true; ++ break; ++ } ++ } ++ if (!found_cycle) ++ continue; ++ found_inc = true; ++ } ++} ++ ++/* Check if phi satisfies form like PHI <0, i>. */ ++ ++static inline bool ++iv_check_phi_stmt (gimple *phi_stmt) ++{ ++ return gimple_phi_num_args (phi_stmt) == 2 ++ && (integer_zerop (gimple_phi_arg_def (phi_stmt, 0)) ++ || integer_zerop (gimple_phi_arg_def (phi_stmt, 1))); ++} ++ ++/* Make sure the iteration variable is a phi. */ ++ ++static tree ++get_iv_from_seed (struct graph *flow_only_rdg, vec seed_stmts) ++{ ++ tree index = find_index (seed_stmts); ++ if (index == NULL) ++ return NULL; ++ for (int i = 0; i < flow_only_rdg->n_vertices; i++) ++ { ++ struct vertex *v = &(flow_only_rdg->vertices[i]); ++ if (RDGV_STMT (v) != seed_stmts[0]) ++ continue; ++ struct graph_edge *e; ++ bool found_phi = false; ++ for (e = v->pred; e; e = e->pred_next) ++ { ++ struct vertex *v_phi = &(flow_only_rdg->vertices[e->src]); ++ gimple *phi_stmt = RDGV_STMT (v_phi); ++ if (gimple_code (phi_stmt) != GIMPLE_PHI ++ || gimple_phi_result (phi_stmt) != index) ++ continue; ++ if (!iv_check_phi_stmt (phi_stmt)) ++ return NULL; ++ /* find inc expr in succ of phi.
*/ ++ bool found_inc = false; ++ check_phi_inc (v_phi, flow_only_rdg, found_inc); ++ if (!found_inc) ++ return NULL; ++ found_phi = true; ++ break; ++ } ++ if (!found_phi) ++ return NULL; ++ break; ++ } ++ return index; ++} ++ ++/* Do not distribute loop if vertexes in ROOT_MAP have antidependence with in ++ FLOW_ONLY_RDG. */ ++ ++static bool ++check_no_dependency (struct graph *flow_only_rdg, bitmap root_map) ++{ ++ bitmap_iterator bi; ++ unsigned ui; ++ auto_vec visited_nodes; ++ auto_bitmap visited_map; ++ EXECUTE_IF_SET_IN_BITMAP (root_map, 0, ui, bi) ++ visited_nodes.safe_push (ui); ++ for (ui = 0; ui < visited_nodes.length (); ui++) ++ { ++ struct vertex *v = &(flow_only_rdg->vertices[visited_nodes[ui]]); ++ struct graph_edge *e; ++ for (e = v->succ; e; e = e->succ_next) ++ { ++ if (bitmap_bit_p (root_map, e->dest)) ++ return false; ++ if (bitmap_bit_p (visited_map, e->dest)) ++ continue; ++ visited_nodes.safe_push (e->dest); ++ bitmap_set_bit (visited_map, e->dest); ++ } ++ } ++ return true; ++} ++ ++/* Find isomorphic stmts from GROUPED_LOADS in VINFO and make sure ++ there is no dependency among those STMT we found. */ ++ ++static unsigned ++get_cut_points (struct graph *flow_only_rdg, bitmap cut_points, ++ loop_vec_info vinfo) ++{ ++ unsigned n_stmts = 0; ++ ++ /* STMTS that may be CUT_POINTS. 
*/ ++ auto_vec stmts; ++ if (!find_isomorphic_stmts (vinfo, stmts)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "No temp array insertion: no isomorphic stmts" ++ " were found.\n"); ++ return 0; ++ } ++ ++ for (int i = 0; i < flow_only_rdg->n_vertices; i++) ++ { ++ if (stmts.contains (RDG_STMT (flow_only_rdg, i))) ++ bitmap_set_bit (cut_points, i); ++ } ++ n_stmts = bitmap_count_bits (cut_points); ++ ++ bool succ = check_no_dependency (flow_only_rdg, cut_points); ++ if (!succ) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "No temp array inserted: data dependency" ++ " among isomorphic stmts.\n"); ++ return 0; ++ } ++ return n_stmts; ++} ++ ++static void ++build_temp_array (struct vertex *v, gimple_stmt_iterator &gsi, ++ poly_uint64 array_extent, tree iv, ++ hash_set *tmp_array_vars, vec *transformed) ++{ ++ gimple *stmt = RDGV_STMT (v); ++ tree lhs = gimple_assign_lhs (stmt); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "original stmt:\t"); ++ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS); ++ } ++ tree var_ssa = duplicate_ssa_name (lhs, stmt); ++ gimple_assign_set_lhs (stmt, var_ssa); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "changed to:\t"); ++ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS); ++ } ++ gimple_set_uid (gsi_stmt (gsi), -1); ++ tree vect_elt_type = TREE_TYPE (lhs); ++ tree array_type = build_array_type_nelts (vect_elt_type, array_extent); ++ tree array = create_tmp_var (array_type); ++ tree array_ssa = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL); ++ tmp_array_vars->add (array_ssa); ++ gimple *store = gimple_build_assign (array_ssa, var_ssa); ++ tree new_vdef = make_ssa_name (gimple_vop (cfun), store); ++ gsi_insert_after (&gsi, store, GSI_NEW_STMT); ++ gimple_set_vdef (store, new_vdef); ++ transformed->safe_push (store); ++ gimple_set_uid (gsi_stmt (gsi), -1); ++ tree array_ssa2 = 
build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL); ++ tmp_array_vars->add (array_ssa2); ++ gimple *load = gimple_build_assign (lhs, array_ssa2); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "insert stmt:\t"); ++ print_gimple_stmt (dump_file, store, 0, TDF_VOPS|TDF_MEMSYMS); ++ fprintf (dump_file, " and stmt:\t"); ++ print_gimple_stmt (dump_file, load, 0, TDF_VOPS|TDF_MEMSYMS); ++ } ++ gimple_set_vuse (load, new_vdef); ++ gsi_insert_after (&gsi, load, GSI_NEW_STMT); ++ gimple_set_uid (gsi_stmt (gsi), -1); ++} ++ ++/* Set bitmap PRODUCERS based on vec TRANSFORMED. */ ++ ++void ++loop_distribution::build_producers (loop_p loop, bitmap producers, ++ vec &transformed) ++{ ++ auto_vec stmts; ++ stmts_from_loop (loop, &stmts); ++ int i = 0; ++ gimple *stmt = NULL; ++ ++ FOR_EACH_VEC_ELT (stmts, i, stmt) ++ gimple_set_uid (stmt, i); ++ i = 0; ++ FOR_EACH_VEC_ELT (transformed, i, stmt) ++ bitmap_set_bit (producers, stmt->uid); ++} ++ ++/* Transform stmt ++ ++ A = FOO (ARG_1); ++ ++ to ++ ++ STMT_1: A1 = FOO (ARG_1); ++ STMT_2: X[I] = A1; ++ STMT_3: A = X[I]; ++ ++ Producer is STMT_2 who defines the temp array and consumer is ++ STMT_3 who uses the temp array. */ ++ ++void ++loop_distribution::do_insertion (loop_p loop, struct graph *flow_only_rdg, ++ tree iv, bitmap cut_points, ++ hash_set *tmp_array_vars, ++ bitmap producers) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "=== do insertion ===\n"); ++ ++ auto_vec transformed; ++ ++ /* Execution times of loop. */ ++ poly_uint64 array_extent ++ = tree_to_poly_uint64 (number_of_latch_executions (loop)) + 1; ++ ++ basic_block *bbs = get_loop_body_in_custom_order (loop, this, ++ bb_top_order_cmp_r); ++ ++ for (int i = 0; i < int (loop->num_nodes); i++) ++ { ++ basic_block bb = bbs[i]; ++ ++ /* Find all cut points in bb and transform them. 
*/ ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ unsigned j = gimple_uid (gsi_stmt (gsi)); ++ if (bitmap_bit_p (cut_points, j)) ++ { ++ struct vertex *v = &(flow_only_rdg->vertices[j]); ++ build_temp_array (v, gsi, array_extent, iv, tmp_array_vars, ++ &transformed); ++ } ++ } ++ } ++ build_producers (loop, producers, transformed); ++ update_ssa (TODO_update_ssa); ++ free (bbs); ++} ++ ++/* After temp array insertion, given stmts ++ STMT_1: M = FOO (ARG_1); ++ STMT_2: X[I] = M; ++ STMT_3: A = X[I]; ++ STMT_2 is the producer, STMT_1 is its prev and STMT_3 is its next. ++ Replace M with A, and remove STMT_2 and STMT_3. */ ++ ++static void ++reset_gimple_assign (struct graph *flow_only_rdg, struct partition *partition, ++ gimple_stmt_iterator &gsi, int j) ++{ ++ struct vertex *v = &(flow_only_rdg->vertices[j]); ++ gimple *stmt = RDGV_STMT (v); ++ gimple *prev = stmt->prev; ++ gimple *next = stmt->next; ++ tree n_lhs = gimple_assign_lhs (next); ++ gimple_assign_set_lhs (prev, n_lhs); ++ unlink_stmt_vdef (stmt); ++ if (partition) ++ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi))); ++ gsi_remove (&gsi, true); ++ release_defs (stmt); ++ if (partition) ++ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi))); ++ gsi_remove (&gsi, true); ++} ++ ++void ++loop_distribution::remove_insertion (loop_p loop, struct graph *flow_only_rdg, ++ bitmap producers, struct partition *partition) ++{ ++ basic_block *bbs = get_loop_body_in_custom_order (loop, this, ++ bb_top_order_cmp_r); ++ for (int i = 0; i < int (loop->num_nodes); i++) ++ { ++ basic_block bb = bbs[i]; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ unsigned j = gimple_uid (gsi_stmt (gsi)); ++ if (bitmap_bit_p (producers, j)) ++ reset_gimple_assign (flow_only_rdg, partition, gsi, j); ++ } ++ } ++ update_ssa (TODO_update_ssa); ++ free (bbs); ++} ++ ++/* Insert temp arrays if isomorphic 
computation exists. Temp arrays will be ++ regarded as SEED_STMTS for building partitions in succeeding processes. */ ++ ++bool ++loop_distribution::insert_temp_arrays (loop_p loop, vec seed_stmts, ++ hash_set *tmp_array_vars, bitmap producers) ++{ ++ struct graph *flow_only_rdg = build_rdg (loop, NULL); ++ gcc_checking_assert (flow_only_rdg != NULL); ++ tree iv = get_iv_from_seed (flow_only_rdg, seed_stmts); ++ if (iv == NULL) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d no temp array insertion: failed to get" ++ " iteration variable.\n", loop->num); ++ free_rdg (flow_only_rdg); ++ return false; ++ } ++ auto_bitmap cut_points; ++ loop_vec_info vinfo = loop_vec_info_for_loop (loop); ++ unsigned n_cut_points = get_cut_points (flow_only_rdg, cut_points, vinfo); ++ delete vinfo; ++ loop->aux = NULL; ++ if (n_cut_points == 0) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d no temp array insertion: no cut points" ++ " found.\n", loop->num); ++ free_rdg (flow_only_rdg); ++ return false; ++ } ++ do_insertion (loop, flow_only_rdg, iv, cut_points, tmp_array_vars, producers); ++ if (dump_enabled_p ()) ++ { ++ dump_user_location_t loc = find_loop_location (loop); ++ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion done:" ++ " %d temp arrays inserted in Loop %d.\n", ++ n_cut_points, loop->num); ++ } ++ free_rdg (flow_only_rdg); ++ return true; ++} ++ ++static bool find_seed_stmts_for_distribution (class loop *, vec *); ++ + /* Distributes the code from LOOP in such a way that producer statements + are placed before consumer statements. Tries to separate only the + statements from STMTS into separate loops. Returns the number of +@@ -3814,6 +4311,34 @@ loop_distribution::distribute_loop (class loop *loop, vec stmts, + return 0; + } + ++ /* Try to distribute LOOP if LOOP is simple enough and unable to vectorize. 
++ If LOOP has grouped loads, recursively find isomorphic stmts and insert ++ temp arrays, rebuild RDG and call find_seed_stmts_for_distribution ++ to replace STMTS. */ ++ ++ hash_set tmp_array_vars; ++ ++ /* STMTs that define those inserted TMP_ARRAYs. */ ++ auto_bitmap producers; ++ ++ /* New SEED_STMTS after insertion. */ ++ auto_vec work_list; ++ bool insert_success = false; ++ if (may_insert_temp_arrays (loop, rdg, cd)) ++ { ++ if (insert_temp_arrays (loop, stmts, &tmp_array_vars, producers)) ++ { ++ if (find_seed_stmts_for_distribution (loop, &work_list)) ++ { ++ insert_success = true; ++ stmts = work_list; ++ } ++ else ++ remove_insertion (loop, rdg, producers, NULL); ++ rebuild_rdg (loop, rdg, cd); ++ } ++ } ++ + data_reference_p dref; + for (i = 0; datarefs_vec.iterate (i, &dref); ++i) + dref->aux = (void *) (uintptr_t) i; +@@ -3894,7 +4419,7 @@ loop_distribution::distribute_loop (class loop *loop, vec stmts, + for (int j = i + 1; + partitions.iterate (j, &partition); ++j) + { +- if (share_memory_accesses (rdg, into, partition)) ++ if (share_memory_accesses (rdg, into, partition, &tmp_array_vars)) + { + partition_merge_into (rdg, into, partition, FUSE_SHARE_REF); + partitions.unordered_remove (j); +@@ -3944,7 +4469,7 @@ loop_distribution::distribute_loop (class loop *loop, vec stmts, + } + } + +- finalize_partitions (loop, &partitions, &alias_ddrs); ++ finalize_partitions (loop, &partitions, &alias_ddrs, producers); + + /* If there is a reduction in all partitions make sure the last one + is not classified for builtin code generation. */ +@@ -3962,6 +4487,24 @@ loop_distribution::distribute_loop (class loop *loop, vec stmts, + } + + nbp = partitions.length (); ++ ++ /* If we have inserted TMP_ARRAYs but there is only one partition left in ++ the succeeding processes, remove those inserted TMP_ARRAYs back to the ++ original version. 
*/ ++ ++ if (nbp == 1 && insert_success) ++ { ++ struct partition *partition = NULL; ++ partitions.iterate (0, &partition); ++ remove_insertion (loop, rdg, producers, partition); ++ if (dump_enabled_p ()) ++ { ++ dump_user_location_t loc = find_loop_location (loop); ++ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion removed:" ++ " unable to distribute loop %d.\n", loop->num); ++ } ++ } ++ + if (nbp == 0 + || (nbp == 1 && !partition_builtin_p (partitions[0])) + || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) +-- +2.27.0.windows.1 + diff --git a/0082-Revert-Backport-tree-optimization-102880-make-PHI-OP.patch b/0082-Revert-Backport-tree-optimization-102880-make-PHI-OP.patch new file mode 100644 index 0000000000000000000000000000000000000000..3642252201ac4158c0c3a478bea412e9dc4b6dd3 --- /dev/null +++ b/0082-Revert-Backport-tree-optimization-102880-make-PHI-OP.patch @@ -0,0 +1,206 @@ +From 717782ec36469eb81650b07e8b5536281a59993d Mon Sep 17 00:00:00 2001 +From: zhongyunde +Date: Tue, 29 Nov 2022 22:12:29 +0800 +Subject: [PATCH 34/35] Revert "[Backport] tree-optimization/102880 - make + PHI-OPT recognize more CFGs" + +This reverts commit 77398954ce517aa011b7a254c7aa2858521b2093. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 --------- + gcc/tree-ssa-phiopt.c | 73 +++++++++------------- + 2 files changed, 29 insertions(+), 75 deletions(-) + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c +deleted file mode 100644 +index 21aa66e38..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c ++++ /dev/null +@@ -1,31 +0,0 @@ +-/* { dg-do compile } */ +-/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */ +- +-int __GIMPLE (ssa,startwith("phiopt")) +-foo (int a, int b, int flag) +-{ +- int res; +- +- __BB(2): +- if (flag_2(D) != 0) +- goto __BB6; +- else +- goto __BB4; +- +- __BB(4): +- if (a_3(D) > b_4(D)) +- goto __BB7; +- else +- goto __BB6; +- +- __BB(6): +- goto __BB7; +- +- __BB(7): +- res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D)); +- return res_1; +-} +- +-/* We should be able to detect MAX despite the extra edge into +- the middle BB. */ +-/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */ +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 079d29e74..21ac08145 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -219,6 +219,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + + /* If either bb1's succ or bb2 or bb2's succ is non NULL. */ + if (EDGE_COUNT (bb1->succs) == 0 ++ || bb2 == NULL + || EDGE_COUNT (bb2->succs) == 0) + continue; + +@@ -278,14 +279,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + || (e1->flags & EDGE_FALLTHRU) == 0) + continue; + ++ /* Also make sure that bb1 only have one predecessor and that it ++ is bb. */ ++ if (!single_pred_p (bb1) ++ || single_pred (bb1) != bb) ++ continue; ++ + if (do_store_elim) + { +- /* Also make sure that bb1 only have one predecessor and that it +- is bb. 
*/ +- if (!single_pred_p (bb1) +- || single_pred (bb1) != bb) +- continue; +- + /* bb1 is the middle block, bb2 the join block, bb the split block, + e1 the fallthrough edge from bb1 to bb2. We can't do the + optimization if the join block has more than two predecessors. */ +@@ -330,11 +331,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + node. */ + gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE); + +- gphi *newphi; +- if (single_pred_p (bb1) +- && (newphi = factor_out_conditional_conversion (e1, e2, phi, +- arg0, arg1, +- cond_stmt))) ++ gphi *newphi = factor_out_conditional_conversion (e1, e2, phi, ++ arg0, arg1, ++ cond_stmt); ++ if (newphi != NULL) + { + phi = newphi; + /* factor_out_conditional_conversion may create a new PHI in +@@ -355,14 +355,12 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p +- && single_pred_p (bb1) + && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2, + phi, arg0, arg1)) + cfgchanged = true; + else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +- else if (single_pred_p (bb1) +- && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) ++ else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + } + } +@@ -393,41 +391,35 @@ replace_phi_edge_with_variable (basic_block cond_block, + edge e, gphi *phi, tree new_tree) + { + basic_block bb = gimple_bb (phi); ++ basic_block block_to_remove; + gimple_stmt_iterator gsi; + + /* Change the PHI argument to new. */ + SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree); + + /* Remove the empty basic block. 
*/ +- edge edge_to_remove; + if (EDGE_SUCC (cond_block, 0)->dest == bb) +- edge_to_remove = EDGE_SUCC (cond_block, 1); +- else +- edge_to_remove = EDGE_SUCC (cond_block, 0); +- if (EDGE_COUNT (edge_to_remove->dest->preds) == 1) + { +- e->flags |= EDGE_FALLTHRU; +- e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); +- e->probability = profile_probability::always (); +- delete_basic_block (edge_to_remove->dest); +- +- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ +- gsi = gsi_last_bb (cond_block); +- gsi_remove (&gsi, true); ++ EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU; ++ EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); ++ EDGE_SUCC (cond_block, 0)->probability = profile_probability::always (); ++ ++ block_to_remove = EDGE_SUCC (cond_block, 1)->dest; + } + else + { +- /* If there are other edges into the middle block make +- CFG cleanup deal with the edge removal to avoid +- updating dominators here in a non-trivial way. */ +- gcond *cond = as_a (last_stmt (cond_block)); +- if (edge_to_remove->flags & EDGE_TRUE_VALUE) +- gimple_cond_make_false (cond); +- else +- gimple_cond_make_true (cond); ++ EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU; ++ EDGE_SUCC (cond_block, 1)->flags ++ &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); ++ EDGE_SUCC (cond_block, 1)->probability = profile_probability::always (); ++ ++ block_to_remove = EDGE_SUCC (cond_block, 0)->dest; + } ++ delete_basic_block (block_to_remove); + +- statistics_counter_event (cfun, "Replace PHI with variable", 1); ++ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ ++ gsi = gsi_last_bb (cond_block); ++ gsi_remove (&gsi, true); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, +@@ -854,9 +846,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + allow it and move it once the transformation is done. 
*/ + if (!empty_block_p (middle_bb)) + { +- if (!single_pred_p (middle_bb)) +- return false; +- + stmt_to_move = last_and_only_stmt (middle_bb); + if (!stmt_to_move) + return false; +@@ -1236,11 +1225,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb, + } + else + { +- if (!single_pred_p (middle_bb)) +- return 0; +- statistics_counter_event (cfun, "Replace PHI with " +- "variable/value_replacement", 1); +- + /* Replace the PHI arguments with arg. */ + SET_PHI_ARG_DEF (phi, e0->dest_idx, arg); + SET_PHI_ARG_DEF (phi, e1->dest_idx, arg); +@@ -1255,6 +1239,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb, + } + return 1; + } ++ + } + + /* Now optimize (x != 0) ? x + y : y to just x + y. */ +-- +2.27.0.windows.1 + diff --git a/0083-Struct-reorg-Add-struct-semi-relayout-optimize.patch b/0083-Struct-reorg-Add-struct-semi-relayout-optimize.patch new file mode 100644 index 0000000000000000000000000000000000000000..05581a4d8f95a29e442ff5ea3fc8cbd7708c936e --- /dev/null +++ b/0083-Struct-reorg-Add-struct-semi-relayout-optimize.patch @@ -0,0 +1,1297 @@ +From cebf7903906d0b530fce240b601591d6254ee53f Mon Sep 17 00:00:00 2001 +From: benniaobufeijiushiji +Date: Wed, 30 Nov 2022 22:42:35 +0800 +Subject: [PATCH 35/35] [Struct reorg] Add struct-semi-relayout optimize + +Add support for structs with multi-allocation which is escaped in +complete-relayout. +Add flag -fipa-struct-reorg=6 and parameter semi-relayout-level. 
+--- + gcc/common.opt | 7 +- + gcc/ipa-struct-reorg/ipa-struct-reorg.c | 916 +++++++++++++++++- + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 8 + + gcc/params.opt | 4 + + .../gcc.dg/struct/semi_relayout_rewrite.c | 86 ++ + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 4 + + 6 files changed, 992 insertions(+), 33 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index 384595f16..588e19400 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1889,9 +1889,10 @@ Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization + Perform structure layout optimizations. + + fipa-struct-reorg= +-Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 5) +--fipa-struct-reorg=[0,1,2,3,4,5] adding none, struct-reorg, reorder-fields, +-dfe, safe-pointer-compression, unsafe-pointer-compression optimizations. ++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 6) ++-fipa-struct-reorg=[0,1,2,3,4,5,6] adding none, struct-reorg, reorder-fields, ++dfe, safe-pointer-compression, unsafe-pointer-compression, semi-relayout ++optimizations. 
+ + fipa-extend-auto-profile + Common Report Var(flag_ipa_extend_auto_profile) +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +index ee4893dfb..4751711fe 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +@@ -265,7 +265,8 @@ enum struct_layout_opt_level + STRUCT_REORDER_FIELDS = 1 << 2, + DEAD_FIELD_ELIMINATION = 1 << 3, + POINTER_COMPRESSION_SAFE = 1 << 4, +- POINTER_COMPRESSION_UNSAFE = 1 << 5 ++ POINTER_COMPRESSION_UNSAFE = 1 << 5, ++ SEMI_RELAYOUT = 1 << 6 + }; + + /* Defines the target pointer size of compressed pointer, which should be 8, +@@ -280,6 +281,7 @@ void get_base (tree &base, tree expr); + static unsigned int current_layout_opt_level; + + hash_map replace_type_map; ++hash_map semi_relayout_map; + + /* Return true if one of these types is created by struct-reorg. */ + +@@ -398,7 +400,9 @@ srtype::srtype (tree type) + visited (false), + pc_candidate (false), + has_legal_alloc_num (false), +- has_alloc_array (0) ++ has_alloc_array (0), ++ semi_relayout (false), ++ bucket_parts (0) + { + for (int i = 0; i < max_split; i++) + newtype[i] = NULL_TREE; +@@ -883,6 +887,66 @@ srfield::create_new_optimized_fields (tree newtype[max_split], + newfield[0] = field; + } + ++/* Given a struct s whose fields has already reordered by size, we try to ++ combine fields less than 8 bytes together to 8 bytes. Example: ++ struct s { ++ uint64_t a, ++ uint32_t b, ++ uint32_t c, ++ uint32_t d, ++ uint16_t e, ++ uint8_t f ++ } ++ ++ We allocate memory for arrays of struct S, before semi-relayout, their ++ layout in memory is shown as below: ++ [a,b,c,d,e,f,padding;a,b,c,d,e,f,padding;...] ++ ++ During semi-relayout, we put a number of structs into a same region called ++ bucket. The number is determined by param realyout-bucket-capacity-level. ++ Using 1024 here as example. After semi-relayout, the layout in a bucket is ++ shown as below: ++ part1 [a;a;a...] 
++ part2 [b,c;b,c;b,c;...] ++ part3 [d,e,f,pad;d,e,f,pad;d,e,f,pad;...] ++ ++ In the last bucket, if the amount of rest structs is less than the capacity ++ of a bucket, the rest of allocated memory will be wasted as padding. */ ++ ++unsigned ++srtype::calculate_bucket_size () ++{ ++ unsigned parts = 0; ++ unsigned bit_sum = 0; ++ unsigned relayout_offset = 0; ++ /* Currently, limit each 8 bytes to at most 2 fields. */ ++ unsigned curr_part_num = 0; ++ unsigned field_num = 0; ++ for (tree f = TYPE_FIELDS (newtype[0]); f; f = DECL_CHAIN (f)) ++ { ++ unsigned size = TYPE_PRECISION (TREE_TYPE (f)); ++ bit_sum += size; ++ field_num++; ++ if (++curr_part_num > 2 || bit_sum > 64) ++ { ++ bit_sum = size; ++ parts++; ++ relayout_offset = relayout_part_size * parts; ++ curr_part_num = 1; ++ } ++ else ++ { ++ relayout_offset = relayout_part_size * parts + (bit_sum - size) / 8; ++ } ++ new_field_offsets.put (f, relayout_offset); ++ } ++ /* Do not relayout a struct with only one field after DFE. */ ++ if (field_num == 1) ++ return 0; ++ bucket_parts = ++parts; ++ return parts * relayout_part_size; ++} ++ + /* Create the new TYPE corresponding to THIS type.
*/ + + bool +@@ -994,6 +1058,15 @@ srtype::create_new_type (void) + if (pc_candidate && pc_gptr == NULL_TREE) + create_global_ptr_for_pc (); + ++ if (semi_relayout) ++ { ++ bucket_size = calculate_bucket_size (); ++ if (bucket_size == 0) ++ return false; ++ if (semi_relayout_map.get (this->newtype[0]) == NULL) ++ semi_relayout_map.put (this->newtype[0], this->type); ++ } ++ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Created %d types:\n", maxclusters); +@@ -1393,7 +1466,7 @@ public: + bool should_create = false, bool can_escape = false); + bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); + +- void check_alloc_num (gimple *stmt, srtype *type); ++ void check_alloc_num (gimple *stmt, srtype *type, bool ptrptr); + void check_definition_assign (srdecl *decl, vec &worklist); + void check_definition_call (srdecl *decl, vec &worklist); + void check_definition (srdecl *decl, vec&); +@@ -1440,6 +1513,33 @@ public: + tree &); + basic_block create_bb_for_compress_nullptr (basic_block, tree &); + basic_block create_bb_for_decompress_nullptr (basic_block, tree, tree &); ++ ++ // Semi-relayout methods: ++ bool is_semi_relayout_candidate (tree); ++ srtype *get_semi_relayout_candidate_type (tree); ++ void check_and_prune_struct_for_semi_relayout (void); ++ tree rewrite_pointer_diff (gimple_stmt_iterator *, tree, tree, srtype *); ++ tree rewrite_pointer_plus_integer (gimple *, gimple_stmt_iterator *, tree, ++ tree, srtype *); ++ tree build_div_expr (gimple_stmt_iterator *, tree, tree); ++ tree get_true_pointer_base (gimple_stmt_iterator *, tree, srtype *); ++ tree get_real_allocated_ptr (tree, gimple_stmt_iterator *); ++ tree set_ptr_for_use (tree, gimple_stmt_iterator *); ++ void record_allocated_size (tree, gimple_stmt_iterator *, tree); ++ tree read_allocated_size (tree, gimple_stmt_iterator *); ++ gimple *create_aligned_alloc (gimple_stmt_iterator *, srtype *, tree, ++ tree &); ++ void create_memset_zero (tree, 
gimple_stmt_iterator *, tree); ++ void create_memcpy (tree, tree, tree, gimple_stmt_iterator *); ++ void create_free (tree, gimple_stmt_iterator *); ++ void copy_to_lhs (tree, tree, gimple_stmt_iterator *); ++ srtype *get_relayout_candidate_type (tree); ++ long unsigned int get_true_field_offset (srfield *, srtype *); ++ tree rewrite_address (tree, srfield *, srtype *, gimple_stmt_iterator *); ++ bool check_sr_copy (gimple *); ++ void relayout_field_copy (gimple_stmt_iterator *, gimple *, tree, tree, ++ tree&, tree &); ++ void do_semi_relayout (gimple_stmt_iterator *, gimple *, tree &, tree &); + }; + + struct ipa_struct_relayout +@@ -4528,7 +4628,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, + } + + void +-ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) ++ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type, bool ptrptr) + { + if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && handled_allocation_stmt (stmt)) +@@ -4536,6 +4636,14 @@ ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + tree arg0 = gimple_call_arg (stmt, 0); + basic_block bb = gimple_bb (stmt); + cgraph_node *node = current_function->node; ++ if (!ptrptr && current_layout_opt_level >= SEMI_RELAYOUT ++ && gimple_call_builtin_p (stmt, BUILT_IN_MALLOC)) ++ { ++ /* Malloc is commonly used for allocations of a single struct ++ and semi-relayout will waste a mess of memory, so we skip it. */ ++ type->has_alloc_array = -4; ++ return; ++ } + if (integer_onep (arg0)) + { + /* Actually NOT an array, but may ruin other array. */ +@@ -4544,6 +4652,10 @@ ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + else if (bb->loop_father != NULL + && loop_outer (bb->loop_father) != NULL) + { ++ /* For semi-relayout, do not escape realloc. */ ++ if (current_layout_opt_level & SEMI_RELAYOUT ++ && gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ return; + /* The allocation is in a loop. 
*/ + type->has_alloc_array = -2; + } +@@ -4635,6 +4747,13 @@ ipa_struct_reorg::check_definition_assign (srdecl *decl, vec &worklist) + return; + } + ++ if (semi_relayout_map.get (type->type) != NULL) ++ { ++ if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT) ++ type->mark_escape (escape_unhandled_rewrite, stmt); ++ return; ++ } ++ + /* d) if the name is from a cast/assignment, make sure it is used as + that type or void* + i) If void* then push the ssa_name into worklist. */ +@@ -4679,7 +4798,8 @@ ipa_struct_reorg::check_definition_call (srdecl *decl, vec &worklist) + } + } + +- check_alloc_num (stmt, type); ++ bool ptrptr = isptrptr (decl->orig_type); ++ check_alloc_num (stmt, type, ptrptr); + return; + } + +@@ -6249,6 +6369,53 @@ ipa_struct_reorg::pc_candidate_tree_p (tree xhs) + return false; + } + ++srtype * ++ipa_struct_reorg::get_semi_relayout_candidate_type (tree xhs) ++{ ++ if (xhs == NULL) ++ return NULL; ++ if (TREE_CODE (xhs) == SSA_NAME || TREE_CODE (xhs) == COMPONENT_REF) ++ { ++ srtype *access_type = find_type (inner_type (TREE_TYPE (xhs))); ++ if (access_type != NULL && access_type->semi_relayout) ++ return access_type; ++ } ++ return NULL; ++} ++ ++bool ++ipa_struct_reorg::is_semi_relayout_candidate (tree xhs) ++{ ++ if (xhs == NULL) ++ return false; ++ ++ if (TREE_CODE (xhs) == SSA_NAME) ++ xhs = TREE_TYPE (xhs); ++ ++ if (TREE_CODE (xhs) == POINTER_TYPE) ++ { ++ srtype *var_type = find_type (TREE_TYPE (xhs)); ++ if (!var_type || var_type->has_escaped ()) ++ return false; ++ if (var_type->semi_relayout) ++ return true; ++ } ++ ++ if (TREE_CODE (xhs) == COMPONENT_REF) ++ { ++ tree mem = TREE_OPERAND (xhs, 0); ++ if (TREE_CODE (mem) == MEM_REF) ++ { ++ tree type = TREE_TYPE (mem); ++ srtype *old_type = get_relayout_candidate_type (type); ++ if (types_compatible_p (type, old_type->type) ++ && old_type->semi_relayout) ++ return true; ++ } ++ } ++ return false; ++} ++ + /* True if xhs is a component_ref that base has escaped but uses a 
compression + candidate type. */ + +@@ -6782,6 +6949,404 @@ ipa_struct_reorg::try_rewrite_with_pointer_compression (gassign *stmt, + } + } + ++tree ++ipa_struct_reorg::rewrite_pointer_diff (gimple_stmt_iterator *gsi, tree ptr1, ++ tree ptr2, srtype *type) ++{ ++ tree shifts = build_int_cst (long_integer_type_node, semi_relayout_align); ++ tree pointer_type = build_pointer_type (unsigned_char_type_node); ++ /* addr_high_1 = (intptr_t)ptr1 >> shifts */ ++ tree ptr1_cvt = fold_convert (pointer_type, ptr1); ++ tree addr_high_1 = gimplify_build2 (gsi, RSHIFT_EXPR, pointer_type, ++ ptr1_cvt, shifts); ++ /* addr_high_2 = (intptr_t)ptr2 >> shifts */ ++ tree ptr2_cvt = fold_convert (pointer_type, ptr2); ++ tree addr_high_2 = gimplify_build2 (gsi, RSHIFT_EXPR, pointer_type, ++ ptr2_cvt, shifts); ++ /* off1 = (intptr_t)ptr1 - (addr_high_1 << shifts) */ ++ tree bucket_start_1 = gimplify_build2 (gsi, LSHIFT_EXPR, pointer_type, ++ addr_high_1, shifts); ++ tree off1 = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, ++ ptr1_cvt, bucket_start_1); ++ /* off2 = (intptr_t)ptr2 - (addr_high_2 << shifts) */ ++ tree bucket_start_2 = gimplify_build2 (gsi, LSHIFT_EXPR, pointer_type, ++ addr_high_2, shifts); ++ tree off2 = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, ++ ptr2_cvt, bucket_start_2); ++ /* group_diff = (addr_high_1 - addr_high_2) / bucket_parts */ ++ tree bucket_sub = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, ++ addr_high_1, addr_high_2); ++ tree bucket_parts = build_int_cst (long_integer_type_node, ++ type->bucket_parts); ++ tree group_diff = gimplify_build2 (gsi, TRUNC_DIV_EXPR, ++ long_integer_type_node, ++ bucket_sub, bucket_parts); ++ /* off_addr_diff = off1 - off2 */ ++ tree off_addr_diff = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, ++ off1, off2); ++ /* res = group_diff * bucket_capacity + off_diff / 8 */ ++ tree capacity = build_int_cst (long_integer_type_node, ++ relayout_part_size / 8); ++ tree unit_size = 
build_int_cst (long_integer_type_node, 8); ++ tree bucket_index_diff = gimplify_build2 (gsi, MULT_EXPR, ++ long_integer_type_node, ++ group_diff, capacity); ++ tree off_index = gimplify_build2 (gsi, TRUNC_DIV_EXPR, ++ long_integer_type_node, ++ off_addr_diff, unit_size); ++ tree res = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, ++ bucket_index_diff, off_index); ++ return res; ++} ++ ++basic_block ++create_bb_for_group_diff_eq_0 (basic_block last_bb, tree phi, tree new_granule) ++{ ++ basic_block new_bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (new_bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ /* Emit res = new_granule; */ ++ gimple_stmt_iterator gsi = gsi_last_bb (new_bb); ++ gimple *new_stmt = gimple_build_assign (phi, new_granule); ++ gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); ++ return new_bb; ++} ++ ++basic_block ++create_bb_for_group_diff_ne_0 (basic_block new_bb, tree &phi, tree ptr, ++ tree group_diff, tree off_times_8, srtype *type) ++{ ++ tree shifts = build_int_cst (long_unsigned_type_node, semi_relayout_align); ++ gimple_stmt_iterator gsi = gsi_last_bb (new_bb); ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ /* curr_group_start = (ptr >> shifts) << shifts; */ ++ tree ptr_r_1 = gimplify_build2 (&gsi, RSHIFT_EXPR, long_integer_type_node, ++ ptr, shifts); ++ tree curr_group_start = gimplify_build2 (&gsi, LSHIFT_EXPR, long_integer_type_node, ++ ptr_r_1, shifts); ++ /* curr_off_from_group = ptr - curr_group_start; */ ++ tree curr_off_from_group = gimplify_build2 (&gsi, MINUS_EXPR, ++ long_integer_type_node, ++ ptr, curr_group_start); ++ /* res = curr_group_start + ((group_diff * parts) << shifts) ++ + ((curr_off_from_group + off_times_8) % shifts); */ ++ tree step1 = gimplify_build2 (&gsi, MULT_EXPR, long_integer_type_node, ++ group_diff, build_int_cst ( ++ long_integer_type_node, type->bucket_parts)); ++ tree step2 = gimplify_build2 (&gsi, 
LSHIFT_EXPR, long_integer_type_node, ++ step1, shifts); ++ tree step3 = gimplify_build2 (&gsi, PLUS_EXPR, long_integer_type_node, ++ curr_off_from_group, off_times_8); ++ tree step4 = gimplify_build2 (&gsi, TRUNC_MOD_EXPR, long_integer_type_node, ++ step3, build_int_cst ( ++ long_integer_type_node, relayout_part_size)); ++ tree step5 = gimplify_build2 (&gsi, PLUS_EXPR, long_integer_type_node, ++ step2, step4); ++ tree res_phi1 = gimplify_build2 (&gsi, PLUS_EXPR, long_integer_type_node, ++ curr_group_start, step5); ++ /* if (group_diff < 0) */ ++ gcond *cond = gimple_build_cond (LT_EXPR, group_diff, ++ build_int_cst (long_integer_type_node, 0), ++ NULL_TREE, NULL_TREE); ++ gsi_insert_before (&gsi, cond, GSI_SAME_STMT); ++ /* remove nop */ ++ gsi_remove (&gsi, true); ++ /* res += shifts */ ++ basic_block true_bb = create_empty_bb (new_bb); ++ if (new_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (true_bb, new_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator true_gsi = gsi_last_bb (true_bb); ++ tree res_phi2 = make_ssa_name (long_integer_type_node); ++ gimple *new_stmt ++ = gimple_build_assign (res_phi2, PLUS_EXPR, res_phi1, ++ build_int_cst (long_integer_type_node, relayout_part_size)); ++ gsi_insert_after (&true_gsi, new_stmt, GSI_NEW_STMT); ++ /* create phi bb */ ++ basic_block res_bb = create_empty_bb (true_bb); ++ if (new_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (res_bb, new_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ /* rebuild cfg */ ++ edge etrue = make_edge (new_bb, true_bb, EDGE_TRUE_VALUE); ++ etrue->probability = profile_probability::unlikely (); ++ true_bb->count = etrue->count (); ++ ++ edge efalse = make_edge (new_bb, res_bb, EDGE_FALSE_VALUE); ++ efalse->probability = profile_probability::likely (); ++ res_bb->count = efalse->count (); ++ ++ edge efall = make_single_succ_edge (true_bb, res_bb, EDGE_FALLTHRU); ++ ++ phi = make_ssa_name (long_integer_type_node); ++ gphi *phi_node = 
create_phi_node (phi, res_bb); ++ add_phi_arg (phi_node, res_phi2, efall, UNKNOWN_LOCATION); ++ add_phi_arg (phi_node, res_phi1, efalse, UNKNOWN_LOCATION); ++ ++ if (dom_info_available_p (CDI_DOMINATORS)) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, true_bb, new_bb); ++ set_immediate_dominator (CDI_DOMINATORS, res_bb, new_bb); ++ } ++ return res_bb; ++} ++ ++tree ++ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt, ++ gimple_stmt_iterator *gsi, ++ tree ptr, tree offset, ++ srtype *type) ++{ ++ gcc_assert (type->semi_relayout); ++ tree off = fold_convert (long_integer_type_node, offset); ++ tree num_8 = build_int_cst (integer_type_node, 8); ++ tree shifts = build_int_cst (integer_type_node, semi_relayout_align); ++ /* off_times_8 = off * 8; */ ++ tree off_times_8 = gimplify_build2 (gsi, MULT_EXPR, long_integer_type_node, ++ off, num_8); ++ /* new_granule = ptr + off * 8; */ ++ tree ptr_int = fold_convert (long_integer_type_node, ptr); ++ tree new_granule = gimplify_build2 (gsi, PLUS_EXPR, long_integer_type_node, ++ ptr_int, off_times_8); ++ /* group_diff = (new_granule >> shifts) - (ptr >> shifts); */ ++ tree group_diff_rhs_1 = gimplify_build2 (gsi, RSHIFT_EXPR, ++ long_integer_type_node, ++ new_granule, shifts); ++ tree group_diff_rhs_2 = gimplify_build2 (gsi, RSHIFT_EXPR, ++ long_integer_type_node, ++ ptr, shifts); ++ tree group_diff = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, ++ group_diff_rhs_1, group_diff_rhs_2); ++ /* if (group_diff == 0) */ ++ gcond *cond = gimple_build_cond (EQ_EXPR, group_diff, ++ build_int_cst (long_integer_type_node, 0), ++ NULL_TREE, NULL_TREE); ++ gimple_set_location (cond, UNKNOWN_LOCATION); ++ gsi_insert_before (gsi, cond, GSI_SAME_STMT); ++ ++ gimple *curr_stmt = as_a (cond); ++ edge e = split_block (curr_stmt->bb, curr_stmt); ++ basic_block split_src_bb = e->src; ++ basic_block split_dst_bb = e->dest; ++ remove_edge_raw (e); ++ /* if (group_diff == 0) ++ res = new_granule; */ ++ tree res_phi_1 = 
make_ssa_name (long_integer_type_node); ++ basic_block true_bb = create_bb_for_group_diff_eq_0 (split_src_bb, res_phi_1, ++ new_granule); ++ /* else */ ++ tree res_phi_2 = NULL_TREE; ++ basic_block false_bb = create_empty_bb (split_src_bb); ++ if (split_src_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (false_bb, split_src_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ ++ edge etrue = make_edge (split_src_bb, true_bb, EDGE_TRUE_VALUE); ++ etrue->probability = profile_probability::very_likely (); ++ true_bb->count = etrue->count (); ++ ++ edge efalse = make_edge (split_src_bb, false_bb, EDGE_FALSE_VALUE); ++ efalse->probability = profile_probability::unlikely (); ++ false_bb->count = efalse->count (); ++ basic_block res_bb = create_bb_for_group_diff_ne_0 (false_bb, res_phi_2, ++ ptr_int, group_diff, ++ off_times_8, type); ++ /* rebuild cfg */ ++ edge e_true_fall = make_single_succ_edge (true_bb, split_dst_bb, ++ EDGE_FALLTHRU); ++ edge e_false_fall = make_single_succ_edge (res_bb, split_dst_bb, ++ EDGE_FALLTHRU); ++ tree res_int = make_ssa_name (long_integer_type_node); ++ gphi *phi_node = create_phi_node (res_int, split_dst_bb); ++ add_phi_arg (phi_node, res_phi_1, e_true_fall, UNKNOWN_LOCATION); ++ add_phi_arg (phi_node, res_phi_2, e_false_fall, UNKNOWN_LOCATION); ++ if (dom_info_available_p (CDI_DOMINATORS)) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, split_dst_bb, split_src_bb); ++ set_immediate_dominator (CDI_DOMINATORS, true_bb, split_src_bb); ++ set_immediate_dominator (CDI_DOMINATORS, false_bb, split_src_bb); ++ } ++ *gsi = gsi_start_bb (split_dst_bb); ++ tree pointer_type = build_pointer_type (unsigned_char_type_node); ++ tree res = gimplify_build1 (gsi, NOP_EXPR, pointer_type, res_int); ++ return res; ++} ++ ++tree ++ipa_struct_reorg::build_div_expr (gimple_stmt_iterator *gsi, ++ tree expr, tree orig_size) ++{ ++ tree div_expr = build2 (TRUNC_DIV_EXPR, long_unsigned_type_node, ++ expr, orig_size); ++ tree num = make_ssa_name 
(long_unsigned_type_node); ++ gimple *g = gimple_build_assign (num, div_expr); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ return num; ++} ++ ++srtype * ++ipa_struct_reorg::get_relayout_candidate_type (tree type) ++{ ++ if (type == NULL) ++ return NULL; ++ if (TREE_CODE (type) != RECORD_TYPE) ++ return NULL; ++ return find_type (inner_type (type)); ++} ++ ++long unsigned int ++ipa_struct_reorg::get_true_field_offset (srfield *field, srtype *type) ++{ ++ unsigned HOST_WIDE_INT new_offset; ++ new_offset = *(type->new_field_offsets.get (field->newfield[0])); ++ return new_offset; ++} ++ ++tree ++ipa_struct_reorg::get_true_pointer_base (gimple_stmt_iterator *gsi, ++ tree mem_ref, srtype *type) ++{ ++ tree ptr = TREE_OPERAND (mem_ref, 0); ++ tree off_bytes = TREE_OPERAND (mem_ref, 1); ++ unsigned num = tree_to_shwi (off_bytes); ++ if (num == 0) ++ return ptr; ++ tree orig_size = TYPE_SIZE_UNIT (TREE_TYPE (mem_ref)); ++ tree off = build_int_cst (long_integer_type_node, ++ num / tree_to_uhwi (orig_size)); ++ gimple *stmt = gsi_stmt (*gsi); ++ tree new_pointer_base = rewrite_pointer_plus_integer (stmt, gsi, ptr, ++ off, type); ++ return new_pointer_base; ++} ++ ++tree ++ipa_struct_reorg::rewrite_address (tree pointer_base, srfield *field, ++ srtype *type, gimple_stmt_iterator *gsi) ++{ ++ unsigned HOST_WIDE_INT field_offset = get_true_field_offset (field, type); ++ ++ tree pointer_ssa = fold_convert (long_unsigned_type_node, pointer_base); ++ tree step1 = gimplify_build1 (gsi, NOP_EXPR, long_unsigned_type_node, ++ pointer_ssa); ++ tree new_offset_ssa = build_int_cst (long_unsigned_type_node, field_offset); ++ tree step2 = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, step1, ++ new_offset_ssa); ++ tree field_ssa = fold_convert ( ++ build_pointer_type (TREE_TYPE (field->newfield[0])), step2); ++ tree step3 = gimplify_build1 (gsi, NOP_EXPR, ++ TREE_TYPE (field_ssa), field_ssa); ++ ++ tree new_mem_ref = fold_build2 (MEM_REF, TREE_TYPE (field->newfield[0]), ++ 
step3, build_int_cst (TREE_TYPE (field_ssa), 0)); ++ return new_mem_ref; ++} ++ ++bool ++ipa_struct_reorg::check_sr_copy (gimple *stmt) ++{ ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ ++ if (TREE_CODE (lhs) != MEM_REF || TREE_CODE (rhs) != MEM_REF) ++ return false; ++ srtype *t1 = get_relayout_candidate_type (TREE_TYPE (lhs)); ++ srtype *t2 = get_relayout_candidate_type (TREE_TYPE (rhs)); ++ if (!t1 || !t2 || !t1->semi_relayout || !t2->semi_relayout || t1 != t2) ++ return false; ++ tree pointer1 = TREE_OPERAND (lhs, 0); ++ tree pointer2 = TREE_OPERAND (rhs, 0); ++ if (TREE_CODE (TREE_TYPE (pointer1)) != POINTER_TYPE ++ || TREE_CODE (TREE_TYPE (pointer2)) != POINTER_TYPE) ++ return false; ++ ++ tree type1 = TREE_TYPE (TREE_TYPE (pointer1)); ++ tree type2 = TREE_TYPE (TREE_TYPE (pointer2)); ++ ++ srtype *t3 = get_relayout_candidate_type (type1); ++ srtype *t4 = get_relayout_candidate_type (type2); ++ ++ if (t3 != t4 || t3 != t1) ++ return false; ++ ++ return true; ++} ++ ++void ++ipa_struct_reorg::relayout_field_copy (gimple_stmt_iterator *gsi, gimple *stmt, ++ tree lhs, tree rhs, ++ tree &newlhs, tree &newrhs) ++{ ++ srtype *type = get_relayout_candidate_type (TREE_TYPE (lhs)); ++ tree lhs_base_pointer = get_true_pointer_base (gsi, newlhs, type); ++ tree rhs_base_pointer = get_true_pointer_base (gsi, newrhs, type); ++ tree new_l_mem_ref = NULL_TREE; ++ tree new_r_mem_ref = NULL_TREE; ++ srfield *field = NULL; ++ unsigned i = 0; ++ FOR_EACH_VEC_ELT (type->fields, i, field) ++ { ++ if (!field->newfield[0]) ++ continue; ++ new_l_mem_ref = rewrite_address (lhs_base_pointer, field, type, gsi); ++ new_r_mem_ref = rewrite_address (rhs_base_pointer, field, type, gsi); ++ gimple *new_stmt = gimple_build_assign (new_l_mem_ref, new_r_mem_ref); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); ++ } ++ newlhs = new_l_mem_ref; ++ newrhs = new_r_mem_ref; ++} ++ ++void ++ipa_struct_reorg::do_semi_relayout (gimple_stmt_iterator *gsi, gimple 
*stmt, ++ tree &newlhs, tree &newrhs) ++{ ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ ++ bool l = TREE_CODE (lhs) == COMPONENT_REF ? is_semi_relayout_candidate (lhs) ++ : false; ++ bool r = TREE_CODE (rhs) == COMPONENT_REF ? is_semi_relayout_candidate (rhs) ++ : false; ++ ++ gcc_assert (!(l && r)); ++ ++ if (!l && !r) ++ { ++ if (check_sr_copy (stmt)) ++ relayout_field_copy (gsi, stmt, lhs, rhs, newlhs, newrhs); ++ } ++ else if (l) ++ { ++ srtype *type = get_relayout_candidate_type ( ++ TREE_TYPE (TREE_OPERAND (lhs, 0))); ++ srfield *new_field = type->find_field ( ++ int_byte_position (TREE_OPERAND (lhs, 1))); ++ tree pointer_base = get_true_pointer_base ( ++ gsi, TREE_OPERAND (newlhs, 0), type); ++ newlhs = rewrite_address (pointer_base, new_field, type, gsi); ++ } ++ else if (r) ++ { ++ srtype *type = get_relayout_candidate_type ( ++ TREE_TYPE (TREE_OPERAND (rhs, 0))); ++ srfield *new_field = type->find_field ( ++ int_byte_position (TREE_OPERAND (rhs, 1))); ++ tree pointer_base = get_true_pointer_base ( ++ gsi, TREE_OPERAND (newrhs, 0), type); ++ newrhs = rewrite_address (pointer_base, new_field, type, gsi); ++ } ++} ++ + bool + ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + { +@@ -6876,7 +7441,8 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + tree size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))); + tree num; + /* Check if rhs2 is a multiplication of the size of the type. */ +- if (!is_result_of_mult (rhs2, &num, size)) ++ if (!is_result_of_mult (rhs2, &num, size) ++ && !(current_layout_opt_level & SEMI_RELAYOUT)) + internal_error ("the rhs of pointer was not a multiplicate and it slipped through."); + + /* Add the judgment of num, support for POINTER_DIFF_EXPR. 
+@@ -6898,11 +7464,34 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhs[i]))); + newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, + newsize); ++ if (current_layout_opt_level >= SEMI_RELAYOUT) ++ { ++ if (is_semi_relayout_candidate (lhs)) ++ { ++ srtype *type = get_semi_relayout_candidate_type (lhs); ++ newrhs[i] = rewrite_pointer_plus_integer (stmt, gsi, ++ newrhs[i], num, type); ++ newsize = build_int_cst (long_unsigned_type_node, 0); ++ } ++ } + new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, + newrhs[i], newsize); + } + else + { ++ /* rhs2 is not a const integer */ ++ if (current_layout_opt_level >= SEMI_RELAYOUT) ++ { ++ if (is_semi_relayout_candidate (lhs)) ++ { ++ num = build_div_expr (gsi, rhs2, ++ build_int_cst (long_unsigned_type_node, 1)); ++ srtype *type = get_semi_relayout_candidate_type (lhs); ++ newrhs[i] = rewrite_pointer_plus_integer (stmt, ++ gsi, newrhs[i], num, type); ++ rhs2 = build_int_cst (long_unsigned_type_node, 0); ++ } ++ } + new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, + newrhs[i], rhs2); + } +@@ -6952,13 +7541,32 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + return false; + + /* The two operands always have pointer/reference type. 
*/ +- for (unsigned i = 0; i < max_split && newrhs1[i] && newrhs2[i]; i++) ++ if (current_layout_opt_level >= SEMI_RELAYOUT ++ && (is_semi_relayout_candidate (rhs1) ++ || is_semi_relayout_candidate (rhs2))) + { +- gimple_assign_set_rhs1 (stmt, newrhs1[i]); +- gimple_assign_set_rhs2 (stmt, newrhs2[i]); +- update_stmt (stmt); ++ for (unsigned i = 0; i < max_split && newrhs1[i] &&newrhs2[i]; i++) ++ { ++ srtype *type = get_semi_relayout_candidate_type (rhs1); ++ if (!type) ++ type = get_semi_relayout_candidate_type (rhs2); ++ gcc_assert (type != NULL); ++ tree res = rewrite_pointer_diff (gsi, newrhs1[i], ++ newrhs2[i], type); ++ gimple *g = gimple_build_assign (gimple_assign_lhs (stmt), res); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ } ++ remove = true; ++ } ++ else ++ { ++ for (unsigned i = 0; i < max_split && newrhs1[i] && newrhs2[i]; i++) ++ { ++ gimple_assign_set_rhs1 (stmt, newrhs1[i]); ++ gimple_assign_set_rhs2 (stmt, newrhs2[i]); ++ update_stmt (stmt); ++ } + } +- remove = false; + return remove; + } + +@@ -6985,6 +7593,8 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + fprintf (dump_file, "replaced with:\n"); + for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++) + { ++ if (current_layout_opt_level & SEMI_RELAYOUT) ++ do_semi_relayout (gsi, stmt, newlhs[i], newrhs[i]); + if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE) + try_rewrite_with_pointer_compression (stmt, gsi, lhs, rhs, + newlhs[i], newrhs[i]); +@@ -7003,6 +7613,108 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + return remove; + } + ++tree ++ipa_struct_reorg::get_real_allocated_ptr (tree ptr, gimple_stmt_iterator *gsi) ++{ ++ tree ptr_to_int = fold_convert (long_unsigned_type_node, ptr); ++ tree align = build_int_cst (long_unsigned_type_node, relayout_part_size); ++ tree real_addr = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, ++ ptr_to_int, align); ++ tree res = gimplify_build1 (gsi, 
NOP_EXPR, ++ build_pointer_type (long_unsigned_type_node), real_addr); ++ return res; ++} ++ ++tree ++ipa_struct_reorg::set_ptr_for_use (tree ptr, gimple_stmt_iterator *gsi) ++{ ++ tree ptr_to_int = fold_convert (long_unsigned_type_node, ptr); ++ tree align = build_int_cst (long_unsigned_type_node, relayout_part_size); ++ tree ptr_int = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, ++ ptr_to_int, align); ++ tree res = gimplify_build1 (gsi, NOP_EXPR, ++ build_pointer_type (long_unsigned_type_node), ptr_int); ++ return res; ++} ++ ++void ++ipa_struct_reorg::record_allocated_size (tree ptr, gimple_stmt_iterator *gsi, ++ tree size) ++{ ++ tree to_type = build_pointer_type (long_unsigned_type_node); ++ tree type_cast = fold_convert (to_type, ptr); ++ tree lhs = fold_build2 (MEM_REF, long_unsigned_type_node, ptr, ++ build_int_cst (build_pointer_type (long_unsigned_type_node), 0)); ++ gimple *stmt = gimple_build_assign (lhs, size); ++ gsi_insert_before (gsi, stmt, GSI_SAME_STMT); ++} ++ ++tree ++ipa_struct_reorg::read_allocated_size (tree ptr, gimple_stmt_iterator *gsi) ++{ ++ tree to_type = build_pointer_type (long_unsigned_type_node); ++ tree off = build_int_cst (to_type, 0); ++ tree size = gimplify_build2 (gsi, MEM_REF, long_unsigned_type_node, ++ ptr, off); ++ return size; ++} ++ ++gimple * ++ipa_struct_reorg::create_aligned_alloc (gimple_stmt_iterator *gsi, ++ srtype *type, tree num, tree &size) ++{ ++ tree fn = builtin_decl_implicit (BUILT_IN_ALIGNED_ALLOC); ++ ++ tree align = build_int_cst (long_unsigned_type_node, relayout_part_size); ++ unsigned bucket_size = type->bucket_size; ++ ++ tree nbuckets = gimplify_build2 (gsi, CEIL_DIV_EXPR, long_unsigned_type_node, ++ num, build_int_cst (long_unsigned_type_node, ++ relayout_part_size / 8)); ++ tree use_size = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, ++ nbuckets, build_int_cst ( ++ long_unsigned_type_node, bucket_size)); ++ size = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, ++ 
use_size, align); ++ gimple *g = gimple_build_call (fn, 2, align, size); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ return g; ++} ++ ++void ++ipa_struct_reorg::create_memset_zero (tree ptr, gimple_stmt_iterator *gsi, ++ tree size) ++{ ++ tree fn = builtin_decl_implicit (BUILT_IN_MEMSET); ++ tree val = build_int_cst (long_unsigned_type_node, 0); ++ gimple *g = gimple_build_call (fn, 3, ptr, val, size); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++} ++ ++void ++ipa_struct_reorg::create_memcpy (tree src, tree dst, tree size, ++ gimple_stmt_iterator *gsi) ++{ ++ tree fn = builtin_decl_implicit (BUILT_IN_MEMCPY); ++ gimple *g = gimple_build_call (fn, 3, dst, src, size); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++} ++ ++void ++ipa_struct_reorg::create_free (tree ptr, gimple_stmt_iterator *gsi) ++{ ++ tree fn = builtin_decl_implicit (BUILT_IN_FREE); ++ gimple *g = gimple_build_call (fn, 1, ptr); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++} ++ ++void ++ipa_struct_reorg::copy_to_lhs (tree lhs, tree new_lhs, gimple_stmt_iterator *gsi) ++{ ++ gimple *g = gimple_build_assign (lhs, new_lhs); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++} ++ + /* Rewrite function call statement STMT. Return TRUE if the statement + is to be removed. */ + +@@ -7044,24 +7756,77 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + ? TYPE_SIZE_UNIT (decl->orig_type) + : TYPE_SIZE_UNIT (type->newtype[i]); + gimple *g; +- /* Every allocation except for calloc needs the size multiplied out. 
*/ +- if (!gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) +- newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); +- +- if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA)) +- g = gimple_build_call (gimple_call_fndecl (stmt), +- 1, newsize); +- else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) +- g = gimple_build_call (gimple_call_fndecl (stmt), +- 2, num, newsize); +- else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) +- g = gimple_build_call (gimple_call_fndecl (stmt), +- 2, newrhs1[i], newsize); +- else +- gcc_assert (false); +- gimple_call_set_lhs (g, decl->newdecl[i]); +- gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ bool rewrite = false; ++ if (current_layout_opt_level >= SEMI_RELAYOUT ++ && type->semi_relayout) ++ { ++ if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC)) ++ ; ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ { ++ tree rhs2 = gimple_call_arg (stmt, 1); ++ if (tree_to_uhwi (rhs2) == tree_to_uhwi ( ++ TYPE_SIZE_UNIT (type->type))) ++ { ++ rewrite = true; ++ tree size = NULL_TREE; ++ g = create_aligned_alloc (gsi, type, num, size); ++ tree real_ptr = make_ssa_name ( ++ build_pointer_type (unsigned_char_type_node)); ++ gimple_set_lhs (g, real_ptr); ++ create_memset_zero (real_ptr, gsi, size); ++ record_allocated_size (real_ptr, gsi, size); ++ tree lhs_use = set_ptr_for_use (real_ptr, gsi); ++ copy_to_lhs (decl->newdecl[i], lhs_use, gsi); ++ } ++ } ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ { ++ rewrite = true; ++ tree size = NULL_TREE; ++ g = create_aligned_alloc (gsi, type, num, size); ++ tree real_ptr = make_ssa_name ( ++ build_pointer_type (unsigned_char_type_node)); ++ gimple_set_lhs (g, real_ptr); ++ create_memset_zero (real_ptr, gsi, size); ++ tree src = get_real_allocated_ptr (newrhs1[i], gsi); ++ tree old_size = read_allocated_size (src, gsi); ++ create_memcpy (src, real_ptr, old_size, gsi); ++ record_allocated_size (real_ptr, gsi, 
size); ++ tree lhs_use = set_ptr_for_use (real_ptr, gsi); ++ create_free (src, gsi); ++ copy_to_lhs (decl->newdecl[i], lhs_use, gsi); ++ } ++ else ++ { ++ gcc_assert (false); ++ internal_error ("supported type for semi-relayout."); ++ } ++ } ++ if (!rewrite ++ && (current_layout_opt_level >= STRUCT_REORDER_FIELDS ++ || current_layout_opt_level == STRUCT_SPLIT)) ++ { ++ /* Every allocation except for calloc needs the size ++ multiplied out. */ ++ if (!gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, ++ num, newsize); ++ if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 1, newsize); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 2, num, newsize); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 2, newrhs1[i], newsize); ++ else ++ gcc_assert (false); ++ gimple_call_set_lhs (g, decl->newdecl[i]); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ } + if (type->pc_candidate) + { + /* Init global header for pointer compression. 
*/ +@@ -7081,8 +7846,11 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + if (!rewrite_expr (expr, newexpr)) + return false; + ++ srtype *t = find_type (TREE_TYPE (TREE_TYPE (expr))); + if (newexpr[1] == NULL) + { ++ if (t && t->semi_relayout) ++ newexpr[0] = get_real_allocated_ptr (newexpr[0], gsi); + gimple_call_set_arg (stmt, 0, newexpr[0]); + update_stmt (stmt); + return false; +@@ -7789,6 +8557,85 @@ ipa_struct_reorg::check_and_prune_struct_for_pointer_compression (void) + } + } + ++void ++ipa_struct_reorg::check_and_prune_struct_for_semi_relayout (void) ++{ ++ unsigned relayout_transform = 0; ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ srtype *type = types[i]; ++ if (dump_file) ++ { ++ print_generic_expr (dump_file, type->type); ++ } ++ if (type->has_escaped ()) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, " has escaped by %s, skip relayout.\n", ++ type->escape_reason ()); ++ } ++ continue; ++ } ++ if (TYPE_FIELDS (type->type) == NULL) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, " has zero field, skip relayout.\n"); ++ } ++ continue; ++ } ++ if (type->chain_type) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, " is chain_type, skip relayout.\n"); ++ } ++ continue; ++ } ++ if (type->has_alloc_array == 0 || type->has_alloc_array == 1 ++ || type->has_alloc_array == -1 || type->has_alloc_array == -3 ++ || type->has_alloc_array == -4) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, " has alloc number: %d, skip relayout.\n", ++ type->has_alloc_array); ++ } ++ continue; ++ } ++ if (get_type_name (type->type) == NULL) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, " has empty struct name," ++ " skip relayout.\n"); ++ } ++ continue; ++ } ++ relayout_transform++; ++ type->semi_relayout = true; ++ if (dump_file) ++ { ++ fprintf (dump_file, " attempts to do semi-relayout.\n"); ++ } ++ } ++ ++ if (dump_file) ++ { ++ if (relayout_transform) ++ { ++ fprintf (dump_file, "\nNumber of structures to transform in " 
++ "semi-relayout is %d\n", relayout_transform); ++ } ++ else ++ { ++ fprintf (dump_file, "\nNo structures to transform in " ++ "semi-relayout.\n"); ++ } ++ } ++} ++ + /* Init pointer size from parameter param_pointer_compression_size. */ + + static void +@@ -7829,7 +8676,8 @@ ipa_struct_reorg::execute (unsigned int opt) + } + if (opt >= POINTER_COMPRESSION_SAFE) + check_and_prune_struct_for_pointer_compression (); +- ++ if (opt >= SEMI_RELAYOUT) ++ check_and_prune_struct_for_semi_relayout (); + ret = rewrite_functions (); + } + else // do COMPLETE_STRUCT_RELAYOUT +@@ -7881,6 +8729,8 @@ public: + unsigned int level = 0; + switch (struct_layout_optimize_level) + { ++ case 6: level |= SEMI_RELAYOUT; ++ // FALLTHRU + case 5: level |= POINTER_COMPRESSION_UNSAFE; + // FALLTHRU + case 4: level |= POINTER_COMPRESSION_SAFE; +@@ -7900,6 +8750,12 @@ public: + if (level & POINTER_COMPRESSION_SAFE) + init_pointer_size_for_pointer_compression (); + ++ if (level & SEMI_RELAYOUT) ++ { ++ semi_relayout_align = semi_relayout_level; ++ relayout_part_size = 1 << semi_relayout_level; ++ } ++ + /* Preserved for backward compatibility, reorder fields needs run before + struct split and complete struct relayout. 
*/ + if (flag_ipa_reorder_fields && level < STRUCT_REORDER_FIELDS) +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +index d88799982..982f43e58 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -25,6 +25,9 @@ namespace struct_reorg { + + const int max_split = 2; + ++unsigned semi_relayout_align = semi_relayout_level; ++unsigned relayout_part_size = 1 << semi_relayout_level; ++ + template + struct auto_vec_del : auto_vec + { +@@ -127,6 +130,10 @@ public: + bool pc_candidate; + bool has_legal_alloc_num; + int has_alloc_array; ++ bool semi_relayout; ++ hash_map new_field_offsets; ++ unsigned bucket_parts; ++ unsigned bucket_size; + + // Constructors + srtype(tree type); +@@ -148,6 +155,7 @@ public: + bool has_dead_field (void); + void mark_escape (escape_type, gimple *stmt); + void create_global_ptr_for_pc (); ++ unsigned calculate_bucket_size (); + bool has_escaped (void) + { + return escapes != does_not_escape; +diff --git a/gcc/params.opt b/gcc/params.opt +index 1d355819c..83fd705ee 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -988,4 +988,8 @@ Threshold functions of cache miss counts to be analyzed in prefetching. + Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization + Target size of compressed pointer, which should be 8, 16 or 32. + ++-param=semi-relayout-level= ++Common Joined UInteger Var(semi_relayout_level) Init(13) IntegerRange(11, 15) Param Optimization ++Set capacity of each bucket to semi-relayout to (1 << semi-relayout-level) / 8. ++ + ; This comment is to ensure we retain the blank line above.
+diff --git a/gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c b/gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c +new file mode 100644 +index 000000000..87c756c79 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c +@@ -0,0 +1,86 @@ ++// Check simplify rewrite chance for semi-relayout ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++}; ++ ++ ++const int MAX = 100; ++network_t* net; ++node_p node; ++arc_p arc; ++ ++int ++main () ++{ ++ net = (network_t*) calloc (1, sizeof(network_t)); ++ net->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ net->sorted_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ net->nodes = (node_p) calloc (MAX, sizeof (node_t)); ++ net->arcs->id = 100; ++ ++ node = net->nodes; ++ arc = net->arcs; ++ ++ for (unsigned i = 0; i < MAX; i++) ++ { ++ arc->head = node; ++ arc->head->child = node; ++ node->potential = i + 1; ++ arc->cost = arc->head->potential; ++ arc->tail = node->sibling; ++ node = node + 1; ++ arc = arc + 1; ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in semi-relayout is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp 
+index d7367ed96..281046b48 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -93,6 +93,10 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pc*.c]] \ + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pc*.c]] \ + "" "-fipa-struct-reorg=5 -fdump-ipa-all -flto-partition=one -fwhole-program" + ++# -fipa-struct-reorg=6 ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/semi_relayout*.c]] \ ++ "" "-fipa-struct-reorg=6 -fdump-ipa-all -flto-partition=one -fwhole-program" ++ + # All done. + torture-finish + dg-finish +-- +2.27.0.windows.1 + diff --git a/gcc.spec b/gcc.spec index 21b83b42a74e6e937dcb13a2a1c9caaa28bb8ea6..6f85ae5249322545279677dd704bfc143d06db4d 100644 --- a/gcc.spec +++ b/gcc.spec @@ -61,7 +61,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: gcc Version: %{gcc_version} -Release: 16 +Release: 17 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org @@ -162,6 +162,41 @@ Patch45: 0045-Transposed-SLP-Enable-Transposed-SLP.patch Patch46: 0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch Patch47: 0047-DFE-Fix-the-bug-caused-by-inconsistent-types.patch Patch48: 0048-Struct-Reorg-Type-simplify-limitation-when-in-struct.patch +Patch49: 0049-build-Add-some-file-right-to-executable.patch +Patch50: 0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch +Patch51: 0051-Backport-phiopt-Fix-up-conditional_replacement-PR993.patch +Patch52: 0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch +Patch53: 0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch +Patch54: 0054-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch +Patch55: 0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch +Patch56: 0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch +Patch57: 0057-Backport-Add-support-for-__builtin_bswap128.patch +Patch58: 
0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch +Patch59: 0059-Backport-Add-a-couple-of-A-CST1-CST2-match-and-simpl.patch +Patch60: 0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch +Patch61: 0061-Backport-Replace-conditional_replacement-with-match-.patch +Patch62: 0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch +Patch63: 0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch +Patch64: 0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch +Patch65: 0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch +Patch66: 0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch +Patch67: 0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch +Patch68: 0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch +Patch69: 0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch +Patch70: 0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch +Patch71: 0071-PHIOPT-Disable-the-match-A-CST1-0-when-the-CST1-is-n.patch +Patch72: 0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch +Patch73: 0073-PHIOPT-Add-A-B-op-CST-B-match-and-simplify-optimizat.patch +Patch74: 0074-FORWPROP-Fold-series-of-instructions-into-mul.patch +Patch75: 0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch +Patch76: 0076-Struct-Reorg-Fix-speccpu2006-462-double-free-I60YUV.patch +Patch77: 0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch +Patch78: 0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch +Patch79: 0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch +Patch80: 0080-Struct-Reorg-Add-Unsafe-Structure-Pointer-Compressio.patch +Patch81: 0081-Loop-distribution-Insert-temp-arrays-built-from-isom.patch +Patch82: 0082-Revert-Backport-tree-optimization-102880-make-PHI-OP.patch +Patch83: 0083-Struct-reorg-Add-struct-semi-relayout-optimize.patch %global gcc_target_platform %{_arch}-linux-gnu @@ -664,7 +699,41 @@ not stable, so plugins must be rebuilt any time GCC 
is updated. %patch46 -p1 %patch47 -p1 %patch48 -p1 - +%patch49 -p1 +%patch50 -p1 +%patch51 -p1 +%patch52 -p1 +%patch53 -p1 +%patch54 -p1 +%patch55 -p1 +%patch56 -p1 +%patch57 -p1 +%patch58 -p1 +%patch59 -p1 +%patch60 -p1 +%patch61 -p1 +%patch62 -p1 +%patch63 -p1 +%patch64 -p1 +%patch65 -p1 +%patch66 -p1 +%patch67 -p1 +%patch68 -p1 +%patch69 -p1 +%patch70 -p1 +%patch71 -p1 +%patch72 -p1 +%patch73 -p1 +%patch74 -p1 +%patch75 -p1 +%patch76 -p1 +%patch77 -p1 +%patch78 -p1 +%patch79 -p1 +%patch80 -p1 +%patch81 -p1 +%patch82 -p1 +%patch83 -p1 %build @@ -2684,6 +2753,12 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Dec 6 2022 benniaobufeijiushiji - 10.3.1-17 +- Type:Sync +- ID:NA +- SUG:NA +- DESC:Sync patch from openeuler/gcc + * Fri Sep 16 2022 eastb233 - 10.3.1-16 - Type:Sync - ID:NA