diff --git a/0049-build-Add-some-file-right-to-executable.patch b/0049-build-Add-some-file-right-to-executable.patch new file mode 100644 index 0000000000000000000000000000000000000000..1d981634b5f00f68607b6ec337e0365d6625e7e0 --- /dev/null +++ b/0049-build-Add-some-file-right-to-executable.patch @@ -0,0 +1,21 @@ +From 7dffda64fcbbd522616d7dc9c70530d146f4fed6 Mon Sep 17 00:00:00 2001 +From: zhongyunde +Date: Tue, 1 Nov 2022 16:38:38 +0800 +Subject: [PATCH 01/29] [build] Add some file right to executable + +--- + libgcc/mkheader.sh | 0 + move-if-change | 0 + 2 files changed, 0 insertions(+), 0 deletions(-) + mode change 100644 => 100755 libgcc/mkheader.sh + mode change 100644 => 100755 move-if-change + +diff --git a/libgcc/mkheader.sh b/libgcc/mkheader.sh +old mode 100644 +new mode 100755 +diff --git a/move-if-change b/move-if-change +old mode 100644 +new mode 100755 +-- +2.25.1 + diff --git a/0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch b/0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch new file mode 100644 index 0000000000000000000000000000000000000000..81d79c6deace82ecaa2ea532dc29a59dce4221d8 --- /dev/null +++ b/0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch @@ -0,0 +1,186 @@ +From c690da762e873d0f5c66ea084e420ba4842354a6 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Wed, 4 Nov 2020 11:55:29 +0100 +Subject: [PATCH 02/29] [Backport] phiopt: Optimize x ? 1024 : 0 to (int) x << + 10 [PR97690] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3e190757fa332d327bee27495f37beb01155cfab + +The following patch generalizes the x ? 1 : 0 -> (int) x optimization +to handle also left shifts by constant. + +During x86_64-linux and i686-linux bootstraps + regtests it triggered +in 1514 unique non-LTO -m64 cases (sort -u on log mentioning +filename, function name and shift count) and 1866 -m32 cases. 
+ +Unfortunately, the patch regresses (before the tests have been adjusted): ++FAIL: gcc.dg/tree-ssa/ssa-ccp-11.c scan-tree-dump-times optimized "if " 0 ++FAIL: gcc.dg/vect/bb-slp-pattern-2.c -flto -ffat-lto-objects scan-tree-dump-times slp1 "optimized: basic block" 1 ++FAIL: gcc.dg/vect/bb-slp-pattern-2.c scan-tree-dump-times slp1 "optimized: basic block" 1 +and in both cases it actually results in worse code. + +> > We'd need some optimization that would go through all PHI edges and +> > compute if some use of the phi results don't actually compute a constant +> > across all the PHI edges - 1 & 0 and 0 & 1 is always 0. + +> PRE should do this, IMHO only optimizing it at -O2 is fine. + +> > Similarly, in the slp vectorization test there is: +> > a[0] = b[0] ? 1 : 7; + +> note this, carefully avoiding the already "optimized" b[0] ? 1 : 0 ... + +> So the option is to put : 7 in the 2, 4 and 8 case as well. The testcase +> wasn't added for any real-world case but is artificial I guess for +> COND_EXPR handling of invariants. + +> But yeah, for things like SLP it means we eventually have to +> implement reverse transforms for all of this to make the lanes +> matching. But that's true anyway for things like x + 1 vs. x + 0 +> or x / 3 vs. x / 2 or other simplifications we do. + +2020-11-04 Jakub Jelinek + + PR tree-optimization/97690 + * tree-ssa-phiopt.c (conditional_replacement): Also optimize + cond ? pow2p_cst : 0 as ((type) cond) << cst. + + * gcc.dg/tree-ssa/phi-opt-22.c: New test. + * gcc.dg/tree-ssa/ssa-ccp-11.c: Use -O2 instead of -O1. + * gcc.dg/vect/bb-slp-pattern-2.c (foo): Use ? 2 : 7, ? 4 : 7 and + ? 8 : 7 instead of ? 2 : 0, ? 4 : 0, ? 8 : 0. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c | 11 ++++++ + gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c | 2 +- + gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c | 6 ++-- + gcc/tree-ssa-phiopt.c | 38 ++++++++++++++------ + 4 files changed, 43 insertions(+), 14 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c +new file mode 100644 +index 000000000..fd3706666 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c +@@ -0,0 +1,11 @@ ++/* PR tree-optimization/97690 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++ ++int foo (_Bool d) { return d ? 2 : 0; } ++int bar (_Bool d) { return d ? 1 : 0; } ++int baz (_Bool d) { return d ? -__INT_MAX__ - 1 : 0; } ++int qux (_Bool d) { return d ? 1024 : 0; } ++ ++/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " << " 3 "phiopt2" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c +index 36b8e7fc8..d70ea5a01 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O1 -fdump-tree-optimized" } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ + + /* Test for CPROP across a DAG. */ + +diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c +index d32cb7585..e64f0115a 100644 +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c +@@ -13,13 +13,13 @@ foo (short * __restrict__ a, int * __restrict__ b, int stride) + for (i = 0; i < N/stride; i++, a += stride, b += stride) + { + a[0] = b[0] ? 1 : 7; +- a[1] = b[1] ? 2 : 0; ++ a[1] = b[1] ? 2 : 7; + a[2] = b[2] ? 3 : 0; +- a[3] = b[3] ? 4 : 0; ++ a[3] = b[3] ? 4 : 7; + a[4] = b[4] ? 5 : 0; + a[5] = b[5] ? 
6 : 0; + a[6] = b[6] ? 7 : 0; +- a[7] = b[7] ? 8 : 0; ++ a[7] = b[7] ? 8 : 7; + } + } + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 591b6435f..85587e8d1 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -753,7 +753,9 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb, + gimple_stmt_iterator gsi; + edge true_edge, false_edge; + tree new_var, new_var2; +- bool neg; ++ bool neg = false; ++ int shift = 0; ++ tree nonzero_arg; + + /* FIXME: Gimplification of complex type is too hard for now. */ + /* We aren't prepared to handle vectors either (and it is a question +@@ -764,14 +766,22 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb, + || POINTER_TYPE_P (TREE_TYPE (arg1)))) + return false; + +- /* The PHI arguments have the constants 0 and 1, or 0 and -1, then +- convert it to the conditional. */ +- if ((integer_zerop (arg0) && integer_onep (arg1)) +- || (integer_zerop (arg1) && integer_onep (arg0))) +- neg = false; +- else if ((integer_zerop (arg0) && integer_all_onesp (arg1)) +- || (integer_zerop (arg1) && integer_all_onesp (arg0))) ++ /* The PHI arguments have the constants 0 and 1, or 0 and -1 or ++ 0 and (1 << cst), then convert it to the conditional. */ ++ if (integer_zerop (arg0)) ++ nonzero_arg = arg1; ++ else if (integer_zerop (arg1)) ++ nonzero_arg = arg0; ++ else ++ return false; ++ if (integer_all_onesp (nonzero_arg)) + neg = true; ++ else if (integer_pow2p (nonzero_arg)) ++ { ++ shift = tree_log2 (nonzero_arg); ++ if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg))) ++ return false; ++ } + else + return false; + +@@ -783,12 +793,12 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb, + falls through into BB. + + There is a single PHI node at the join point (BB) and its arguments +- are constants (0, 1) or (0, -1). ++ are constants (0, 1) or (0, -1) or (0, (1 << shift)). 
+ + So, given the condition COND, and the two PHI arguments, we can + rewrite this PHI into non-branching code: + +- dest = (COND) or dest = COND' ++ dest = (COND) or dest = COND' or dest = (COND) << shift + + We use the condition as-is if the argument associated with the + true edge has the value one or the argument associated with the +@@ -823,6 +833,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb, + cond = fold_build1_loc (gimple_location (stmt), + NEGATE_EXPR, TREE_TYPE (cond), cond); + } ++ else if (shift) ++ { ++ cond = fold_convert_loc (gimple_location (stmt), ++ TREE_TYPE (result), cond); ++ cond = fold_build2_loc (gimple_location (stmt), ++ LSHIFT_EXPR, TREE_TYPE (cond), cond, ++ build_int_cst (integer_type_node, shift)); ++ } + + /* Insert our new statements at the end of conditional block before the + COND_STMT. */ +-- +2.25.1 + diff --git a/0051-Backport-phiopt-Fix-up-conditional_replacement-PR993.patch b/0051-Backport-phiopt-Fix-up-conditional_replacement-PR993.patch new file mode 100644 index 0000000000000000000000000000000000000000..b98c171f5ebb6eaf99621693168e7dcffa698aff --- /dev/null +++ b/0051-Backport-phiopt-Fix-up-conditional_replacement-PR993.patch @@ -0,0 +1,92 @@ +From 79a974bc7bb67cf425a7839f3c1f5689e41c7ee8 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Tue, 9 Mar 2021 19:13:11 +0100 +Subject: [PATCH 03/29] [Backport] phiopt: Fix up conditional_replacement + [PR99305] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b610c30453d8e4cc88693d85a5a100d089640be5 + +Before my PR97690 changes, conditional_replacement would not set neg +when the nonzero arg was boolean true. +I've simplified the testing, so that it first finds the zero argument +and then checks the other argument for all the handled cases +(1, -1 and 1 << X, where the last case is what the patch added support for). +But, unfortunately I've placed the integer_all_onesp test first. 
+For unsigned precision 1 types such as bool integer_all_onesp, integer_onep +and integer_pow2p can all be true and the code set neg to true in that case, +which is undesirable. + +The following patch tests integer_pow2p first (which is trivially true +for integer_onep too and tree_log2 in that case gives shift == 0) +and only if that isn't the case, integer_all_onesp. + +2021-03-09 Jakub Jelinek + + PR tree-optimization/99305 + * tree-ssa-phiopt.c (conditional_replacement): Test integer_pow2p + before integer_all_onesp instead of vice versa. + + * g++.dg/opt/pr99305.C: New test. +--- + gcc/testsuite/g++.dg/opt/pr99305.C | 26 ++++++++++++++++++++++++++ + gcc/tree-ssa-phiopt.c | 6 +++--- + 2 files changed, 29 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/g++.dg/opt/pr99305.C + +diff --git a/gcc/testsuite/g++.dg/opt/pr99305.C b/gcc/testsuite/g++.dg/opt/pr99305.C +new file mode 100644 +index 000000000..8a91277e7 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr99305.C +@@ -0,0 +1,26 @@ ++// PR tree-optimization/99305 ++// { dg-do compile } ++// { dg-options "-O3 -fno-ipa-icf -fdump-tree-optimized" } ++// { dg-final { scan-tree-dump-times " = \\\(unsigned char\\\) c_\[0-9]*\\\(D\\\);" 3 "optimized" } } ++// { dg-final { scan-tree-dump-times " = \[^\n\r]* \\+ \[0-9]*;" 3 "optimized" } } ++// { dg-final { scan-tree-dump-times " = \[^\n\r]* <= 9;" 3 "optimized" } } ++// { dg-final { scan-tree-dump-not "if \\\(c_\[0-9]*\\\(D\\\) \[!=]= 0\\\)" "optimized" } } ++// { dg-final { scan-tree-dump-not " = PHI <" "optimized" } } ++ ++bool ++foo (char c) ++{ ++ return c >= 48 && c <= 57; ++} ++ ++bool ++bar (char c) ++{ ++ return c != 0 && foo (c); ++} ++ ++bool ++baz (char c) ++{ ++ return c != 0 && c >= 48 && c <= 57; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 85587e8d1..b9be28474 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -774,14 +774,14 @@ conditional_replacement (basic_block cond_bb, basic_block 
middle_bb, + nonzero_arg = arg0; + else + return false; +- if (integer_all_onesp (nonzero_arg)) +- neg = true; +- else if (integer_pow2p (nonzero_arg)) ++ if (integer_pow2p (nonzero_arg)) + { + shift = tree_log2 (nonzero_arg); + if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg))) + return false; + } ++ else if (integer_all_onesp (nonzero_arg)) ++ neg = true; + else + return false; + +-- +2.25.1 + diff --git a/0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch b/0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch new file mode 100644 index 0000000000000000000000000000000000000000..47c62ba8bd755ea4f00393c56a8d6efacc96abe3 --- /dev/null +++ b/0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch @@ -0,0 +1,122 @@ +From 09263d5ed4d81a008ca8ffcc2883dc766e7874d5 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Sun, 6 Dec 2020 10:58:10 +0100 +Subject: [PATCH 04/29] [Backport] phiopt: Handle bool in two_value_replacement + [PR96232] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=8c23434fdadcf4caa1f0e966294c5f67ccf4bcf9 + +The following patch improves code generation on the included testcase by +enabling two_value_replacement on booleans. It does that only for arg0/arg1 +values that conditional_replacement doesn't handle. Additionally +it limits two_value_replacement optimization to the late phiopt like +conditional_replacement. + +2020-12-06 Jakub Jelinek + + PR tree-optimization/96232 + * tree-ssa-phiopt.c (two_value_replacement): Optimize even boolean lhs + cases as long as arg0 has wider precision and conditional_replacement + doesn't handle that case. + (tree_ssa_phiopt_worker): Don't call two_value_replacement during + early phiopt. + + * gcc.dg/tree-ssa/pr96232-2.c: New test. + * gcc.dg/tree-ssa/pr88676-2.c: Check phiopt2 dump rather than phiopt1. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c | 4 ++-- + gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c | 18 ++++++++++++++++++ + gcc/tree-ssa-phiopt.c | 23 +++++++++++++++++++---- + 3 files changed, 39 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c +index 0e616365b..ea88407b6 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c +@@ -1,7 +1,7 @@ + /* PR tree-optimization/88676 */ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-phiopt1" } */ +-/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt1" { target le } } } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt2" { target le } } } */ + + struct foo1 { + int i:1; +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c +new file mode 100644 +index 000000000..9f51820ed +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c +@@ -0,0 +1,18 @@ ++/* PR tree-optimization/96232 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump " 38 - " "optimized" } } */ ++/* { dg-final { scan-tree-dump " \\+ 97;" "optimized" } } */ ++/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */ ++ ++int ++foo (_Bool x) ++{ ++ return x ? 37 : 38; ++} ++ ++int ++bar (_Bool x) ++{ ++ return x ? 98 : 97; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index b9be28474..0623d740d 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -339,7 +339,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + } + + /* Do the replacement of conditional if it can be done. 
*/ +- if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1)) ++ if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p + && conditional_replacement (bb, bb1, e1, e2, phi, +@@ -636,7 +636,6 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + + if (TREE_CODE (lhs) != SSA_NAME + || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)) +- || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE + || TREE_CODE (rhs) != INTEGER_CST) + return false; + +@@ -649,9 +648,25 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + return false; + } + ++ /* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to ++ conditional_replacement. */ ++ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE ++ && (integer_zerop (arg0) ++ || integer_zerop (arg1) ++ || TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE ++ || (TYPE_PRECISION (TREE_TYPE (arg0)) ++ <= TYPE_PRECISION (TREE_TYPE (lhs))))) ++ return false; ++ + wide_int min, max; +- if (get_range_info (lhs, &min, &max) != VR_RANGE +- || min + 1 != max ++ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE) ++ { ++ min = wi::to_wide (boolean_false_node); ++ max = wi::to_wide (boolean_true_node); ++ } ++ else if (get_range_info (lhs, &min, &max) != VR_RANGE) ++ return false; ++ if (min + 1 != max + || (wi::to_wide (rhs) != min + && wi::to_wide (rhs) != max)) + return false; +-- +2.25.1 + diff --git a/0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch b/0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e19d5daa3ae26f5a753326f0f73ffbf0af2facb --- /dev/null +++ b/0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch @@ -0,0 +1,256 @@ +From a92cf465f10585350f7cd5739457c3f2852cfc86 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Wed, 21 Oct 2020 10:51:33 +0200 +Subject: [PATCH 05/29] [Backport] phiopt: Optimize x ? 
__builtin_clz (x) : 32 + in GIMPLE [PR97503] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=5244b4af5e47bc98a2a9cf36f048981583a1b163 + +While we have at the RTL level noce_try_ifelse_collapse combined with +simplify_cond_clz_ctz, that optimization doesn't always trigger because +e.g. on powerpc there is a define_insn to compare a reg against zero and +copy that register to another one and so we end up with a different pseudo +in the simplify_cond_clz_ctz test and punt. + +For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes, +we can optimize it already in phiopt though, just need to ensure that +we transform the __builtin_c?z* calls into .C?Z ifns because my recent +VRP changes codified that the builtin calls are always undefined at zero, +while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2. +And, in phiopt we already have popcount handling that does pretty much the +same thing, except for always using a zero value rather than the one set +by C?Z_DEFINED_VALUE_AT_ZERO. + +So, this patch extends that function to handle not just popcount, but also +clz and ctz. + +2020-10-21 Jakub Jelinek + + PR tree-optimization/97503 + * tree-ssa-phiopt.c: Include internal-fn.h. + (cond_removal_in_popcount_pattern): Rename to ... + (cond_removal_in_popcount_clz_ctz_pattern): ... this. Handle not just + popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2. + + * gcc.dg/tree-ssa/pr97503.c: New test. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/pr97503.c | 19 +++++ + gcc/tree-ssa-phiopt.c | 100 ++++++++++++++++++------ + 2 files changed, 95 insertions(+), 24 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr97503.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c +new file mode 100644 +index 000000000..3a3dae6c7 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c +@@ -0,0 +1,19 @@ ++/* PR tree-optimization/97503 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } */ ++/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */ ++/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */ ++/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */ ++ ++int ++foo (int x) ++{ ++ return x ? __builtin_clz (x) : 32; ++} ++ ++int ++bar (unsigned long long x) ++{ ++ return x ? __builtin_clzll (x) : 64; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 0623d740d..c1e11916e 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. 
If not see + #include "tree-inline.h" + #include "case-cfn-macros.h" + #include "tree-eh.h" ++#include "internal-fn.h" + + static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); + static bool two_value_replacement (basic_block, basic_block, edge, gphi *, +@@ -60,8 +61,9 @@ static bool minmax_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); + static bool abs_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); +-static bool cond_removal_in_popcount_pattern (basic_block, basic_block, +- edge, edge, gimple *, tree, tree); ++static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, ++ edge, edge, gimple *, ++ tree, tree); + static bool cond_store_replacement (basic_block, basic_block, edge, edge, + hash_set *); + static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); +@@ -348,8 +350,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p +- && cond_removal_in_popcount_pattern (bb, bb1, e1, e2, +- phi, arg0, arg1)) ++ && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, ++ e2, phi, arg0, ++ arg1)) + cfgchanged = true; + else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -1771,16 +1774,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, + + + c_12 = PHI <_9(2)> +-*/ ++ ++ Similarly for __builtin_clz or __builtin_ctz if ++ C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and ++ instead of 0 above it uses the value from that macro. 
*/ + + static bool +-cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb, +- edge e1, edge e2, +- gimple *phi, tree arg0, tree arg1) ++cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, ++ basic_block middle_bb, ++ edge e1, edge e2, gimple *phi, ++ tree arg0, tree arg1) + { + gimple *cond; + gimple_stmt_iterator gsi, gsi_from; +- gimple *popcount; ++ gimple *call; + gimple *cast = NULL; + tree lhs, arg; + +@@ -1798,35 +1805,67 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb, + gsi_next_nondebug (&gsi); + if (!gsi_end_p (gsi)) + { +- popcount = gsi_stmt (gsi); ++ call = gsi_stmt (gsi); + gsi_next_nondebug (&gsi); + if (!gsi_end_p (gsi)) + return false; + } + else + { +- popcount = cast; ++ call = cast; + cast = NULL; + } + +- /* Check that we have a popcount builtin. */ +- if (!is_gimple_call (popcount)) ++ /* Check that we have a popcount/clz/ctz builtin. */ ++ if (!is_gimple_call (call) || gimple_call_num_args (call) != 1) ++ return false; ++ ++ arg = gimple_call_arg (call, 0); ++ lhs = gimple_get_lhs (call); ++ ++ if (lhs == NULL_TREE) + return false; +- combined_fn cfn = gimple_call_combined_fn (popcount); ++ ++ combined_fn cfn = gimple_call_combined_fn (call); ++ internal_fn ifn = IFN_LAST; ++ int val = 0; + switch (cfn) + { + CASE_CFN_POPCOUNT: + break; ++ CASE_CFN_CLZ: ++ if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) ++ { ++ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); ++ if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg), ++ OPTIMIZE_FOR_BOTH) ++ && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2) ++ { ++ ifn = IFN_CLZ; ++ break; ++ } ++ } ++ return false; ++ CASE_CFN_CTZ: ++ if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) ++ { ++ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); ++ if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg), ++ OPTIMIZE_FOR_BOTH) ++ && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2) ++ { ++ ifn = IFN_CTZ; ++ break; ++ } ++ } ++ return 
false; + default: + return false; + } + +- arg = gimple_call_arg (popcount, 0); +- lhs = gimple_get_lhs (popcount); +- + if (cast) + { +- /* We have a cast stmt feeding popcount builtin. */ ++ /* We have a cast stmt feeding popcount/clz/ctz builtin. */ + /* Check that we have a cast prior to that. */ + if (gimple_code (cast) != GIMPLE_ASSIGN + || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast))) +@@ -1839,7 +1878,7 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb, + + cond = last_stmt (cond_bb); + +- /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount ++ /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz + builtin. */ + if (gimple_code (cond) != GIMPLE_COND + || (gimple_cond_code (cond) != NE_EXPR +@@ -1859,10 +1898,13 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb, + } + + /* Check PHI arguments. */ +- if (lhs != arg0 || !integer_zerop (arg1)) ++ if (lhs != arg0 ++ || TREE_CODE (arg1) != INTEGER_CST ++ || wi::to_wide (arg1) != val) + return false; + +- /* And insert the popcount builtin and cast stmt before the cond_bb. */ ++ /* And insert the popcount/clz/ctz builtin and cast stmt before the ++ cond_bb. */ + gsi = gsi_last_bb (cond_bb); + if (cast) + { +@@ -1870,9 +1912,19 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb, + gsi_move_before (&gsi_from, &gsi); + reset_flow_sensitive_info (gimple_get_lhs (cast)); + } +- gsi_from = gsi_for_stmt (popcount); +- gsi_move_before (&gsi_from, &gsi); +- reset_flow_sensitive_info (gimple_get_lhs (popcount)); ++ gsi_from = gsi_for_stmt (call); ++ if (ifn == IFN_LAST || gimple_call_internal_p (call)) ++ gsi_move_before (&gsi_from, &gsi); ++ else ++ { ++ /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only ++ the latter is well defined at zero. 
*/ ++ call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0)); ++ gimple_call_set_lhs (call, lhs); ++ gsi_insert_before (&gsi, call, GSI_SAME_STMT); ++ gsi_remove (&gsi_from, true); ++ } ++ reset_flow_sensitive_info (lhs); + + /* Now update the PHI and remove unneeded bbs. */ + replace_phi_edge_with_variable (cond_bb, e2, phi, lhs); +-- +2.25.1 + diff --git a/0054-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch b/0054-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch new file mode 100644 index 0000000000000000000000000000000000000000..15d97abcc043646e2df6fc03e8757903396f554f --- /dev/null +++ b/0054-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch @@ -0,0 +1,69 @@ +From 7d5d2ab082ce9986db4f3313013b44faa46bc412 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Thu, 22 Oct 2020 09:34:28 +0200 +Subject: [PATCH 06/29] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32 + in GIMPLE fallout [PR97503] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=ef2d3ec325b1b720df5da20784eba46249af2294 + +> this broke sparc-sun-solaris2.11 bootstrap +> +> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c: In function 'bool cond_removal_in_popcount_clz_ctz_pattern(basic_block, basic_block, edge, edge, gimple*, tree, tree)': +> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c:1858:27: error: variable 'mode' set but not used [-Werror=unused-but-set-variable] +> 1858 | scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); +> | ^~~~ +> +> +> and doubtlessly several other targets that use the defaults.h definition of +> +> #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) 0 + +Ugh, seems many of those macros do not evaluate the first argument. +This got broken by the change to direct_internal_fn_supported_p, previously +it used mode also in the optab test. + +2020-10-22 Jakub Jelinek + + * tree-ssa-phiopt.c (cond_removal_in_popcount_clz_ctz_pattern): + For CLZ and CTZ tests, use type temporary instead of mode. 
+--- + gcc/tree-ssa-phiopt.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index c1e11916e..707a5882e 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -1836,10 +1836,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + CASE_CFN_CLZ: + if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) + { +- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); +- if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg), +- OPTIMIZE_FOR_BOTH) +- && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2) ++ tree type = TREE_TYPE (arg); ++ if (direct_internal_fn_supported_p (IFN_CLZ, type, OPTIMIZE_FOR_BOTH) ++ && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ++ val) == 2) + { + ifn = IFN_CLZ; + break; +@@ -1849,10 +1849,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + CASE_CFN_CTZ: + if (INTEGRAL_TYPE_P (TREE_TYPE (arg))) + { +- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); +- if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg), +- OPTIMIZE_FOR_BOTH) +- && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2) ++ tree type = TREE_TYPE (arg); ++ if (direct_internal_fn_supported_p (IFN_CTZ, type, OPTIMIZE_FOR_BOTH) ++ && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type), ++ val) == 2) + { + ifn = IFN_CTZ; + break; +-- +2.25.1 + diff --git a/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch b/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch new file mode 100644 index 0000000000000000000000000000000000000000..e9a6e86d2cbb89e957c05920ad4fc5d6ca6091a6 --- /dev/null +++ b/0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch @@ -0,0 +1,218 @@ +From 018523df11698dd0e2d42326c57bdf724a7a1aa5 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Tue, 5 Jan 2021 16:35:22 +0100 +Subject: [PATCH 07/29] [Backport] phiopt: Optimize x < 0 ? 
~y : y to (x >> 31) + ^ y [PR96928] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=576714b309b330df0e80e34114bcdf0bba35e146 + +As requested in the PR, the one's complement abs can be done more +efficiently without cmov or branching. + +Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize +it in ifcvt, on x86_64 with -m32 we generate in the end the exact same +code, but with -m64: + movl %edi, %eax +- notl %eax +- cmpl %edi, %eax +- cmovl %edi, %eax ++ sarl $31, %eax ++ xorl %edi, %eax + ret + +2021-01-05 Jakub Jelinek + + PR tree-optimization/96928 + * tree-ssa-phiopt.c (xor_replacement): New function. + (tree_ssa_phiopt_worker): Call it. + + * gcc.dg/tree-ssa/pr96928.c: New test. + * gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1, + instead of scanning rtl dump for ifcvt message check assembly + for xor instruction. +--- + gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 38 +++++++++ + gcc/tree-ssa-phiopt.c | 108 ++++++++++++++++++++++++ + 2 files changed, 146 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c +new file mode 100644 +index 000000000..209135726 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c +@@ -0,0 +1,38 @@ ++/* PR tree-optimization/96928 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */ ++ ++int ++foo (int a) ++{ ++ return a < 0 ? ~a : a; ++} ++ ++int ++bar (int a, int b) ++{ ++ return a < 0 ? 
~b : b; ++} ++ ++unsigned ++baz (int a, unsigned int b) ++{ ++ return a < 0 ? ~b : b; ++} ++ ++unsigned ++qux (int a, unsigned int c) ++{ ++ return a >= 0 ? ~c : c; ++} ++ ++int ++corge (int a, int b) ++{ ++ return a >= 0 ? b : ~b; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 707a5882e..b9cd07a60 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -61,6 +61,8 @@ static bool minmax_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); + static bool abs_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); ++static bool xor_replacement (basic_block, basic_block, ++ edge, edge, gimple *, tree, tree); + static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, + edge, edge, gimple *, + tree, tree); +@@ -349,6 +351,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + cfgchanged = true; + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; ++ else if (!early_p ++ && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) ++ cfgchanged = true; + else if (!early_p + && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, + e2, phi, arg0, +@@ -2059,6 +2064,109 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + ++/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */ ++ ++static bool ++xor_replacement (basic_block cond_bb, basic_block middle_bb, ++ edge e0 ATTRIBUTE_UNUSED, edge e1, ++ gimple *phi, tree arg0, tree arg1) ++{ ++ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1))) ++ return false; ++ ++ /* OTHER_BLOCK must have only one executable statement which must have the ++ form arg0 = ~arg1 or arg1 = ~arg0. */ ++ ++ gimple *assign = last_and_only_stmt (middle_bb); ++ /* If we did not find the proper one's complement assignment, then we cannot ++ optimize. 
*/ ++ if (assign == NULL) ++ return false; ++ ++ /* If we got here, then we have found the only executable statement ++ in OTHER_BLOCK. If it is anything other than arg0 = ~arg1 or ++ arg1 = ~arg0, then we cannot optimize. */ ++ if (!is_gimple_assign (assign)) ++ return false; ++ ++ if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR) ++ return false; ++ ++ tree lhs = gimple_assign_lhs (assign); ++ tree rhs = gimple_assign_rhs1 (assign); ++ ++ /* The assignment has to be arg0 = ~arg1 or arg1 = ~arg0. */ ++ if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0)) ++ return false; ++ ++ gimple *cond = last_stmt (cond_bb); ++ tree result = PHI_RESULT (phi); ++ ++ /* Only LT_EXPR and GE_EXPR comparisons against zero are interesting. */ ++ enum tree_code cond_code = gimple_cond_code (cond); ++ if (cond_code != LT_EXPR && cond_code != GE_EXPR) ++ return false; ++ ++ /* Make sure the conditional is x OP 0. */ ++ tree clhs = gimple_cond_lhs (cond); ++ if (TREE_CODE (clhs) != SSA_NAME ++ || !INTEGRAL_TYPE_P (TREE_TYPE (clhs)) ++ || TYPE_UNSIGNED (TREE_TYPE (clhs)) ++ || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1)) ++ || !integer_zerop (gimple_cond_rhs (cond))) ++ return false; ++ ++ /* We need to know which is the true edge and which is the false ++ edge so that we know if we have xor or inverted xor. */ ++ edge true_edge, false_edge; ++ extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); ++ ++ /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we ++ will need to invert the result. Similarly for LT_EXPR if ++ the false edge goes to OTHER_BLOCK. 
*/ ++ edge e; ++ if (cond_code == GE_EXPR) ++ e = true_edge; ++ else ++ e = false_edge; ++ ++ bool invert = e->dest == middle_bb; ++ ++ result = duplicate_ssa_name (result, NULL); ++ ++ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); ++ ++ int prec = TYPE_PRECISION (TREE_TYPE (clhs)); ++ gimple *new_stmt ++ = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs, ++ build_int_cst (integer_type_node, prec - 1)); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ ++ if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs))) ++ { ++ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), ++ NOP_EXPR, gimple_assign_lhs (new_stmt)); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ } ++ lhs = gimple_assign_lhs (new_stmt); ++ ++ if (invert) ++ { ++ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), ++ BIT_NOT_EXPR, rhs); ++ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); ++ rhs = gimple_assign_lhs (new_stmt); ++ } ++ ++ new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs); ++ gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); ++ ++ replace_phi_edge_with_variable (cond_bb, e1, phi, result); ++ ++ /* Note that we optimized this PHI. */ ++ return true; ++} ++ + /* Auxiliary functions to determine the set of memory accesses which + can't trap because they are preceded by accesses to the same memory + portion. 
We do that for MEM_REFs, so we only need to track +-- +2.25.1 + diff --git a/0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch b/0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch new file mode 100644 index 0000000000000000000000000000000000000000..62b1fa890e3ac429ca5d2692de29730fb10a64f2 --- /dev/null +++ b/0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch @@ -0,0 +1,1067 @@ +From 02313ab8cf7eb4defc1482ece48c07c2d8c77be9 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Thu, 6 May 2021 10:15:40 +0200 +Subject: [PATCH 08/29] [Backport] phiopt: Optimize (x <=> y) cmp z [PR94589] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=ad96c867e173c1ebcfc201b201adac5095683a08 + +genericize_spaceship genericizes i <=> j to approximately +({ int c; if (i == j) c = 0; else if (i < j) c = -1; else c = 1; c; }) +for strong ordering and +({ int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; c; }) +for partial ordering. +The C++ standard supports then == or != comparisons of that against +strong/partial ordering enums, or </<=/==/!=/>/>= comparisons of <=> result +against literal 0. + +In some cases we already optimize that but in many cases we keep performing +all the 2 or 3 comparisons, compute the spaceship value and then compare +that. + +The following patch recognizes those patterns if the <=> operands are +integral types or floating point (the latter only for -ffast-math) and +optimizes it to the single comparison that is needed (plus adds debug stmts +if needed for the spaceship result). + +There is one thing I'd like to address in a follow-up: the pr94589-2.C +testcase should be matching just 12 times each, but runs +into operator>=(partial_ordering, unspecified) being defined as +(_M_value&1)==_M_value +rather than _M_value>=0. When not honoring NaNs, the 2 case should be +unreachable and so (_M_value&1)==_M_value is then equivalent to _M_value>=0, +but is not a single use but two uses. 
I'll need to pattern match that case +specially. + +2021-05-06 Jakub Jelinek + + PR tree-optimization/94589 + * tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Call + spaceship_replacement. + (cond_only_block_p, spaceship_replacement): New functions. + + * gcc.dg/pr94589-1.c: New test. + * gcc.dg/pr94589-2.c: New test. + * gcc.dg/pr94589-3.c: New test. + * gcc.dg/pr94589-4.c: New test. + * g++.dg/opt/pr94589-1.C: New test. + * g++.dg/opt/pr94589-2.C: New test. + * g++.dg/opt/pr94589-3.C: New test. + * g++.dg/opt/pr94589-4.C: New test. +--- + gcc/testsuite/g++.dg/opt/pr94589-1.C | 33 +++ + gcc/testsuite/g++.dg/opt/pr94589-2.C | 33 +++ + gcc/testsuite/g++.dg/opt/pr94589-3.C | 84 ++++++ + gcc/testsuite/g++.dg/opt/pr94589-4.C | 84 ++++++ + gcc/testsuite/gcc.dg/pr94589-1.c | 35 +++ + gcc/testsuite/gcc.dg/pr94589-2.c | 35 +++ + gcc/testsuite/gcc.dg/pr94589-3.c | 97 ++++++ + gcc/testsuite/gcc.dg/pr94589-4.c | 97 ++++++ + gcc/tree-ssa-phiopt.c | 424 +++++++++++++++++++++++++++ + 9 files changed, 922 insertions(+) + create mode 100644 gcc/testsuite/g++.dg/opt/pr94589-1.C + create mode 100644 gcc/testsuite/g++.dg/opt/pr94589-2.C + create mode 100644 gcc/testsuite/g++.dg/opt/pr94589-3.C + create mode 100644 gcc/testsuite/g++.dg/opt/pr94589-4.C + create mode 100644 gcc/testsuite/gcc.dg/pr94589-1.c + create mode 100644 gcc/testsuite/gcc.dg/pr94589-2.c + create mode 100644 gcc/testsuite/gcc.dg/pr94589-3.c + create mode 100644 gcc/testsuite/gcc.dg/pr94589-4.c + +diff --git a/gcc/testsuite/g++.dg/opt/pr94589-1.C b/gcc/testsuite/g++.dg/opt/pr94589-1.C +new file mode 100644 +index 000000000..d1cc5050c +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr94589-1.C +@@ -0,0 +1,33 @@ ++// PR tree-optimization/94589 ++// { dg-do compile { target c++20 } } ++// { dg-options "-O2 -g0 -fdump-tree-optimized" } ++// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 12 "optimized" } } ++// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) 
(?:<|<=|==|!=|>|>=) \[45]" 12 "optimized" } } ++ ++#include <compare> ++ ++#define A __attribute__((noipa)) ++A bool f1 (int i, int j) { auto c = i <=> j; return c == 0; } ++A bool f2 (int i, int j) { auto c = i <=> j; return c != 0; } ++A bool f3 (int i, int j) { auto c = i <=> j; return c > 0; } ++A bool f4 (int i, int j) { auto c = i <=> j; return c < 0; } ++A bool f5 (int i, int j) { auto c = i <=> j; return c >= 0; } ++A bool f6 (int i, int j) { auto c = i <=> j; return c <= 0; } ++A bool f7 (int i, int j) { auto c = i <=> j; return c == std::strong_ordering::less; } ++A bool f8 (int i, int j) { auto c = i <=> j; return c != std::strong_ordering::less; } ++A bool f9 (int i, int j) { auto c = i <=> j; return c == std::strong_ordering::equal; } ++A bool f10 (int i, int j) { auto c = i <=> j; return c != std::strong_ordering::equal; } ++A bool f11 (int i, int j) { auto c = i <=> j; return c == std::strong_ordering::greater; } ++A bool f12 (int i, int j) { auto c = i <=> j; return c != std::strong_ordering::greater; } ++A bool f13 (int i) { auto c = i <=> 5; return c == 0; } ++A bool f14 (int i) { auto c = i <=> 5; return c != 0; } ++A bool f15 (int i) { auto c = i <=> 5; return c > 0; } ++A bool f16 (int i) { auto c = i <=> 5; return c < 0; } ++A bool f17 (int i) { auto c = i <=> 5; return c >= 0; } ++A bool f18 (int i) { auto c = i <=> 5; return c <= 0; } ++A bool f19 (int i) { auto c = i <=> 5; return c == std::strong_ordering::less; } ++A bool f20 (int i) { auto c = i <=> 5; return c != std::strong_ordering::less; } ++A bool f21 (int i) { auto c = i <=> 5; return c == std::strong_ordering::equal; } ++A bool f22 (int i) { auto c = i <=> 5; return c != std::strong_ordering::equal; } ++A bool f23 (int i) { auto c = i <=> 5; return c == std::strong_ordering::greater; } ++A bool f24 (int i) { auto c = i <=> 5; return c != std::strong_ordering::greater; } +diff --git a/gcc/testsuite/g++.dg/opt/pr94589-2.C b/gcc/testsuite/g++.dg/opt/pr94589-2.C +new file mode 100644 +index 
000000000..dda947e22 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr94589-2.C +@@ -0,0 +1,33 @@ ++// PR tree-optimization/94589 ++// { dg-do compile { target c++20 } } ++// { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" } ++// { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } } ++// { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } } ++ ++#include <compare> ++ ++#define A __attribute__((noipa)) ++A bool f1 (double i, double j) { auto c = i <=> j; return c == 0; } ++A bool f2 (double i, double j) { auto c = i <=> j; return c != 0; } ++A bool f3 (double i, double j) { auto c = i <=> j; return c > 0; } ++A bool f4 (double i, double j) { auto c = i <=> j; return c < 0; } ++A bool f5 (double i, double j) { auto c = i <=> j; return c >= 0; } ++A bool f6 (double i, double j) { auto c = i <=> j; return c <= 0; } ++A bool f7 (double i, double j) { auto c = i <=> j; return c == std::partial_ordering::less; } ++A bool f8 (double i, double j) { auto c = i <=> j; return c != std::partial_ordering::less; } ++A bool f9 (double i, double j) { auto c = i <=> j; return c == std::partial_ordering::equivalent; } ++A bool f10 (double i, double j) { auto c = i <=> j; return c != std::partial_ordering::equivalent; } ++A bool f11 (double i, double j) { auto c = i <=> j; return c == std::partial_ordering::greater; } ++A bool f12 (double i, double j) { auto c = i <=> j; return c != std::partial_ordering::greater; } ++A bool f13 (double i) { auto c = i <=> 5.0; return c == 0; } ++A bool f14 (double i) { auto c = i <=> 5.0; return c != 0; } ++A bool f15 (double i) { auto c = i <=> 5.0; return c > 0; } ++A bool f16 (double i) { auto c = i <=> 5.0; return c < 0; } ++A bool f17 (double i) { auto c = i <=> 5.0; return c >= 0; } ++A bool f18 (double i) { auto c = i <=> 5.0; return c <= 0; } ++A bool f19 (double i) { auto c = i <=> 5.0; return c == std::partial_ordering::less; } ++A bool 
f20 (double i) { auto c = i <=> 5.0; return c != std::partial_ordering::less; } ++A bool f21 (double i) { auto c = i <=> 5.0; return c == std::partial_ordering::equivalent; } ++A bool f22 (double i) { auto c = i <=> 5.0; return c != std::partial_ordering::equivalent; } ++A bool f23 (double i) { auto c = i <=> 5.0; return c == std::partial_ordering::greater; } ++A bool f24 (double i) { auto c = i <=> 5.0; return c != std::partial_ordering::greater; } +diff --git a/gcc/testsuite/g++.dg/opt/pr94589-3.C b/gcc/testsuite/g++.dg/opt/pr94589-3.C +new file mode 100644 +index 000000000..725b81f56 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr94589-3.C +@@ -0,0 +1,84 @@ ++// { dg-do run { target c++20 } } ++// { dg-options "-O2 -g" } ++ ++#include "pr94589-1.C" ++ ++#define C(fn, i, j, r) if (fn (i, j) != r) __builtin_abort () ++#define D(fn, i, r) if (fn (i) != r) __builtin_abort () ++ ++int ++main () ++{ ++ C (f1, 7, 8, false); ++ C (f1, 8, 8, true); ++ C (f1, 9, 8, false); ++ C (f2, 7, 8, true); ++ C (f2, 8, 8, false); ++ C (f2, 9, 8, true); ++ C (f3, 7, 8, false); ++ C (f3, 8, 8, false); ++ C (f3, 9, 8, true); ++ C (f4, 7, 8, true); ++ C (f4, 8, 8, false); ++ C (f4, 9, 8, false); ++ C (f5, 7, 8, false); ++ C (f5, 8, 8, true); ++ C (f5, 9, 8, true); ++ C (f6, 7, 8, true); ++ C (f6, 8, 8, true); ++ C (f6, 9, 8, false); ++ C (f7, 7, 8, true); ++ C (f7, 8, 8, false); ++ C (f7, 9, 8, false); ++ C (f8, 7, 8, false); ++ C (f8, 8, 8, true); ++ C (f8, 9, 8, true); ++ C (f9, 7, 8, false); ++ C (f9, 8, 8, true); ++ C (f9, 9, 8, false); ++ C (f10, 7, 8, true); ++ C (f10, 8, 8, false); ++ C (f10, 9, 8, true); ++ C (f11, 7, 8, false); ++ C (f11, 8, 8, false); ++ C (f11, 9, 8, true); ++ C (f12, 7, 8, true); ++ C (f12, 8, 8, true); ++ C (f12, 9, 8, false); ++ D (f13, 4, false); ++ D (f13, 5, true); ++ D (f13, 6, false); ++ D (f14, 4, true); ++ D (f14, 5, false); ++ D (f14, 6, true); ++ D (f15, 4, false); ++ D (f15, 5, false); ++ D (f15, 6, true); ++ D (f16, 4, true); ++ D (f16, 5, 
false); ++ D (f16, 6, false); ++ D (f17, 4, false); ++ D (f17, 5, true); ++ D (f17, 6, true); ++ D (f18, 4, true); ++ D (f18, 5, true); ++ D (f18, 6, false); ++ D (f19, 4, true); ++ D (f19, 5, false); ++ D (f19, 6, false); ++ D (f20, 4, false); ++ D (f20, 5, true); ++ D (f20, 6, true); ++ D (f21, 4, false); ++ D (f21, 5, true); ++ D (f21, 6, false); ++ D (f22, 4, true); ++ D (f22, 5, false); ++ D (f22, 6, true); ++ D (f23, 4, false); ++ D (f23, 5, false); ++ D (f23, 6, true); ++ D (f24, 4, true); ++ D (f24, 5, true); ++ D (f24, 6, false); ++} +diff --git a/gcc/testsuite/g++.dg/opt/pr94589-4.C b/gcc/testsuite/g++.dg/opt/pr94589-4.C +new file mode 100644 +index 000000000..256a45580 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr94589-4.C +@@ -0,0 +1,84 @@ ++// { dg-do run { target c++20 } } ++// { dg-options "-O2 -g -ffast-math" } ++ ++#include "pr94589-2.C" ++ ++#define C(fn, i, j, r) if (fn (i, j) != r) __builtin_abort () ++#define D(fn, i, r) if (fn (i) != r) __builtin_abort () ++ ++int ++main () ++{ ++ C (f1, 7.0, 8.0, false); ++ C (f1, 8.0, 8.0, true); ++ C (f1, 9.0, 8.0, false); ++ C (f2, 7.0, 8.0, true); ++ C (f2, 8.0, 8.0, false); ++ C (f2, 9.0, 8.0, true); ++ C (f3, 7.0, 8.0, false); ++ C (f3, 8.0, 8.0, false); ++ C (f3, 9.0, 8.0, true); ++ C (f4, 7.0, 8.0, true); ++ C (f4, 8.0, 8.0, false); ++ C (f4, 9.0, 8.0, false); ++ C (f5, 7.0, 8.0, false); ++ C (f5, 8.0, 8.0, true); ++ C (f5, 9.0, 8.0, true); ++ C (f6, 7.0, 8.0, true); ++ C (f6, 8.0, 8.0, true); ++ C (f6, 9.0, 8.0, false); ++ C (f7, 7.0, 8.0, true); ++ C (f7, 8.0, 8.0, false); ++ C (f7, 9.0, 8.0, false); ++ C (f8, 7.0, 8.0, false); ++ C (f8, 8.0, 8.0, true); ++ C (f8, 9.0, 8.0, true); ++ C (f9, 7.0, 8.0, false); ++ C (f9, 8.0, 8.0, true); ++ C (f9, 9.0, 8.0, false); ++ C (f10, 7.0, 8.0, true); ++ C (f10, 8.0, 8.0, false); ++ C (f10, 9.0, 8.0, true); ++ C (f11, 7.0, 8.0, false); ++ C (f11, 8.0, 8.0, false); ++ C (f11, 9.0, 8.0, true); ++ C (f12, 7.0, 8.0, true); ++ C (f12, 8.0, 8.0, true); ++ C (f12, 
9.0, 8.0, false); ++ D (f13, 4.0, false); ++ D (f13, 5.0, true); ++ D (f13, 6.0, false); ++ D (f14, 4.0, true); ++ D (f14, 5.0, false); ++ D (f14, 6.0, true); ++ D (f15, 4.0, false); ++ D (f15, 5.0, false); ++ D (f15, 6.0, true); ++ D (f16, 4.0, true); ++ D (f16, 5.0, false); ++ D (f16, 6.0, false); ++ D (f17, 4.0, false); ++ D (f17, 5.0, true); ++ D (f17, 6.0, true); ++ D (f18, 4.0, true); ++ D (f18, 5.0, true); ++ D (f18, 6.0, false); ++ D (f19, 4.0, true); ++ D (f19, 5.0, false); ++ D (f19, 6.0, false); ++ D (f20, 4.0, false); ++ D (f20, 5.0, true); ++ D (f20, 6.0, true); ++ D (f21, 4.0, false); ++ D (f21, 5.0, true); ++ D (f21, 6.0, false); ++ D (f22, 4.0, true); ++ D (f22, 5.0, false); ++ D (f22, 6.0, true); ++ D (f23, 4.0, false); ++ D (f23, 5.0, false); ++ D (f23, 6.0, true); ++ D (f24, 4.0, true); ++ D (f24, 5.0, true); ++ D (f24, 6.0, false); ++} +diff --git a/gcc/testsuite/gcc.dg/pr94589-1.c b/gcc/testsuite/gcc.dg/pr94589-1.c +new file mode 100644 +index 000000000..de404ea82 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr94589-1.c +@@ -0,0 +1,35 @@ ++/* PR tree-optimization/94589 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -g0 -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[45]" 14 "optimized" } } */ ++ ++#define A __attribute__((noipa)) ++A int f1 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c == 0; } ++A int f2 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c != 0; } ++A int f3 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c > 0; } ++A int f4 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c < 0; } ++A int f5 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c >= 0; } ++A int f6 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c <= 0; } ++A int f7 (int i, int j) { int c = i == j ? 
0 : i < j ? -1 : 1; return c == -1; } ++A int f8 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c != -1; } ++A int f9 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c > -1; } ++A int f10 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c <= -1; } ++A int f11 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c == 1; } ++A int f12 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c != 1; } ++A int f13 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c < 1; } ++A int f14 (int i, int j) { int c = i == j ? 0 : i < j ? -1 : 1; return c >= 1; } ++A int f15 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c == 0; } ++A int f16 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c != 0; } ++A int f17 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c > 0; } ++A int f18 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c < 0; } ++A int f19 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c >= 0; } ++A int f20 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c <= 0; } ++A int f21 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c == -1; } ++A int f22 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c != -1; } ++A int f23 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c > -1; } ++A int f24 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c <= -1; } ++A int f25 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c == 1; } ++A int f26 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c != 1; } ++A int f27 (int i) { int c = i == 5 ? 0 : i < 5 ? -1 : 1; return c < 1; } ++A int f28 (int i) { int c = i == 5 ? 0 : i < 5 ? 
-1 : 1; return c >= 1; } +diff --git a/gcc/testsuite/gcc.dg/pr94589-2.c b/gcc/testsuite/gcc.dg/pr94589-2.c +new file mode 100644 +index 000000000..9481b764d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr94589-2.c +@@ -0,0 +1,35 @@ ++/* PR tree-optimization/94589 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -g0 -ffast-math -fdump-tree-optimized" } */ ++/* { dg-final { scan-tree-dump-times "\[ij]_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) \[ij]_\[0-9]+\\(D\\)" 14 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "i_\[0-9]+\\(D\\) (?:<|<=|==|!=|>|>=) 5\\.0" 14 "optimized" } } */ ++ ++#define A __attribute__((noipa)) ++A int f1 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c == 0; } ++A int f2 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c != 0; } ++A int f3 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c > 0; } ++A int f4 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c < 0; } ++A int f5 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c >= 0; } ++A int f6 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c <= 0; } ++A int f7 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c == -1; } ++A int f8 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c != -1; } ++A int f9 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c > -1; } ++A int f10 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c <= -1; } ++A int 
f11 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c == 1; } ++A int f12 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c != 1; } ++A int f13 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c < 1; } ++A int f14 (double i, double j) { int c; if (i == j) c = 0; else if (i < j) c = -1; else if (i > j) c = 1; else c = 2; return c >= 1; } ++A int f15 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c == 0; } ++A int f16 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c != 0; } ++A int f17 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c > 0; } ++A int f18 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c < 0; } ++A int f19 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c >= 0; } ++A int f20 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c <= 0; } ++A int f21 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c == -1; } ++A int f22 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c != -1; } ++A int f23 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c > -1; } ++A int f24 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c <= -1; } ++A int f25 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c == 
1; } ++A int f26 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c != 1; } ++A int f27 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c < 1; } ++A int f28 (double i) { int c; if (i == 5.0) c = 0; else if (i < 5.0) c = -1; else if (i > 5.0) c = 1; else c = 2; return c >= 1; } +diff --git a/gcc/testsuite/gcc.dg/pr94589-3.c b/gcc/testsuite/gcc.dg/pr94589-3.c +new file mode 100644 +index 000000000..df82fab73 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr94589-3.c +@@ -0,0 +1,97 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -g" } */ ++ ++#include "pr94589-1.c" ++ ++#define C(fn, i, j, r) if (fn (i, j) != r) __builtin_abort () ++#define D(fn, i, r) if (fn (i) != r) __builtin_abort () ++ ++int ++main () ++{ ++ C (f1, 7, 8, 0); ++ C (f1, 8, 8, 1); ++ C (f1, 9, 8, 0); ++ C (f2, 7, 8, 1); ++ C (f2, 8, 8, 0); ++ C (f2, 9, 8, 1); ++ C (f3, 7, 8, 0); ++ C (f3, 8, 8, 0); ++ C (f3, 9, 8, 1); ++ C (f4, 7, 8, 1); ++ C (f4, 8, 8, 0); ++ C (f4, 9, 8, 0); ++ C (f5, 7, 8, 0); ++ C (f5, 8, 8, 1); ++ C (f5, 9, 8, 1); ++ C (f6, 7, 8, 1); ++ C (f6, 8, 8, 1); ++ C (f6, 9, 8, 0); ++ C (f7, 7, 8, 1); ++ C (f7, 8, 8, 0); ++ C (f7, 9, 8, 0); ++ C (f8, 7, 8, 0); ++ C (f8, 8, 8, 1); ++ C (f8, 9, 8, 1); ++ C (f9, 7, 8, 0); ++ C (f9, 8, 8, 1); ++ C (f9, 9, 8, 1); ++ C (f10, 7, 8, 1); ++ C (f10, 8, 8, 0); ++ C (f10, 9, 8, 0); ++ C (f11, 7, 8, 0); ++ C (f11, 8, 8, 0); ++ C (f11, 9, 8, 1); ++ C (f12, 7, 8, 1); ++ C (f12, 8, 8, 1); ++ C (f12, 9, 8, 0); ++ C (f13, 7, 8, 1); ++ C (f13, 8, 8, 1); ++ C (f13, 9, 8, 0); ++ C (f14, 7, 8, 0); ++ C (f14, 8, 8, 0); ++ C (f14, 9, 8, 1); ++ D (f15, 4, 0); ++ D (f15, 5, 1); ++ D (f15, 6, 0); ++ D (f16, 4, 1); ++ D (f16, 5, 0); ++ D (f16, 6, 1); ++ D (f17, 4, 0); ++ D (f17, 5, 0); ++ D (f17, 6, 1); ++ D (f18, 4, 1); ++ D (f18, 5, 0); ++ D (f18, 6, 0); ++ D (f19, 4, 0); ++ D (f19, 5, 1); ++ D (f19, 6, 1); ++ D (f20, 4, 1); ++ D (f20, 5, 1); 
++ D (f20, 6, 0); ++ D (f21, 4, 1); ++ D (f21, 5, 0); ++ D (f21, 6, 0); ++ D (f22, 4, 0); ++ D (f22, 5, 1); ++ D (f22, 6, 1); ++ D (f23, 4, 0); ++ D (f23, 5, 1); ++ D (f23, 6, 1); ++ D (f24, 4, 1); ++ D (f24, 5, 0); ++ D (f24, 6, 0); ++ D (f25, 4, 0); ++ D (f25, 5, 0); ++ D (f25, 6, 1); ++ D (f26, 4, 1); ++ D (f26, 5, 1); ++ D (f26, 6, 0); ++ D (f27, 4, 1); ++ D (f27, 5, 1); ++ D (f27, 6, 0); ++ D (f28, 4, 0); ++ D (f28, 5, 0); ++ D (f28, 6, 1); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.dg/pr94589-4.c b/gcc/testsuite/gcc.dg/pr94589-4.c +new file mode 100644 +index 000000000..b2557fb07 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr94589-4.c +@@ -0,0 +1,97 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -g -ffast-math" } */ ++ ++#include "pr94589-2.c" ++ ++#define C(fn, i, j, r) if (fn (i, j) != r) __builtin_abort () ++#define D(fn, i, r) if (fn (i) != r) __builtin_abort () ++ ++int ++main () ++{ ++ C (f1, 7.0, 8.0, 0); ++ C (f1, 8.0, 8.0, 1); ++ C (f1, 9.0, 8.0, 0); ++ C (f2, 7.0, 8.0, 1); ++ C (f2, 8.0, 8.0, 0); ++ C (f2, 9.0, 8.0, 1); ++ C (f3, 7.0, 8.0, 0); ++ C (f3, 8.0, 8.0, 0); ++ C (f3, 9.0, 8.0, 1); ++ C (f4, 7.0, 8.0, 1); ++ C (f4, 8.0, 8.0, 0); ++ C (f4, 9.0, 8.0, 0); ++ C (f5, 7.0, 8.0, 0); ++ C (f5, 8.0, 8.0, 1); ++ C (f5, 9.0, 8.0, 1); ++ C (f6, 7.0, 8.0, 1); ++ C (f6, 8.0, 8.0, 1); ++ C (f6, 9.0, 8.0, 0); ++ C (f7, 7.0, 8.0, 1); ++ C (f7, 8.0, 8.0, 0); ++ C (f7, 9.0, 8.0, 0); ++ C (f8, 7.0, 8.0, 0); ++ C (f8, 8.0, 8.0, 1); ++ C (f8, 9.0, 8.0, 1); ++ C (f9, 7.0, 8.0, 0); ++ C (f9, 8.0, 8.0, 1); ++ C (f9, 9.0, 8.0, 1); ++ C (f10, 7.0, 8.0, 1); ++ C (f10, 8.0, 8.0, 0); ++ C (f10, 9.0, 8.0, 0); ++ C (f11, 7.0, 8.0, 0); ++ C (f11, 8.0, 8.0, 0); ++ C (f11, 9.0, 8.0, 1); ++ C (f12, 7.0, 8.0, 1); ++ C (f12, 8.0, 8.0, 1); ++ C (f12, 9.0, 8.0, 0); ++ C (f13, 7.0, 8.0, 1); ++ C (f13, 8.0, 8.0, 1); ++ C (f13, 9.0, 8.0, 0); ++ C (f14, 7.0, 8.0, 0); ++ C (f14, 8.0, 8.0, 0); ++ C (f14, 9.0, 8.0, 1); ++ D (f15, 4.0, 0); ++ D (f15, 5.0, 1); ++ D (f15, 6.0, 0); ++ 
D (f16, 4.0, 1); ++ D (f16, 5.0, 0); ++ D (f16, 6.0, 1); ++ D (f17, 4.0, 0); ++ D (f17, 5.0, 0); ++ D (f17, 6.0, 1); ++ D (f18, 4.0, 1); ++ D (f18, 5.0, 0); ++ D (f18, 6.0, 0); ++ D (f19, 4.0, 0); ++ D (f19, 5.0, 1); ++ D (f19, 6.0, 1); ++ D (f20, 4.0, 1); ++ D (f20, 5.0, 1); ++ D (f20, 6.0, 0); ++ D (f21, 4.0, 1); ++ D (f21, 5.0, 0); ++ D (f21, 6.0, 0); ++ D (f22, 4.0, 0); ++ D (f22, 5.0, 1); ++ D (f22, 6.0, 1); ++ D (f23, 4.0, 0); ++ D (f23, 5.0, 1); ++ D (f23, 6.0, 1); ++ D (f24, 4.0, 1); ++ D (f24, 5.0, 0); ++ D (f24, 6.0, 0); ++ D (f25, 4.0, 0); ++ D (f25, 5.0, 0); ++ D (f25, 6.0, 1); ++ D (f26, 4.0, 1); ++ D (f26, 5.0, 1); ++ D (f26, 6.0, 0); ++ D (f27, 4.0, 1); ++ D (f27, 5.0, 1); ++ D (f27, 6.0, 0); ++ D (f28, 4.0, 0); ++ D (f28, 5.0, 0); ++ D (f28, 6.0, 1); ++ return 0; ++} +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index b9cd07a60..fca32222f 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -63,6 +63,8 @@ static bool abs_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); + static bool xor_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); ++static bool spaceship_replacement (basic_block, basic_block, ++ edge, edge, gphi *, tree, tree); + static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, + edge, edge, gimple *, + tree, tree); +@@ -361,6 +363,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + cfgchanged = true; + else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; ++ else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) ++ cfgchanged = true; + } + } + +@@ -1753,6 +1757,426 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + ++/* Return true if the only executable statement in BB is a GIMPLE_COND. */ ++ ++static bool ++cond_only_block_p (basic_block bb) ++{ ++ /* BB must have no executable statements. 
*/ ++ gimple_stmt_iterator gsi = gsi_after_labels (bb); ++ if (phi_nodes (bb)) ++ return false; ++ while (!gsi_end_p (gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (is_gimple_debug (stmt)) ++ ; ++ else if (gimple_code (stmt) == GIMPLE_NOP ++ || gimple_code (stmt) == GIMPLE_PREDICT ++ || gimple_code (stmt) == GIMPLE_COND) ++ ; ++ else ++ return false; ++ gsi_next (&gsi); ++ } ++ return true; ++} ++ ++/* Attempt to optimize (x <=> y) cmp 0 and similar comparisons. ++ For strong ordering <=> try to match something like: ++ <bb 2> : // cond3_bb (== cond2_bb) ++ if (x_4(D) != y_5(D)) ++ goto <bb 3>; [INV] ++ else ++ goto <bb 6>; [INV] ++ ++ <bb 3> : // cond_bb ++ if (x_4(D) < y_5(D)) ++ goto <bb 6>; [INV] ++ else ++ goto <bb 4>; [INV] ++ ++ <bb 4> : // middle_bb ++ ++ <bb 6> : // phi_bb ++ # iftmp.0_2 = PHI <1(4), 0(2), -1(3)> ++ _1 = iftmp.0_2 == 0; ++ ++ and for partial ordering <=> something like: ++ ++ <bb 2> : // cond3_bb ++ if (a_3(D) == b_5(D)) ++ goto <bb 6>; [50.00%] ++ else ++ goto <bb 3>; [50.00%] ++ ++ <bb 3> [local count: 536870913]: // cond2_bb ++ if (a_3(D) < b_5(D)) ++ goto <bb 6>; [50.00%] ++ else ++ goto <bb 4>; [50.00%] ++ ++ <bb 4> [local count: 268435456]: // cond_bb ++ if (a_3(D) > b_5(D)) ++ goto <bb 6>; [50.00%] ++ else ++ goto <bb 5>; [50.00%] ++ ++ <bb 5> [local count: 134217728]: // middle_bb ++ ++ <bb 6> [local count: 1073741824]: // phi_bb ++ # SR.27_4 = PHI <0(2), -1(3), 1(4), 2(5)> ++ _2 = SR.27_4 > 0; */ ++ ++static bool ++spaceship_replacement (basic_block cond_bb, basic_block middle_bb, ++ edge e0, edge e1, gphi *phi, ++ tree arg0, tree arg1) ++{ ++ if (!INTEGRAL_TYPE_P (TREE_TYPE (PHI_RESULT (phi))) ++ || TYPE_UNSIGNED (TREE_TYPE (PHI_RESULT (phi))) ++ || !tree_fits_shwi_p (arg0) ++ || !tree_fits_shwi_p (arg1) ++ || !IN_RANGE (tree_to_shwi (arg0), -1, 2) ++ || !IN_RANGE (tree_to_shwi (arg1), -1, 2)) ++ return false; ++ ++ basic_block phi_bb = gimple_bb (phi); ++ gcc_assert (phi_bb == e0->dest && phi_bb == e1->dest); ++ if (!IN_RANGE (EDGE_COUNT (phi_bb->preds), 3, 4)) ++ return false; ++ ++ use_operand_p use_p; ++ gimple *use_stmt; ++ if 
(SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi))) ++ return false; ++ if (!single_imm_use (PHI_RESULT (phi), &use_p, &use_stmt)) ++ return false; ++ enum tree_code cmp; ++ tree lhs, rhs; ++ if (gimple_code (use_stmt) == GIMPLE_COND) ++ { ++ cmp = gimple_cond_code (use_stmt); ++ lhs = gimple_cond_lhs (use_stmt); ++ rhs = gimple_cond_rhs (use_stmt); ++ } ++ else if (is_gimple_assign (use_stmt)) ++ { ++ if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) ++ { ++ cmp = gimple_assign_rhs_code (use_stmt); ++ lhs = gimple_assign_rhs1 (use_stmt); ++ rhs = gimple_assign_rhs2 (use_stmt); ++ } ++ else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR) ++ { ++ tree cond = gimple_assign_rhs1 (use_stmt); ++ if (!COMPARISON_CLASS_P (cond)) ++ return false; ++ cmp = TREE_CODE (cond); ++ lhs = TREE_OPERAND (cond, 0); ++ rhs = TREE_OPERAND (cond, 1); ++ } ++ else ++ return false; ++ } ++ else ++ return false; ++ switch (cmp) ++ { ++ case EQ_EXPR: ++ case NE_EXPR: ++ case LT_EXPR: ++ case GT_EXPR: ++ case LE_EXPR: ++ case GE_EXPR: ++ break; ++ default: ++ return false; ++ } ++ if (lhs != PHI_RESULT (phi) ++ || !tree_fits_shwi_p (rhs) ++ || !IN_RANGE (tree_to_shwi (rhs), -1, 1)) ++ return false; ++ ++ if (!empty_block_p (middle_bb)) ++ return false; ++ ++ gcond *cond1 = as_a <gcond *> (last_stmt (cond_bb)); ++ enum tree_code cmp1 = gimple_cond_code (cond1); ++ if (cmp1 != LT_EXPR && cmp1 != GT_EXPR) ++ return false; ++ tree lhs1 = gimple_cond_lhs (cond1); ++ tree rhs1 = gimple_cond_rhs (cond1); ++ /* The optimization may be unsafe due to NaNs. 
*/ ++ if (HONOR_NANS (TREE_TYPE (lhs1))) ++ return false; ++ if (TREE_CODE (lhs1) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs1)) ++ return false; ++ if (TREE_CODE (rhs1) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)) ++ return false; ++ ++ if (!single_pred_p (cond_bb) || !cond_only_block_p (cond_bb)) ++ return false; ++ ++ basic_block cond2_bb = single_pred (cond_bb); ++ if (EDGE_COUNT (cond2_bb->succs) != 2) ++ return false; ++ edge cond2_phi_edge; ++ if (EDGE_SUCC (cond2_bb, 0)->dest == cond_bb) ++ { ++ if (EDGE_SUCC (cond2_bb, 1)->dest != phi_bb) ++ return false; ++ cond2_phi_edge = EDGE_SUCC (cond2_bb, 1); ++ } ++ else if (EDGE_SUCC (cond2_bb, 0)->dest != phi_bb) ++ return false; ++ else ++ cond2_phi_edge = EDGE_SUCC (cond2_bb, 0); ++ tree arg2 = gimple_phi_arg_def (phi, cond2_phi_edge->dest_idx); ++ if (!tree_fits_shwi_p (arg2)) ++ return false; ++ gimple *cond2 = last_stmt (cond2_bb); ++ if (cond2 == NULL || gimple_code (cond2) != GIMPLE_COND) ++ return false; ++ enum tree_code cmp2 = gimple_cond_code (cond2); ++ tree lhs2 = gimple_cond_lhs (cond2); ++ tree rhs2 = gimple_cond_rhs (cond2); ++ if (lhs2 == lhs1) ++ { ++ if (!operand_equal_p (rhs2, rhs1, 0)) ++ return false; ++ } ++ else if (lhs2 == rhs1) ++ { ++ if (rhs2 != lhs1) ++ return false; ++ } ++ else ++ return false; ++ ++ tree arg3 = arg2; ++ basic_block cond3_bb = cond2_bb; ++ edge cond3_phi_edge = cond2_phi_edge; ++ gimple *cond3 = cond2; ++ enum tree_code cmp3 = cmp2; ++ tree lhs3 = lhs2; ++ tree rhs3 = rhs2; ++ if (EDGE_COUNT (phi_bb->preds) == 4) ++ { ++ if (absu_hwi (tree_to_shwi (arg2)) != 1) ++ return false; ++ if (e1->flags & EDGE_TRUE_VALUE) ++ { ++ if (tree_to_shwi (arg0) != 2 ++ || absu_hwi (tree_to_shwi (arg1)) != 1 ++ || wi::to_widest (arg1) == wi::to_widest (arg2)) ++ return false; ++ } ++ else if (tree_to_shwi (arg1) != 2 ++ || absu_hwi (tree_to_shwi (arg0)) != 1 ++ || wi::to_widest (arg0) == wi::to_widest (arg2)) ++ return false; ++ if (cmp2 != LT_EXPR && cmp2 != 
GT_EXPR) ++ return false; ++ /* if (x < y) goto phi_bb; else fallthru; ++ if (x > y) goto phi_bb; else fallthru; ++ bbx:; ++ phi_bb:; ++ is ok, but if x and y are swapped in one of the comparisons, ++ or the comparisons are the same and operands not swapped, ++ or second goto phi_bb is not the true edge, it is not. */ ++ if ((lhs2 == lhs1) ++ ^ (cmp2 == cmp1) ++ ^ ((e1->flags & EDGE_TRUE_VALUE) != 0)) ++ return false; ++ if ((cond2_phi_edge->flags & EDGE_TRUE_VALUE) == 0) ++ return false; ++ if (!single_pred_p (cond2_bb) || !cond_only_block_p (cond2_bb)) ++ return false; ++ cond3_bb = single_pred (cond2_bb); ++ if (EDGE_COUNT (cond3_bb->succs) != 2) ++ return false; ++ if (EDGE_SUCC (cond3_bb, 0)->dest == cond2_bb) ++ { ++ if (EDGE_SUCC (cond3_bb, 1)->dest != phi_bb) ++ return false; ++ cond3_phi_edge = EDGE_SUCC (cond3_bb, 1); ++ } ++ else if (EDGE_SUCC (cond3_bb, 0)->dest != phi_bb) ++ return false; ++ else ++ cond3_phi_edge = EDGE_SUCC (cond3_bb, 0); ++ arg3 = gimple_phi_arg_def (phi, cond3_phi_edge->dest_idx); ++ cond3 = last_stmt (cond3_bb); ++ if (cond3 == NULL || gimple_code (cond3) != GIMPLE_COND) ++ return false; ++ cmp3 = gimple_cond_code (cond3); ++ lhs3 = gimple_cond_lhs (cond3); ++ rhs3 = gimple_cond_rhs (cond3); ++ if (lhs3 == lhs1) ++ { ++ if (!operand_equal_p (rhs3, rhs1, 0)) ++ return false; ++ } ++ else if (lhs3 == rhs1) ++ { ++ if (rhs3 != lhs1) ++ return false; ++ } ++ else ++ return false; ++ } ++ else if (absu_hwi (tree_to_shwi (arg0)) != 1 ++ || absu_hwi (tree_to_shwi (arg1)) != 1 ++ || wi::to_widest (arg0) == wi::to_widest (arg1)) ++ return false; ++ ++ if (!integer_zerop (arg3) || (cmp3 != EQ_EXPR && cmp3 != NE_EXPR)) ++ return false; ++ if ((cond3_phi_edge->flags & (cmp3 == EQ_EXPR ++ ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE)) == 0) ++ return false; ++ ++ /* lhs1 one_cmp rhs1 results in PHI_RESULT (phi) of 1. */ ++ enum tree_code one_cmp; ++ if ((cmp1 == LT_EXPR) ++ ^ (!integer_onep ((e1->flags & EDGE_TRUE_VALUE) ? 
arg1 : arg0))) ++ one_cmp = LT_EXPR; ++ else ++ one_cmp = GT_EXPR; ++ ++ enum tree_code res_cmp; ++ switch (cmp) ++ { ++ case EQ_EXPR: ++ if (integer_zerop (rhs)) ++ res_cmp = EQ_EXPR; ++ else if (integer_minus_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR; ++ else if (integer_onep (rhs)) ++ res_cmp = one_cmp; ++ else ++ return false; ++ break; ++ case NE_EXPR: ++ if (integer_zerop (rhs)) ++ res_cmp = NE_EXPR; ++ else if (integer_minus_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR; ++ else if (integer_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR; ++ else ++ return false; ++ break; ++ case LT_EXPR: ++ if (integer_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR; ++ else if (integer_zerop (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR; ++ else ++ return false; ++ break; ++ case LE_EXPR: ++ if (integer_zerop (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GE_EXPR : LE_EXPR; ++ else if (integer_minus_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? GT_EXPR : LT_EXPR; ++ else ++ return false; ++ break; ++ case GT_EXPR: ++ if (integer_minus_onep (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? LE_EXPR : GE_EXPR; ++ else if (integer_zerop (rhs)) ++ res_cmp = one_cmp; ++ else ++ return false; ++ break; ++ case GE_EXPR: ++ if (integer_zerop (rhs)) ++ res_cmp = one_cmp == LT_EXPR ? 
LE_EXPR : GE_EXPR; ++ else if (integer_onep (rhs)) ++ res_cmp = one_cmp; ++ else ++ return false; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (gimple_code (use_stmt) == GIMPLE_COND) ++ { ++ gcond *use_cond = as_a (use_stmt); ++ gimple_cond_set_code (use_cond, res_cmp); ++ gimple_cond_set_lhs (use_cond, lhs1); ++ gimple_cond_set_rhs (use_cond, rhs1); ++ } ++ else if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS) ++ { ++ gimple_assign_set_rhs_code (use_stmt, res_cmp); ++ gimple_assign_set_rhs1 (use_stmt, lhs1); ++ gimple_assign_set_rhs2 (use_stmt, rhs1); ++ } ++ else ++ { ++ tree cond = build2 (res_cmp, TREE_TYPE (gimple_assign_rhs1 (use_stmt)), ++ lhs1, rhs1); ++ gimple_assign_set_rhs1 (use_stmt, cond); ++ } ++ update_stmt (use_stmt); ++ ++ if (MAY_HAVE_DEBUG_BIND_STMTS) ++ { ++ use_operand_p use_p; ++ imm_use_iterator iter; ++ bool has_debug_uses = false; ++ FOR_EACH_IMM_USE_FAST (use_p, iter, PHI_RESULT (phi)) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ gcc_assert (is_gimple_debug (use_stmt)); ++ has_debug_uses = true; ++ break; ++ } ++ ++ if (has_debug_uses) ++ { ++ /* If there are debug uses, emit something like: ++ # DEBUG D#1 => i_2(D) > j_3(D) ? 1 : -1 ++ # DEBUG D#2 => i_2(D) == j_3(D) ? 0 : D#1 ++ where > stands for the comparison that yielded 1 ++ and replace debug uses of phi result with that D#2. ++ Ignore the value of 2, because if NaNs aren't expected, ++ all floating point numbers should be comparable. 
*/ ++ gimple_stmt_iterator gsi = gsi_after_labels (gimple_bb (phi)); ++ tree type = TREE_TYPE (PHI_RESULT (phi)); ++ tree temp1 = make_node (DEBUG_EXPR_DECL); ++ DECL_ARTIFICIAL (temp1) = 1; ++ TREE_TYPE (temp1) = type; ++ SET_DECL_MODE (temp1, TYPE_MODE (type)); ++ tree t = build2 (one_cmp, boolean_type_node, lhs1, rhs2); ++ t = build3 (COND_EXPR, type, t, build_one_cst (type), ++ build_int_cst (type, -1)); ++ gimple *g = gimple_build_debug_bind (temp1, t, phi); ++ gsi_insert_before (&gsi, g, GSI_SAME_STMT); ++ tree temp2 = make_node (DEBUG_EXPR_DECL); ++ DECL_ARTIFICIAL (temp2) = 1; ++ TREE_TYPE (temp2) = type; ++ SET_DECL_MODE (temp2, TYPE_MODE (type)); ++ t = build2 (EQ_EXPR, boolean_type_node, lhs1, rhs2); ++ t = build3 (COND_EXPR, type, t, build_zero_cst (type), temp1); ++ g = gimple_build_debug_bind (temp2, t, phi); ++ gsi_insert_before (&gsi, g, GSI_SAME_STMT); ++ replace_uses_by (PHI_RESULT (phi), temp2); ++ } ++ } ++ ++ gimple_stmt_iterator psi = gsi_for_stmt (phi); ++ remove_phi_node (&psi, true); ++ ++ return true; ++} ++ + /* Convert + + +-- +2.25.1 + diff --git a/0057-Backport-Add-support-for-__builtin_bswap128.patch b/0057-Backport-Add-support-for-__builtin_bswap128.patch new file mode 100644 index 0000000000000000000000000000000000000000..9b91e50a0d700494665ffc7e244bdc233bfc945e --- /dev/null +++ b/0057-Backport-Add-support-for-__builtin_bswap128.patch @@ -0,0 +1,253 @@ +From 96afd5b761a74e9eef40a2e843810c503c669de8 Mon Sep 17 00:00:00 2001 +From: Eric Botcazou +Date: Thu, 28 May 2020 00:31:15 +0200 +Subject: [PATCH 09/29] [Backport] Add support for __builtin_bswap128 + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=fe7ebef7fe4f9acb79658ed9db0749b07efc3105 + +This patch introduces a new builtin named __builtin_bswap128 on targets +where TImode is supported, i.e. 64-bit targets only in practice. 
The +implementation simply reuses the existing double word path in optab, so +no routine is added to libgcc (which means that you get two calls to +_bswapdi2 in the worst case). + +gcc/ChangeLog: + + * builtin-types.def (BT_UINT128): New primitive type. + (BT_FN_UINT128_UINT128): New function type. + * builtins.def (BUILT_IN_BSWAP128): New GCC builtin. + * doc/extend.texi (__builtin_bswap128): Document it. + * builtins.c (expand_builtin): Deal with BUILT_IN_BSWAP128. + (is_inexpensive_builtin): Likewise. + * fold-const-call.c (fold_const_call_ss): Likewise. + * fold-const.c (tree_call_nonnegative_warnv_p): Likewise. + * tree-ssa-ccp.c (evaluate_stmt): Likewise. + * tree-vect-stmts.c (vect_get_data_ptr_increment): Likewise. + (vectorizable_call): Likewise. + * optabs.c (expand_unop): Always use the double word path for it. + * tree-core.h (enum tree_index): Add TI_UINT128_TYPE. + * tree.h (uint128_type_node): New global type. + * tree.c (build_common_tree_nodes): Build it if TImode is supported. + +gcc/testsuite/ChangeLog: + + * gcc.dg/builtin-bswap-10.c: New test. + * gcc.dg/builtin-bswap-11.c: Likewise. + * gcc.dg/builtin-bswap-12.c: Likewise. + * gcc.target/i386/builtin-bswap-5.c: Likewise. 
+--- + gcc/builtin-types.def | 4 ++++ + gcc/builtins.c | 2 ++ + gcc/builtins.def | 2 ++ + gcc/doc/extend.texi | 10 ++++++++-- + gcc/fold-const-call.c | 1 + + gcc/fold-const.c | 2 ++ + gcc/optabs.c | 5 ++++- + gcc/tree-core.h | 1 + + gcc/tree-ssa-ccp.c | 1 + + gcc/tree-vect-stmts.c | 5 +++-- + gcc/tree.c | 2 ++ + gcc/tree.h | 1 + + 12 files changed, 31 insertions(+), 5 deletions(-) + +diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def +index c7aa691b2..c46b1bc5c 100644 +--- a/gcc/builtin-types.def ++++ b/gcc/builtin-types.def +@@ -73,6 +73,9 @@ DEF_PRIMITIVE_TYPE (BT_UINT8, unsigned_char_type_node) + DEF_PRIMITIVE_TYPE (BT_UINT16, uint16_type_node) + DEF_PRIMITIVE_TYPE (BT_UINT32, uint32_type_node) + DEF_PRIMITIVE_TYPE (BT_UINT64, uint64_type_node) ++DEF_PRIMITIVE_TYPE (BT_UINT128, uint128_type_node ++ ? uint128_type_node ++ : error_mark_node) + DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1)) + DEF_PRIMITIVE_TYPE (BT_UNWINDWORD, (*lang_hooks.types.type_for_mode) + (targetm.unwind_word_mode (), 1)) +@@ -300,6 +303,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_UINT8_FLOAT, BT_UINT8, BT_FLOAT) + DEF_FUNCTION_TYPE_1 (BT_FN_UINT16_UINT16, BT_UINT16, BT_UINT16) + DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_UINT32, BT_UINT32, BT_UINT32) + DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_UINT64, BT_UINT64, BT_UINT64) ++DEF_FUNCTION_TYPE_1 (BT_FN_UINT128_UINT128, BT_UINT128, BT_UINT128) + DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_FLOAT, BT_UINT64, BT_FLOAT) + DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT) + DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_PTR, BT_BOOL, BT_PTR) +diff --git a/gcc/builtins.c b/gcc/builtins.c +index 10b6fd3bb..1b1c75cc1 100644 +--- a/gcc/builtins.c ++++ b/gcc/builtins.c +@@ -8015,6 +8015,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, + case BUILT_IN_BSWAP16: + case BUILT_IN_BSWAP32: + case BUILT_IN_BSWAP64: ++ case BUILT_IN_BSWAP128: + target = expand_builtin_bswap (target_mode, exp, target, subtarget); + if (target) + return target; 
+@@ -11732,6 +11733,7 @@ is_inexpensive_builtin (tree decl) + case BUILT_IN_BSWAP16: + case BUILT_IN_BSWAP32: + case BUILT_IN_BSWAP64: ++ case BUILT_IN_BSWAP128: + case BUILT_IN_CLZ: + case BUILT_IN_CLZIMAX: + case BUILT_IN_CLZL: +diff --git a/gcc/builtins.def b/gcc/builtins.def +index fa8b0641a..ee67ac15d 100644 +--- a/gcc/builtins.def ++++ b/gcc/builtins.def +@@ -834,6 +834,8 @@ DEF_GCC_BUILTIN (BUILT_IN_APPLY_ARGS, "apply_args", BT_FN_PTR_VAR, ATTR_L + DEF_GCC_BUILTIN (BUILT_IN_BSWAP16, "bswap16", BT_FN_UINT16_UINT16, ATTR_CONST_NOTHROW_LEAF_LIST) + DEF_GCC_BUILTIN (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST) + DEF_GCC_BUILTIN (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST) ++DEF_GCC_BUILTIN (BUILT_IN_BSWAP128, "bswap128", BT_FN_UINT128_UINT128, ATTR_CONST_NOTHROW_LEAF_LIST) ++ + DEF_EXT_LIB_BUILTIN (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST) + /* [trans-mem]: Adjust BUILT_IN_TM_CALLOC if BUILT_IN_CALLOC is changed. */ + DEF_LIB_BUILTIN (BUILT_IN_CALLOC, "calloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_WARN_UNUSED_RESULT_SIZE_1_2_NOTHROW_LEAF_LIST) +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 9c7345959..a7bd772de 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -13727,14 +13727,20 @@ exactly 8 bits. + + @deftypefn {Built-in Function} uint32_t __builtin_bswap32 (uint32_t x) + Similar to @code{__builtin_bswap16}, except the argument and return types +-are 32 bit. ++are 32-bit. + @end deftypefn + + @deftypefn {Built-in Function} uint64_t __builtin_bswap64 (uint64_t x) + Similar to @code{__builtin_bswap32}, except the argument and return types +-are 64 bit. ++are 64-bit. + @end deftypefn + ++@deftypefn {Built-in Function} uint128_t __builtin_bswap128 (uint128_t x) ++Similar to @code{__builtin_bswap64}, except the argument and return types ++are 128-bit. Only supported on targets when 128-bit types are supported. 
++@end deftypefn ++ ++ + @deftypefn {Built-in Function} Pmode __builtin_extend_pointer (void * x) + On targets where the user visible pointer size is smaller than the size + of an actual hardware address this function returns the extended user +diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c +index 6150d7ada..da01759d9 100644 +--- a/gcc/fold-const-call.c ++++ b/gcc/fold-const-call.c +@@ -1032,6 +1032,7 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg, + case CFN_BUILT_IN_BSWAP16: + case CFN_BUILT_IN_BSWAP32: + case CFN_BUILT_IN_BSWAP64: ++ case CFN_BUILT_IN_BSWAP128: + *result = wide_int::from (arg, precision, TYPE_SIGN (arg_type)).bswap (); + return true; + +diff --git a/gcc/fold-const.c b/gcc/fold-const.c +index 6e635382f..78227a83d 100644 +--- a/gcc/fold-const.c ++++ b/gcc/fold-const.c +@@ -13889,8 +13889,10 @@ tree_call_nonnegative_warnv_p (tree type, combined_fn fn, tree arg0, tree arg1, + CASE_CFN_POPCOUNT: + CASE_CFN_CLZ: + CASE_CFN_CLRSB: ++ case CFN_BUILT_IN_BSWAP16: + case CFN_BUILT_IN_BSWAP32: + case CFN_BUILT_IN_BSWAP64: ++ case CFN_BUILT_IN_BSWAP128: + /* Always true. */ + return true; + +diff --git a/gcc/optabs.c b/gcc/optabs.c +index 049a18ceb..c3751fdf7 100644 +--- a/gcc/optabs.c ++++ b/gcc/optabs.c +@@ -2896,8 +2896,11 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target, + if (temp) + return temp; + ++ /* We do not provide a 128-bit bswap in libgcc so force the use of ++ a double bswap for 64-bit targets. 
*/ + if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD +- && optab_handler (unoptab, word_mode) != CODE_FOR_nothing) ++ && (UNITS_PER_WORD == 8 ++ || optab_handler (unoptab, word_mode) != CODE_FOR_nothing)) + { + temp = expand_doubleword_bswap (mode, op0, target); + if (temp) +diff --git a/gcc/tree-core.h b/gcc/tree-core.h +index eb01c2434..058e046aa 100644 +--- a/gcc/tree-core.h ++++ b/gcc/tree-core.h +@@ -600,6 +600,7 @@ enum tree_index { + TI_UINT16_TYPE, + TI_UINT32_TYPE, + TI_UINT64_TYPE, ++ TI_UINT128_TYPE, + + TI_VOID, + +diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c +index 952fd9cd4..dcdf10369 100644 +--- a/gcc/tree-ssa-ccp.c ++++ b/gcc/tree-ssa-ccp.c +@@ -2005,6 +2005,7 @@ evaluate_stmt (gimple *stmt) + case BUILT_IN_BSWAP16: + case BUILT_IN_BSWAP32: + case BUILT_IN_BSWAP64: ++ case BUILT_IN_BSWAP128: + val = get_value_for_expr (gimple_call_arg (stmt, 0), true); + if (val.lattice_val == UNDEFINED) + break; +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index b872cfc8d..4636b7ba2 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -3085,7 +3085,7 @@ vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type, + return iv_step; + } + +-/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */ ++/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. 
*/ + + static bool + vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, +@@ -3454,7 +3454,8 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + else if (modifier == NONE + && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16) + || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32) +- || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64))) ++ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64) ++ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128))) + return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node, + vectype_in, cost_vec); + else +diff --git a/gcc/tree.c b/gcc/tree.c +index 84a440b35..3e6647ae0 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -10394,6 +10394,8 @@ build_common_tree_nodes (bool signed_char) + uint16_type_node = make_or_reuse_type (16, 1); + uint32_type_node = make_or_reuse_type (32, 1); + uint64_type_node = make_or_reuse_type (64, 1); ++ if (targetm.scalar_mode_supported_p (TImode)) ++ uint128_type_node = make_or_reuse_type (128, 1); + + /* Decimal float types. 
*/ + if (targetm.decimal_float_supported_p ()) +diff --git a/gcc/tree.h b/gcc/tree.h +index 328a2d5d2..bddc6e528 100644 +--- a/gcc/tree.h ++++ b/gcc/tree.h +@@ -4035,6 +4035,7 @@ tree_strip_any_location_wrapper (tree exp) + #define uint16_type_node global_trees[TI_UINT16_TYPE] + #define uint32_type_node global_trees[TI_UINT32_TYPE] + #define uint64_type_node global_trees[TI_UINT64_TYPE] ++#define uint128_type_node global_trees[TI_UINT128_TYPE] + + #define void_node global_trees[TI_VOID] + +-- +2.25.1 + diff --git a/0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch b/0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch new file mode 100644 index 0000000000000000000000000000000000000000..a3c89ed7c3708a45ff6b1ac247df9cb00a4d2820 --- /dev/null +++ b/0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch @@ -0,0 +1,113 @@ +From b9ac0cc69aab3c8d662d5b0a9ed43d971c13ac70 Mon Sep 17 00:00:00 2001 +From: Richard Biener +Date: Fri, 29 May 2020 09:25:53 +0200 +Subject: [PATCH 10/29] [Backport] tree-optimization/95393 - fold MIN/MAX_EXPR + generated by phiopt + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=07852a81f58532c63a57631d7c3757fc6bcea17d + +This makes sure to fold generated stmts so they do not survive +until RTL expansion and cause awkward code generation. + +2020-05-29 Richard Biener + + PR tree-optimization/95393 + * tree-ssa-phiopt.c (minmax_replacement): Use gimple_build + to build the min/max expression so we simplify cases like + MAX(0, s) immediately. + + * gcc.dg/tree-ssa/phi-opt-21.c: New testcase. + * g++.dg/vect/slp-pr87105.cc: Adjust. 
+--- + gcc/testsuite/g++.dg/vect/slp-pr87105.cc | 2 +- + gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c | 15 +++++++++++++ + gcc/tree-ssa-phiopt.c | 25 +++++++++++----------- + 3 files changed, 29 insertions(+), 13 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c + +diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc +index 5518f319b..d07b1cd46 100644 +--- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc ++++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc +@@ -102,4 +102,4 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept { + // { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } } + // It's a bit awkward to detect that all stores were vectorized but the + // following more or less does the trick +-// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } } ++// { dg-final { scan-tree-dump "vect_\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } } +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c +new file mode 100644 +index 000000000..9f3d56957 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt4-details" } */ ++ ++int f(unsigned s) ++{ ++ int i; ++ for (i = 0; i < s; ++i) ++ ; ++ ++ return i; ++} ++ ++/* { dg-final { scan-tree-dump "converted to straightline code" "phiopt4" } } */ ++/* Make sure we fold the detected MAX. */ ++/* { dg-final { scan-tree-dump-not "MAX" "phiopt4" } } */ +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index fca32222f..269eda21c 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. 
If not see + #include "tree-inline.h" + #include "case-cfn-macros.h" + #include "tree-eh.h" ++#include "gimple-fold.h" + #include "internal-fn.h" + + static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); +@@ -1414,7 +1415,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, + { + tree result, type, rhs; + gcond *cond; +- gassign *new_stmt; + edge true_edge, false_edge; + enum tree_code cmp, minmax, ass_code; + tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false; +@@ -1738,19 +1738,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, + gsi_move_before (&gsi_from, &gsi); + } + +- /* Create an SSA var to hold the min/max result. If we're the only +- things setting the target PHI, then we can clone the PHI +- variable. Otherwise we must create a new one. */ +- result = PHI_RESULT (phi); +- if (EDGE_COUNT (gimple_bb (phi)->preds) == 2) +- result = duplicate_ssa_name (result, NULL); +- else +- result = make_ssa_name (TREE_TYPE (result)); +- + /* Emit the statement to compute min/max. */ +- new_stmt = gimple_build_assign (result, minmax, arg0, arg1); ++ gimple_seq stmts = NULL; ++ tree phi_result = PHI_RESULT (phi); ++ result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1); ++ /* Duplicate range info if we're the only things setting the target PHI. 
*/ ++ if (!gimple_seq_empty_p (stmts) ++ && EDGE_COUNT (gimple_bb (phi)->preds) == 2 ++ && !POINTER_TYPE_P (TREE_TYPE (phi_result)) ++ && SSA_NAME_RANGE_INFO (phi_result)) ++ duplicate_ssa_name_range_info (result, SSA_NAME_RANGE_TYPE (phi_result), ++ SSA_NAME_RANGE_INFO (phi_result)); ++ + gsi = gsi_last_bb (cond_bb); +- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); ++ gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT); + + replace_phi_edge_with_variable (cond_bb, e1, phi, result); + +-- +2.25.1 + diff --git a/0059-Backport-Add-a-couple-of-A-CST1-CST2-match-and-simpl.patch b/0059-Backport-Add-a-couple-of-A-CST1-CST2-match-and-simpl.patch new file mode 100644 index 0000000000000000000000000000000000000000..b69e4c48d23684081e165427bd73354264b96009 --- /dev/null +++ b/0059-Backport-Add-a-couple-of-A-CST1-CST2-match-and-simpl.patch @@ -0,0 +1,91 @@ +From 9f3a8c600abe16f172b36d8113862e8f7aea940c Mon Sep 17 00:00:00 2001 +From: Andrew Pinski +Date: Sun, 16 May 2021 13:07:06 -0700 +Subject: [PATCH 11/29] [Backport] Add a couple of A?CST1:CST2 match and + simplify optimizations + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b6bdd7a4cb41ee057f2d064fffcb00f23ce6b497 + +Instead of some of the more manual optimizations inside phi-opt, +it would be good idea to do a lot of the heavy lifting inside match +and simplify instead. In the process, this moves the three simple +A?CST1:CST2 (where CST1 or CST2 is zero) simplifications. + +OK? Boostrapped and tested on x86_64-linux-gnu with no regressions. + +Differences from V1: +* Use bit_xor 1 instead of bit_not to fix the problem with boolean types +which are not 1 bit precision. + +Thanks, +Andrew Pinski + +gcc: + * match.pd (A?CST1:CST2): Add simplifcations for A?0:+-1, A?+-1:0, + A?POW2:0 and A?0:POW2. 
+--- + gcc/match.pd | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 48 insertions(+) + +diff --git a/gcc/match.pd b/gcc/match.pd +index 660d5c268..032830b0d 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3334,6 +3334,54 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (if (cst1 && cst2) + (vec_cond @0 { cst1; } { cst2; }))))) + ++/* A few simplifications of "a ? CST1 : CST2". */ ++/* NOTE: Only do this on gimple as the if-chain-to-switch ++ optimization depends on the gimple to have if statements in it. */ ++#if GIMPLE ++(simplify ++ (cond @0 INTEGER_CST@1 INTEGER_CST@2) ++ (switch ++ (if (integer_zerop (@2)) ++ (switch ++ /* a ? 1 : 0 -> a if 0 and 1 are integral types. */ ++ (if (integer_onep (@1)) ++ (convert (convert:boolean_type_node @0))) ++ /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */ ++ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1)) ++ (with { ++ tree shift = build_int_cst (integer_type_node, tree_log2 (@1)); ++ } ++ (lshift (convert (convert:boolean_type_node @0)) { shift; }))) ++ /* a ? -1 : 0 -> -a. No need to check the TYPE_PRECISION not being 1 ++ here as the powerof2cst case above will handle that case correctly. */ ++ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1)) ++ (negate (convert (convert:boolean_type_node @0)))))) ++ (if (integer_zerop (@1)) ++ (with { ++ tree booltrue = constant_boolean_node (true, boolean_type_node); ++ } ++ (switch ++ /* a ? 0 : 1 -> !a. */ ++ (if (integer_onep (@2)) ++ (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))) ++ /* a ? 0 : powerof2cst -> (!a) << (log2(powerof2cst)) */ ++ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2)) ++ (with { ++ tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); ++ } ++ (lshift (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )) ++ { shift; }))) ++ /* a ? 0 : -1 -> -(!a). 
No need to check the TYPE_PRECISION not being 1 ++ here as the powerof2cst case above will handle that case correctly. 
*/ ++ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2)) ++ (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))) ++ ) ++ ) ++ ) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. */ + /* This pattern implements two kinds simplification: +-- +2.25.1 + diff --git a/0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch b/0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch new file mode 100644 index 0000000000000000000000000000000000000000..b12f8eb0b4ac187905ea6c2a701c892a91c7738f --- /dev/null +++ b/0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch @@ -0,0 +1,155 @@ +From 4352b952ba24c413697fcfc191d06165a8a31ced Mon Sep 17 00:00:00 2001 +From: Andrew Pinski +Date: Sat, 22 May 2021 19:49:50 +0000 +Subject: [PATCH 12/29] [Backport] Optimize x < 0 ? ~y : y to (x >> 31) ^ y in + match.pd + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=1fd76b24306ed4df4cf9e797d900699ed59ce7f7 + +This copies the optimization that is done in phiopt for +"x < 0 ? ~y : y to (x >> 31) ^ y" into match.pd. The code +for phiopt is kept around until phiopt uses match.pd (which +I am working towards). + +Note the original testcase is now optimized early on and I added a +new testcase to optimize during phiopt. + +OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. + +Thanks, +Andrew Pinski + +Differences from v1: +V2: Add check for integeral type to make sure vector types are not done. + +gcc: + * match.pd (x < 0 ? ~y : y): New patterns. + +gcc/testsuite: + * gcc.dg/tree-ssa/pr96928.c: Update test for slightly different IR. + * gcc.dg/tree-ssa/pr96928-1.c: New testcase. 
+--- + gcc/match.pd | 32 +++++++++++++++ + gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c | 48 +++++++++++++++++++++++ + gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 7 +++- + 3 files changed, 85 insertions(+), 2 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index 032830b0d..5899eea95 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4390,6 +4390,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (cmp (bit_and@2 @0 integer_pow2p@1) @1) + (icmp @2 { build_zero_cst (TREE_TYPE (@0)); }))) + ++(for cmp (ge lt) ++/* x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */ ++/* x >= 0 ? ~y : y into ~((x >> (prec-1)) ^ y). */ ++ (simplify ++ (cond (cmp @0 integer_zerop) (bit_not @1) @1) ++ (if (INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) ++ && !TYPE_UNSIGNED (TREE_TYPE (@0)) ++ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type)) ++ (with ++ { ++ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1); ++ } ++ (if (cmp == LT_EXPR) ++ (bit_xor (convert (rshift @0 {shifter;})) @1) ++ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1)))))) ++/* x < 0 ? y : ~y into ~((x >> (prec-1)) ^ y). */ ++/* x >= 0 ? y : ~y into (x >> (prec-1)) ^ y. */ ++ (simplify ++ (cond (cmp @0 integer_zerop) @1 (bit_not @1)) ++ (if (INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) ++ && !TYPE_UNSIGNED (TREE_TYPE (@0)) ++ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type)) ++ (with ++ { ++ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1); ++ } ++ (if (cmp == GE_EXPR) ++ (bit_xor (convert (rshift @0 {shifter;})) @1) ++ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1))))))) ++ + /* If we have (A & C) != 0 ? D : 0 where C and D are powers of 2, + convert this into a shift followed by ANDing with D. 
*/ + (simplify +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c +new file mode 100644 +index 000000000..a2770e5e8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c +@@ -0,0 +1,48 @@ ++/* PR tree-optimization/96928 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */ ++/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */ ++ ++int ++foo (int a) ++{ ++ if (a < 0) ++ return ~a; ++ return a; ++} ++ ++int ++bar (int a, int b) ++{ ++ if (a < 0) ++ return ~b; ++ return b; ++} ++ ++unsigned ++baz (int a, unsigned int b) ++{ ++ if (a < 0) ++ return ~b; ++ return b; ++} ++ ++unsigned ++qux (int a, unsigned int c) ++{ ++ if (a >= 0) ++ return ~c; ++ return c; ++} ++ ++int ++corge (int a, int b) ++{ ++ if (a >= 0) ++ return b; ++ return ~b; ++} +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c +index 209135726..e8fd82fc2 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c +@@ -1,8 +1,11 @@ + /* PR tree-optimization/96928 */ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-phiopt2" } */ ++/* { dg-options "-O2 -fdump-tree-phiopt2 -fdump-tree-optimized" } */ + /* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */ +-/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */ ++/* The following check is done at optimized because a ^ (~b) is rewritten as ~(a^b) ++ and in the case of match.pd optimizing these ?:, the ~ is moved out already ++ by the time we get to phiopt2. 
*/ ++/* { dg-final { scan-tree-dump-times "\\\^ c_\[0-9]*\\\(D\\\);" 1 "optimized" } } */ + /* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */ + /* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */ + /* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */ +-- +2.25.1 + diff --git a/0061-Backport-Replace-conditional_replacement-with-match-.patch b/0061-Backport-Replace-conditional_replacement-with-match-.patch new file mode 100644 index 0000000000000000000000000000000000000000..6cc10e2e173cea1961645b26b7ae5638f86e09a1 --- /dev/null +++ b/0061-Backport-Replace-conditional_replacement-with-match-.patch @@ -0,0 +1,249 @@ +From 406071e8c1838c824f06c35ef3cf9419aa543e6e Mon Sep 17 00:00:00 2001 +From: Andrew Pinski +Date: Tue, 1 Jun 2021 01:05:09 +0000 +Subject: [PATCH 13/29] [Backport] Replace conditional_replacement with match + and simplify + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=9f55df63154a39d67ef5b24def7044bf87300831 + +This is the first of series of patches to simplify phi-opt +to use match and simplify in many cases. This simplification +will more things to optimize. + +This is what Richard requested in +https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571197.html +and I think it is the right thing to do too. + +OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. + +gcc/ChangeLog: + + PR tree-optimization/25290 + * tree-ssa-phiopt.c (match_simplify_replacement): + New function. + (tree_ssa_phiopt_worker): Use match_simplify_replacement. + (two_value_replacement): Change the comment about + conditional_replacement. + (conditional_replacement): Delete. 
+--- + gcc/tree-ssa-phiopt.c | 144 ++++++++++++------------------------------ + 1 file changed, 39 insertions(+), 105 deletions(-) + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 269eda21c..9fa6363b6 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -52,8 +52,8 @@ along with GCC; see the file COPYING3. If not see + static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); + static bool two_value_replacement (basic_block, basic_block, edge, gphi *, + tree, tree); +-static bool conditional_replacement (basic_block, basic_block, +- edge, edge, gphi *, tree, tree); ++static bool match_simplify_replacement (basic_block, basic_block, ++ edge, edge, gphi *, tree, tree); + static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree, + gimple *); + static int value_replacement (basic_block, basic_block, +@@ -349,8 +349,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p +- && conditional_replacement (bb, bb1, e1, e2, phi, +- arg0, arg1)) ++ && match_simplify_replacement (bb, bb1, e1, e2, phi, ++ arg0, arg1)) + cfgchanged = true; + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -662,7 +662,7 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + } + + /* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to +- conditional_replacement. */ ++ match_simplify_replacement. */ + if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE + && (integer_zerop (arg0) + || integer_zerop (arg1) +@@ -763,137 +763,71 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + +-/* The function conditional_replacement does the main work of doing the +- conditional replacement. Return true if the replacement is done. 
++/* The function match_simplify_replacement does the main work of doing the ++ replacement using match and simplify. Return true if the replacement is done. + Otherwise return false. + BB is the basic block where the replacement is going to be done on. ARG0 + is argument 0 from PHI. Likewise for ARG1. */ + + static bool +-conditional_replacement (basic_block cond_bb, basic_block middle_bb, +- edge e0, edge e1, gphi *phi, +- tree arg0, tree arg1) ++match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, ++ edge e0, edge e1, gphi *phi, ++ tree arg0, tree arg1) + { +- tree result; + gimple *stmt; +- gassign *new_stmt; + tree cond; + gimple_stmt_iterator gsi; + edge true_edge, false_edge; +- tree new_var, new_var2; +- bool neg = false; +- int shift = 0; +- tree nonzero_arg; +- +- /* FIXME: Gimplification of complex type is too hard for now. */ +- /* We aren't prepared to handle vectors either (and it is a question +- if it would be worthwhile anyway). */ +- if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0)) +- || POINTER_TYPE_P (TREE_TYPE (arg0))) +- || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1)) +- || POINTER_TYPE_P (TREE_TYPE (arg1)))) +- return false; ++ gimple_seq seq = NULL; ++ tree result; + +- /* The PHI arguments have the constants 0 and 1, or 0 and -1 or +- 0 and (1 << cst), then convert it to the conditional. */ +- if (integer_zerop (arg0)) +- nonzero_arg = arg1; +- else if (integer_zerop (arg1)) +- nonzero_arg = arg0; +- else +- return false; +- if (integer_pow2p (nonzero_arg)) +- { +- shift = tree_log2 (nonzero_arg); +- if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg))) +- return false; +- } +- else if (integer_all_onesp (nonzero_arg)) +- neg = true; +- else ++ if (!empty_block_p (middle_bb)) + return false; + +- if (!empty_block_p (middle_bb)) ++ /* Special case A ? B : B as this will always simplify to B. */ ++ if (operand_equal_for_phi_arg_p (arg0, arg1)) + return false; + +- /* At this point we know we have a GIMPLE_COND with two successors. 
++ /* At this point we know we have a GIMPLE_COND with two successors. + One successor is BB, the other successor is an empty block which + falls through into BB. + +- There is a single PHI node at the join point (BB) and its arguments +- are constants (0, 1) or (0, -1) or (0, (1 << shift)). +- +- So, given the condition COND, and the two PHI arguments, we can +- rewrite this PHI into non-branching code: ++ There is a single PHI node at the join point (BB). + +- dest = (COND) or dest = COND' or dest = (COND) << shift +- +- We use the condition as-is if the argument associated with the +- true edge has the value one or the argument associated with the +- false edge as the value zero. Note that those conditions are not +- the same since only one of the outgoing edges from the GIMPLE_COND +- will directly reach BB and thus be associated with an argument. */ ++ So, given the condition COND, and the two PHI arguments, match and simplify ++ can happen on (COND) ? arg0 : arg1. */ + + stmt = last_stmt (cond_bb); +- result = PHI_RESULT (phi); + + /* To handle special cases like floating point comparison, it is easier and + less error-prone to build a tree and gimplify it on the fly though it is +- less efficient. */ +- cond = fold_build2_loc (gimple_location (stmt), +- gimple_cond_code (stmt), boolean_type_node, +- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt)); ++ less efficient. ++ Don't use fold_build2 here as that might create (bool)a instead of just ++ "a != 0". */ ++ cond = build2_loc (gimple_location (stmt), ++ gimple_cond_code (stmt), boolean_type_node, ++ gimple_cond_lhs (stmt), gimple_cond_rhs (stmt)); + + /* We need to know which is the true edge and which is the false + edge so that we know when to invert the condition below. 
*/ + extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); +- if ((e0 == true_edge && integer_zerop (arg0)) +- || (e0 == false_edge && !integer_zerop (arg0)) +- || (e1 == true_edge && integer_zerop (arg1)) +- || (e1 == false_edge && !integer_zerop (arg1))) +- cond = fold_build1_loc (gimple_location (stmt), +- TRUTH_NOT_EXPR, TREE_TYPE (cond), cond); +- +- if (neg) +- { +- cond = fold_convert_loc (gimple_location (stmt), +- TREE_TYPE (result), cond); +- cond = fold_build1_loc (gimple_location (stmt), +- NEGATE_EXPR, TREE_TYPE (cond), cond); +- } +- else if (shift) +- { +- cond = fold_convert_loc (gimple_location (stmt), +- TREE_TYPE (result), cond); +- cond = fold_build2_loc (gimple_location (stmt), +- LSHIFT_EXPR, TREE_TYPE (cond), cond, +- build_int_cst (integer_type_node, shift)); +- } ++ if (e1 == true_edge || e0 == false_edge) ++ std::swap (arg0, arg1); + +- /* Insert our new statements at the end of conditional block before the +- COND_STMT. */ +- gsi = gsi_for_stmt (stmt); +- new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true, +- GSI_SAME_STMT); ++ tree type = TREE_TYPE (gimple_phi_result (phi)); ++ result = gimple_simplify (COND_EXPR, type, ++ cond, ++ arg0, arg1, ++ &seq, NULL); ++ if (!result) ++ return false; + +- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var))) +- { +- location_t locus_0, locus_1; ++ gsi = gsi_last_bb (cond_bb); + +- new_var2 = make_ssa_name (TREE_TYPE (result)); +- new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var); +- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); +- new_var = new_var2; +- +- /* Set the locus to the first argument, unless is doesn't have one. 
*/ +- locus_0 = gimple_phi_arg_location (phi, 0); +- locus_1 = gimple_phi_arg_location (phi, 1); +- if (locus_0 == UNKNOWN_LOCATION) +- locus_0 = locus_1; +- gimple_set_location (new_stmt, locus_0); +- } ++ if (seq) ++ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); + +- replace_phi_edge_with_variable (cond_bb, e1, phi, new_var); ++ replace_phi_edge_with_variable (cond_bb, e1, phi, result); + + /* Note that we optimized this PHI. */ + return true; +@@ -3905,7 +3839,7 @@ gate_hoist_loads (void) + Conditional Replacement + ----------------------- + +- This transformation, implemented in conditional_replacement, ++ This transformation, implemented in match_simplify_replacement, + replaces + + bb0: +-- +2.25.1 + diff --git a/0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch b/0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch new file mode 100644 index 0000000000000000000000000000000000000000..aff904ce73d804ea7d488321541cd282aedb3724 --- /dev/null +++ b/0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch @@ -0,0 +1,174 @@ +From fabbe6ccc798d3cb097c6371b4d53cd6dfde6c7c Mon Sep 17 00:00:00 2001 +From: Andrew Pinski +Date: Fri, 11 Jun 2021 13:21:34 -0700 +Subject: [PATCH 14/29] [Backport] Allow match-and-simplified phiopt to run in + early phiopt + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cd48e550d1dc58307ab1c0ab490745673f748ccc + +To move a few things more to match-and-simplify from phiopt, +we need to allow match_simplify_replacement to run in early +phiopt. To do this we add a replacement for gimple_simplify +that is explicitly for phiopt. + +OK? Bootstrapped and tested on x86_64-linux-gnu with no +regressions. + +gcc/ChangeLog: + + * tree-ssa-phiopt.c (match_simplify_replacement): + Add early_p argument. Call gimple_simplify_phiopt + instead of gimple_simplify. + (tree_ssa_phiopt_worker): Update call to + match_simplify_replacement and allow unconditionally. + (phiopt_early_allow): New function.
+ (gimple_simplify_phiopt): New function. +--- + gcc/tree-ssa-phiopt.c | 89 ++++++++++++++++++++++++++++++++++--------- + 1 file changed, 70 insertions(+), 19 deletions(-) + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 9fa6363b6..92aeb8415 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -48,12 +48,13 @@ along with GCC; see the file COPYING3. If not see + #include "tree-eh.h" + #include "gimple-fold.h" + #include "internal-fn.h" ++#include "gimple-match.h" + + static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); + static bool two_value_replacement (basic_block, basic_block, edge, gphi *, + tree, tree); + static bool match_simplify_replacement (basic_block, basic_block, +- edge, edge, gphi *, tree, tree); ++ edge, edge, gphi *, tree, tree, bool); + static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree, + gimple *); + static int value_replacement (basic_block, basic_block, +@@ -348,9 +349,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + /* Do the replacement of conditional if it can be done. */ + if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1)) + cfgchanged = true; +- else if (!early_p +- && match_simplify_replacement (bb, bb1, e1, e2, phi, +- arg0, arg1)) ++ else if (match_simplify_replacement (bb, bb1, e1, e2, phi, ++ arg0, arg1, ++ early_p)) + cfgchanged = true; + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -763,6 +764,67 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + ++/* Return TRUE if CODE should be allowed during early phiopt. ++ Currently this is to allow MIN/MAX and ABS/NEGATE. 
*/ ++static bool ++phiopt_early_allow (enum tree_code code) ++{ ++ switch (code) ++ { ++ case MIN_EXPR: ++ case MAX_EXPR: ++ case ABS_EXPR: ++ case ABSU_EXPR: ++ case NEGATE_EXPR: ++ case SSA_NAME: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++/* gimple_simplify_phiopt is like gimple_simplify but designed for PHIOPT. ++ Return NULL if nothing can be simplified or the resulting simplified value ++ with parts pushed if EARLY_P was true. Also rejects non allowed tree code ++ if EARLY_P is set. ++ Takes the comparison from COMP_STMT and two args, ARG0 and ARG1 and tries ++ to simplify CMP ? ARG0 : ARG1. */ ++static tree ++gimple_simplify_phiopt (bool early_p, tree type, gimple *comp_stmt, ++ tree arg0, tree arg1, ++ gimple_seq *seq) ++{ ++ tree result; ++ enum tree_code comp_code = gimple_cond_code (comp_stmt); ++ location_t loc = gimple_location (comp_stmt); ++ tree cmp0 = gimple_cond_lhs (comp_stmt); ++ tree cmp1 = gimple_cond_rhs (comp_stmt); ++ /* To handle special cases like floating point comparison, it is easier and ++ less error-prone to build a tree and gimplify it on the fly though it is ++ less efficient. ++ Don't use fold_build2 here as that might create (bool)a instead of just ++ "a != 0". */ ++ tree cond = build2_loc (loc, comp_code, boolean_type_node, ++ cmp0, cmp1); ++ gimple_match_op op (gimple_match_cond::UNCOND, ++ COND_EXPR, type, cond, arg0, arg1); ++ ++ if (op.resimplify (early_p ? NULL : seq, follow_all_ssa_edges)) ++ { ++ /* Early we want only to allow some generated tree codes. */ ++ if (!early_p ++ || op.code.is_tree_code () ++ || phiopt_early_allow ((tree_code)op.code)) ++ { ++ result = maybe_push_res_to_seq (&op, seq); ++ if (result) ++ return result; ++ } ++ } ++ ++ return NULL; ++} ++ + /* The function match_simplify_replacement does the main work of doing the + replacement using match and simplify. Return true if the replacement is done. + Otherwise return false. 
+@@ -772,10 +834,9 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb, + static bool + match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + edge e0, edge e1, gphi *phi, +- tree arg0, tree arg1) ++ tree arg0, tree arg1, bool early_p) + { + gimple *stmt; +- tree cond; + gimple_stmt_iterator gsi; + edge true_edge, false_edge; + gimple_seq seq = NULL; +@@ -799,15 +860,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + + stmt = last_stmt (cond_bb); + +- /* To handle special cases like floating point comparison, it is easier and +- less error-prone to build a tree and gimplify it on the fly though it is +- less efficient. +- Don't use fold_build2 here as that might create (bool)a instead of just +- "a != 0". */ +- cond = build2_loc (gimple_location (stmt), +- gimple_cond_code (stmt), boolean_type_node, +- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt)); +- + /* We need to know which is the true edge and which is the false + edge so that we know when to invert the condition below. 
*/ + extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); +@@ -815,10 +867,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + std::swap (arg0, arg1); + + tree type = TREE_TYPE (gimple_phi_result (phi)); +- result = gimple_simplify (COND_EXPR, type, +- cond, +- arg0, arg1, +- &seq, NULL); ++ result = gimple_simplify_phiopt (early_p, type, stmt, ++ arg0, arg1, ++ &seq); + if (!result) + return false; + +-- +2.25.1 + diff --git a/0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch b/0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch new file mode 100644 index 0000000000000000000000000000000000000000..7605e75ce058d681e1120b60196c95fda8254104 --- /dev/null +++ b/0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch @@ -0,0 +1,259 @@ +From d212d216be0752370dbe7bc63bd75b3a9249e0b5 Mon Sep 17 00:00:00 2001 +From: Andrew Pinski +Date: Tue, 1 Jun 2021 06:48:05 +0000 +Subject: [PATCH 15/29] [Backport] Improve match_simplify_replacement in + phi-opt + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=c4574d23cb07340918793a5a98ae7bb2988b3791 + +This improves match_simplify_replacement in phi-opt to handle the +case where there is one cheap (non-call) preparation statement in the +middle basic block similar to xor_replacement and others. +This allows xor_replacement to be removed, which this patch also does. + +OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. + +Thanks, +Andrew Pinski + +Changes since v1: +v3 - Just minor changes to using gimple_assign_lhs +instead of gimple_lhs and fixing a comment. +v2 - change the check on the preparation statement to +allow only assignments and no calls and only assignments +that feed into the phi. + +gcc/ChangeLog: + + PR tree-optimization/25290 + * tree-ssa-phiopt.c (xor_replacement): Delete. + (tree_ssa_phiopt_worker): Delete use of xor_replacement.
+ (match_simplify_replacement): Allow one cheap preparation + statement that can be moved to before the if. + +gcc/testsuite/ChangeLog: + + * gcc.dg/tree-ssa/pr96928-1.c: Fix testcase for now that ~ + happens on the outside of the bit_xor. +--- + gcc/tree-ssa-phiopt.c | 164 ++++++++++++++---------------------------- + 1 file changed, 52 insertions(+), 112 deletions(-) + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 92aeb8415..51a2d3684 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see + #include "cfghooks.h" + #include "tree-pass.h" + #include "ssa.h" ++#include "tree-ssa.h" + #include "optabs-tree.h" + #include "insn-config.h" + #include "gimple-pretty-print.h" +@@ -63,8 +64,6 @@ static bool minmax_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); + static bool abs_replacement (basic_block, basic_block, + edge, edge, gimple *, tree, tree); +-static bool xor_replacement (basic_block, basic_block, +- edge, edge, gimple *, tree, tree); + static bool spaceship_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); + static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, +@@ -355,9 +354,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + cfgchanged = true; + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +- else if (!early_p +- && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) +- cfgchanged = true; + else if (!early_p + && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, + e2, phi, arg0, +@@ -841,14 +837,51 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + edge true_edge, false_edge; + gimple_seq seq = NULL; + tree result; +- +- if (!empty_block_p (middle_bb)) +- return false; ++ gimple *stmt_to_move = NULL; + + /* Special case A ? B : B as this will always simplify to B. 
*/ + if (operand_equal_for_phi_arg_p (arg0, arg1)) + return false; + ++ /* If the basic block only has a cheap preparation statement, ++ allow it and move it once the transformation is done. */ ++ if (!empty_block_p (middle_bb)) ++ { ++ stmt_to_move = last_and_only_stmt (middle_bb); ++ if (!stmt_to_move) ++ return false; ++ ++ if (gimple_vuse (stmt_to_move)) ++ return false; ++ ++ if (gimple_could_trap_p (stmt_to_move) ++ || gimple_has_side_effects (stmt_to_move)) ++ return false; ++ ++ if (gimple_uses_undefined_value_p (stmt_to_move)) ++ return false; ++ ++ /* Allow assignments and not no calls. ++ As const calls don't match any of the above, yet they could ++ still have some side-effects - they could contain ++ gimple_could_trap_p statements, like floating point ++ exceptions or integer division by zero. See PR70586. ++ FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p ++ should handle this. */ ++ if (!is_gimple_assign (stmt_to_move)) ++ return false; ++ ++ tree lhs = gimple_assign_lhs (stmt_to_move); ++ gimple *use_stmt; ++ use_operand_p use_p; ++ ++ /* Allow only a statement which feeds into the phi. */ ++ if (!lhs || TREE_CODE (lhs) != SSA_NAME ++ || !single_imm_use (lhs, &use_p, &use_stmt) ++ || use_stmt != phi) ++ return false; ++ } ++ + /* At this point we know we have a GIMPLE_COND with two successors. + One successor is BB, the other successor is an empty block which + falls through into BB. 
+@@ -874,7 +907,17 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + return false; + + gsi = gsi_last_bb (cond_bb); +- ++ if (stmt_to_move) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "statement un-sinked:\n"); ++ print_gimple_stmt (dump_file, stmt_to_move, 0, ++ TDF_VOPS|TDF_MEMSYMS); ++ } ++ gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt_to_move); ++ gsi_move_before (&gsi1, &gsi); ++ } + if (seq) + gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); + +@@ -2474,109 +2517,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + +-/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */ +- +-static bool +-xor_replacement (basic_block cond_bb, basic_block middle_bb, +- edge e0 ATTRIBUTE_UNUSED, edge e1, +- gimple *phi, tree arg0, tree arg1) +-{ +- if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1))) +- return false; +- +- /* OTHER_BLOCK must have only one executable statement which must have the +- form arg0 = ~arg1 or arg1 = ~arg0. */ +- +- gimple *assign = last_and_only_stmt (middle_bb); +- /* If we did not find the proper one's complement assignment, then we cannot +- optimize. */ +- if (assign == NULL) +- return false; +- +- /* If we got here, then we have found the only executable statement +- in OTHER_BLOCK. If it is anything other than arg = ~arg1 or +- arg1 = ~arg0, then we cannot optimize. */ +- if (!is_gimple_assign (assign)) +- return false; +- +- if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR) +- return false; +- +- tree lhs = gimple_assign_lhs (assign); +- tree rhs = gimple_assign_rhs1 (assign); +- +- /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */ +- if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0)) +- return false; +- +- gimple *cond = last_stmt (cond_bb); +- tree result = PHI_RESULT (phi); +- +- /* Only relationals comparing arg[01] against zero are interesting. 
*/ +- enum tree_code cond_code = gimple_cond_code (cond); +- if (cond_code != LT_EXPR && cond_code != GE_EXPR) +- return false; +- +- /* Make sure the conditional is x OP 0. */ +- tree clhs = gimple_cond_lhs (cond); +- if (TREE_CODE (clhs) != SSA_NAME +- || !INTEGRAL_TYPE_P (TREE_TYPE (clhs)) +- || TYPE_UNSIGNED (TREE_TYPE (clhs)) +- || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1)) +- || !integer_zerop (gimple_cond_rhs (cond))) +- return false; +- +- /* We need to know which is the true edge and which is the false +- edge so that we know if have xor or inverted xor. */ +- edge true_edge, false_edge; +- extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); +- +- /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we +- will need to invert the result. Similarly for LT_EXPR if +- the false edge goes to OTHER_BLOCK. */ +- edge e; +- if (cond_code == GE_EXPR) +- e = true_edge; +- else +- e = false_edge; +- +- bool invert = e->dest == middle_bb; +- +- result = duplicate_ssa_name (result, NULL); +- +- gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); +- +- int prec = TYPE_PRECISION (TREE_TYPE (clhs)); +- gimple *new_stmt +- = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs, +- build_int_cst (integer_type_node, prec - 1)); +- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); +- +- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs))) +- { +- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), +- NOP_EXPR, gimple_assign_lhs (new_stmt)); +- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); +- } +- lhs = gimple_assign_lhs (new_stmt); +- +- if (invert) +- { +- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)), +- BIT_NOT_EXPR, rhs); +- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); +- rhs = gimple_assign_lhs (new_stmt); +- } +- +- new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs); +- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); 
+- +- replace_phi_edge_with_variable (cond_bb, e1, phi, result); +- +- /* Note that we optimized this PHI. */ +- return true; +-} +- + /* Auxiliary functions to determine the set of memory accesses which + can't trap because they are preceded by accesses to the same memory + portion. We do that for MEM_REFs, so we only need to track +-- +2.25.1 + diff --git a/0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch b/0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..9c2c67f267eb8fc608e9d1ada8ad819f291bb65c --- /dev/null +++ b/0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch @@ -0,0 +1,103 @@ +From 0d55d24aa4e47c40f74e0281d023089cfaafcf74 Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Thu, 6 May 2021 14:05:06 +0200 +Subject: [PATCH 16/29] [Backport] phiopt: Use gphi *phi instead of gimple *phi + some more + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cfd65e8d5299a7cf7d2ecd92b0e24ea4cfb697d9 + +Various functions in phiopt are also called with a gphi * but use +gimple * argument for it. + +2021-05-06 Jakub Jelinek + + * tree-ssa-phiopt.c (value_replacement, minmax_replacement, + abs_replacement, xor_replacement, + cond_removal_in_popcount_clz_ctz_pattern, + replace_phi_edge_with_variable): Change type of phi argument from + gimple * to gphi *. 
+--- + gcc/tree-ssa-phiopt.c | 22 ++++++++++------------ + 1 file changed, 10 insertions(+), 12 deletions(-) + +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 51a2d3684..045a7b1b8 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -59,21 +59,21 @@ static bool match_simplify_replacement (basic_block, basic_block, + static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree, + gimple *); + static int value_replacement (basic_block, basic_block, +- edge, edge, gimple *, tree, tree); ++ edge, edge, gphi *, tree, tree); + static bool minmax_replacement (basic_block, basic_block, +- edge, edge, gimple *, tree, tree); ++ edge, edge, gphi *, tree, tree); + static bool abs_replacement (basic_block, basic_block, +- edge, edge, gimple *, tree, tree); ++ edge, edge, gphi *, tree, tree); + static bool spaceship_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); + static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, +- edge, edge, gimple *, ++ edge, edge, gphi *, + tree, tree); + static bool cond_store_replacement (basic_block, basic_block, edge, edge, + hash_set *); + static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); + static hash_set * get_non_trapping (); +-static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree); ++static void replace_phi_edge_with_variable (basic_block, edge, gphi *, tree); + static void hoist_adjacent_loads (basic_block, basic_block, + basic_block, basic_block); + static bool do_phiopt_pattern (basic_block, basic_block, basic_block); +@@ -389,7 +389,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + + static void + replace_phi_edge_with_variable (basic_block cond_block, +- edge e, gimple *phi, tree new_tree) ++ edge e, gphi *phi, tree new_tree) + { + basic_block bb = gimple_bb (phi); + basic_block block_to_remove; +@@ -1129,8 +1129,7 @@ absorbing_element_p 
(tree_code code, tree arg, bool right, tree rval) + + static int + value_replacement (basic_block cond_bb, basic_block middle_bb, +- edge e0, edge e1, gimple *phi, +- tree arg0, tree arg1) ++ edge e0, edge e1, gphi *phi, tree arg0, tree arg1) + { + gimple_stmt_iterator gsi; + gimple *cond; +@@ -1438,8 +1437,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb, + + static bool + minmax_replacement (basic_block cond_bb, basic_block middle_bb, +- edge e0, edge e1, gimple *phi, +- tree arg0, tree arg1) ++ edge e0, edge e1, gphi *phi, tree arg0, tree arg1) + { + tree result, type, rhs; + gcond *cond; +@@ -2240,7 +2238,7 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb, + static bool + cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + basic_block middle_bb, +- edge e1, edge e2, gimple *phi, ++ edge e1, edge e2, gphi *phi, + tree arg0, tree arg1) + { + gimple *cond; +@@ -2398,7 +2396,7 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + static bool + abs_replacement (basic_block cond_bb, basic_block middle_bb, + edge e0 ATTRIBUTE_UNUSED, edge e1, +- gimple *phi, tree arg0, tree arg1) ++ gphi *phi, tree arg0, tree arg1) + { + tree result; + gassign *new_stmt; +-- +2.25.1 + diff --git a/0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch b/0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch new file mode 100644 index 0000000000000000000000000000000000000000..03b41de05dd177951333409b5dcad6754347ac39 --- /dev/null +++ b/0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch @@ -0,0 +1,212 @@ +From 33dc778a34d7b93978efe922bb1b4583d8e6c4bb Mon Sep 17 00:00:00 2001 +From: Roger Sayle +Date: Mon, 2 Aug 2021 13:27:53 +0100 +Subject: [PATCH 17/29] [Backport] Optimize x ? bswap(x) : 0 in tree-ssa-phiopt + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f9fcf754825a1e01033336f84c18690aaa971a6f + +Many thanks again to Jakub Jelinek for a speedy fix for PR 101642. 
+Interestingly, that test case "bswap16(x) ? : x" also reveals a +missed optimization opportunity. The resulting "x ? bswap(x) : 0" +can be further simplified to just bswap(x). + +Conveniently, tree-ssa-phiopt.c already recognizes/optimizes the +related "x ? popcount(x) : 0", so this patch simply makes that +transformation more general, additionally handling bswap, parity, +ffs and clrsb. All of the required infrastructure is already +present thanks to Jakub previously adding support for clz/ctz. +To reflect this generalization, the name of the function is changed +from cond_removal_in_popcount_clz_ctz_pattern to the hopefully +equally descriptive cond_removal_in_builtin_zero_pattern. + +2021-08-02 Roger Sayle + +gcc/ChangeLog + * tree-ssa-phiopt.c (cond_removal_in_builtin_zero_pattern): + Renamed from cond_removal_in_popcount_clz_ctz_pattern. + Add support for BSWAP, FFS, PARITY and CLRSB builtins. + (tree_ssa_phiopt_worker): Update call to function above. + +gcc/testsuite/ChangeLog + * gcc.dg/tree-ssa/phi-opt-25.c: New test case. +--- + gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c | 83 ++++++++++++++++++++++ + gcc/tree-ssa-phiopt.c | 37 +++++++--- + 2 files changed, 109 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c +new file mode 100644 +index 000000000..c52c92e1d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c +@@ -0,0 +1,83 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-optimized" } */ ++ ++unsigned short test_bswap16(unsigned short x) ++{ ++ return x ? __builtin_bswap16(x) : 0; ++} ++ ++unsigned int test_bswap32(unsigned int x) ++{ ++ return x ? __builtin_bswap32(x) : 0; ++} ++ ++unsigned long long test_bswap64(unsigned long long x) ++{ ++ return x ? __builtin_bswap64(x) : 0; ++} ++ ++int test_clrsb(int x) ++{ ++ return x ?
__builtin_clrsb(x) : (__SIZEOF_INT__*8-1); ++} ++ ++int test_clrsbl(long x) ++{ ++ return x ? __builtin_clrsbl(x) : (__SIZEOF_LONG__*8-1); ++} ++ ++int test_clrsbll(long long x) ++{ ++ return x ? __builtin_clrsbll(x) : (__SIZEOF_LONG_LONG__*8-1); ++} ++ ++#if 0 ++/* BUILT_IN_FFS is transformed by match.pd */ ++int test_ffs(unsigned int x) ++{ ++ return x ? __builtin_ffs(x) : 0; ++} ++ ++int test_ffsl(unsigned long x) ++{ ++ return x ? __builtin_ffsl(x) : 0; ++} ++ ++int test_ffsll(unsigned long long x) ++{ ++ return x ? __builtin_ffsll(x) : 0; ++} ++#endif ++ ++int test_parity(int x) ++{ ++ return x ? __builtin_parity(x) : 0; ++} ++ ++int test_parityl(long x) ++{ ++ return x ? __builtin_parityl(x) : 0; ++} ++ ++int test_parityll(long long x) ++{ ++ return x ? __builtin_parityll(x) : 0; ++} ++ ++int test_popcount(int x) ++{ ++ return x ? __builtin_popcount(x) : 0; ++} ++ ++int test_popcountl(long x) ++{ ++ return x ? __builtin_popcountl(x) : 0; ++} ++ ++int test_popcountll(long long x) ++{ ++ return x ? 
__builtin_popcountll(x) : 0; ++} ++ ++/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */ ++ +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 045a7b1b8..21ac08145 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -66,9 +66,9 @@ static bool abs_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); + static bool spaceship_replacement (basic_block, basic_block, + edge, edge, gphi *, tree, tree); +-static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block, +- edge, edge, gphi *, +- tree, tree); ++static bool cond_removal_in_builtin_zero_pattern (basic_block, basic_block, ++ edge, edge, gphi *, ++ tree, tree); + static bool cond_store_replacement (basic_block, basic_block, edge, edge, + hash_set *); + static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); +@@ -355,9 +355,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p +- && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1, +- e2, phi, arg0, +- arg1)) ++ && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2, ++ phi, arg0, arg1)) + cfgchanged = true; + else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +@@ -2204,7 +2203,8 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb, + return true; + } + +-/* Convert ++/* Optimize x ? __builtin_fun (x) : C, where C is __builtin_fun (0). ++ Convert + + + if (b_4(D) != 0) +@@ -2236,10 +2236,10 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb, + instead of 0 above it uses the value from that macro. 
*/ + + static bool +-cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, +- basic_block middle_bb, +- edge e1, edge e2, gphi *phi, +- tree arg0, tree arg1) ++cond_removal_in_builtin_zero_pattern (basic_block cond_bb, ++ basic_block middle_bb, ++ edge e1, edge e2, gphi *phi, ++ tree arg0, tree arg1) + { + gimple *cond; + gimple_stmt_iterator gsi, gsi_from; +@@ -2287,6 +2287,12 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + int val = 0; + switch (cfn) + { ++ case CFN_BUILT_IN_BSWAP16: ++ case CFN_BUILT_IN_BSWAP32: ++ case CFN_BUILT_IN_BSWAP64: ++ case CFN_BUILT_IN_BSWAP128: ++ CASE_CFN_FFS: ++ CASE_CFN_PARITY: + CASE_CFN_POPCOUNT: + break; + CASE_CFN_CLZ: +@@ -2315,6 +2321,15 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb, + } + } + return false; ++ case BUILT_IN_CLRSB: ++ val = TYPE_PRECISION (integer_type_node) - 1; ++ break; ++ case BUILT_IN_CLRSBL: ++ val = TYPE_PRECISION (long_integer_type_node) - 1; ++ break; ++ case BUILT_IN_CLRSBLL: ++ val = TYPE_PRECISION (long_long_integer_type_node) - 1; ++ break; + default: + return false; + } +-- +2.25.1 + diff --git a/0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch b/0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch new file mode 100644 index 0000000000000000000000000000000000000000..0793aa1f67d8a50c340f75121826be016ceeee04 --- /dev/null +++ b/0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch @@ -0,0 +1,251 @@ +From 77398954ce517aa011b7a254c7aa2858521b2093 Mon Sep 17 00:00:00 2001 +From: Richard Biener +Date: Mon, 15 Nov 2021 15:19:36 +0100 +Subject: [PATCH 18/29] [Backport] tree-optimization/102880 - make PHI-OPT + recognize more CFGs + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f98f373dd822b35c52356b753d528924e9f89678 + +This allows extra edges into the middle BB for the PHI-OPT +transforms using replace_phi_edge_with_variable that do not +end up moving stmts from that middle BB. 
This avoids regressing +gcc.dg/tree-ssa/ssa-hoist-4.c with the actual fix for PR102880 +where CFG cleanup has the choice to remove two forwarders and +picks "the wrong" leading to + + if (a > b) / + /\ / + / + / | + # PHI + +rather than + + if (a > b) | + /\ | + \ | + / \ | + # PHI + +but it's relatively straight-forward to support extra edges +into the middle-BB in paths ending in replace_phi_edge_with_variable +and that do not require moving stmts. That's because we really +only want to remove the edge from the condition to the middle BB. +Of course actually doing that means updating dominators in non-trivial +ways which is why I kept the original code for the single edge +case and simply defer to CFG cleanup by adjusting the condition for +the complicated case. + +The testcase needs to be a GIMPLE one since it's quite unreliable +to produce the desired CFG. + +2021-11-15 Richard Biener + + PR tree-optimization/102880 + * tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Push + single_pred (bb1) condition to places that really need it. + (match_simplify_replacement): Likewise. + (value_replacement): Likewise. + (replace_phi_edge_with_variable): Deal with extra edges + into the middle BB. + + * gcc.dg/tree-ssa/phi-opt-26.c: New testcase. 
+--- + gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 +++++++++ + gcc/tree-ssa-phiopt.c | 73 +++++++++++++--------- + 2 files changed, 75 insertions(+), 29 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c + +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c +new file mode 100644 +index 000000000..21aa66e38 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */ ++ ++int __GIMPLE (ssa,startwith("phiopt")) ++foo (int a, int b, int flag) ++{ ++ int res; ++ ++ __BB(2): ++ if (flag_2(D) != 0) ++ goto __BB6; ++ else ++ goto __BB4; ++ ++ __BB(4): ++ if (a_3(D) > b_4(D)) ++ goto __BB7; ++ else ++ goto __BB6; ++ ++ __BB(6): ++ goto __BB7; ++ ++ __BB(7): ++ res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D)); ++ return res_1; ++} ++ ++/* We should be able to detect MAX despite the extra edge into ++ the middle BB. */ ++/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */ +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 21ac08145..079d29e74 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -219,7 +219,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + + /* If either bb1's succ or bb2 or bb2's succ is non NULL. */ + if (EDGE_COUNT (bb1->succs) == 0 +- || bb2 == NULL + || EDGE_COUNT (bb2->succs) == 0) + continue; + +@@ -279,14 +278,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + || (e1->flags & EDGE_FALLTHRU) == 0) + continue; + +- /* Also make sure that bb1 only have one predecessor and that it +- is bb. */ +- if (!single_pred_p (bb1) +- || single_pred (bb1) != bb) +- continue; +- + if (do_store_elim) + { ++ /* Also make sure that bb1 only have one predecessor and that it ++ is bb. 
*/ ++ if (!single_pred_p (bb1) ++ || single_pred (bb1) != bb) ++ continue; ++ + /* bb1 is the middle block, bb2 the join block, bb the split block, + e1 the fallthrough edge from bb1 to bb2. We can't do the + optimization if the join block has more than two predecessors. */ +@@ -331,10 +330,11 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + node. */ + gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE); + +- gphi *newphi = factor_out_conditional_conversion (e1, e2, phi, +- arg0, arg1, +- cond_stmt); +- if (newphi != NULL) ++ gphi *newphi; ++ if (single_pred_p (bb1) ++ && (newphi = factor_out_conditional_conversion (e1, e2, phi, ++ arg0, arg1, ++ cond_stmt))) + { + phi = newphi; + /* factor_out_conditional_conversion may create a new PHI in +@@ -355,12 +355,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + else if (!early_p ++ && single_pred_p (bb1) + && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2, + phi, arg0, arg1)) + cfgchanged = true; + else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; +- else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) ++ else if (single_pred_p (bb1) ++ && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; + } + } +@@ -391,35 +393,41 @@ replace_phi_edge_with_variable (basic_block cond_block, + edge e, gphi *phi, tree new_tree) + { + basic_block bb = gimple_bb (phi); +- basic_block block_to_remove; + gimple_stmt_iterator gsi; + + /* Change the PHI argument to new. */ + SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree); + + /* Remove the empty basic block. 
*/ ++ edge edge_to_remove; + if (EDGE_SUCC (cond_block, 0)->dest == bb) ++ edge_to_remove = EDGE_SUCC (cond_block, 1); ++ else ++ edge_to_remove = EDGE_SUCC (cond_block, 0); ++ if (EDGE_COUNT (edge_to_remove->dest->preds) == 1) + { +- EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU; +- EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); +- EDGE_SUCC (cond_block, 0)->probability = profile_probability::always (); +- +- block_to_remove = EDGE_SUCC (cond_block, 1)->dest; ++ e->flags |= EDGE_FALLTHRU; ++ e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); ++ e->probability = profile_probability::always (); ++ delete_basic_block (edge_to_remove->dest); ++ ++ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ ++ gsi = gsi_last_bb (cond_block); ++ gsi_remove (&gsi, true); + } + else + { +- EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU; +- EDGE_SUCC (cond_block, 1)->flags +- &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); +- EDGE_SUCC (cond_block, 1)->probability = profile_probability::always (); +- +- block_to_remove = EDGE_SUCC (cond_block, 0)->dest; ++ /* If there are other edges into the middle block make ++ CFG cleanup deal with the edge removal to avoid ++ updating dominators here in a non-trivial way. */ ++ gcond *cond = as_a (last_stmt (cond_block)); ++ if (edge_to_remove->flags & EDGE_TRUE_VALUE) ++ gimple_cond_make_false (cond); ++ else ++ gimple_cond_make_true (cond); + } +- delete_basic_block (block_to_remove); + +- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */ +- gsi = gsi_last_bb (cond_block); +- gsi_remove (&gsi, true); ++ statistics_counter_event (cfun, "Replace PHI with variable", 1); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, +@@ -846,6 +854,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, + allow it and move it once the transformation is done. 
*/ + if (!empty_block_p (middle_bb)) + { ++ if (!single_pred_p (middle_bb)) ++ return false; ++ + stmt_to_move = last_and_only_stmt (middle_bb); + if (!stmt_to_move) + return false; +@@ -1225,6 +1236,11 @@ value_replacement (basic_block cond_bb, basic_block middle_bb, + } + else + { ++ if (!single_pred_p (middle_bb)) ++ return 0; ++ statistics_counter_event (cfun, "Replace PHI with " ++ "variable/value_replacement", 1); ++ + /* Replace the PHI arguments with arg. */ + SET_PHI_ARG_DEF (phi, e0->dest_idx, arg); + SET_PHI_ARG_DEF (phi, e1->dest_idx, arg); +@@ -1239,7 +1255,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb, + } + return 1; + } +- + } + + /* Now optimize (x != 0) ? x + y : y to just x + y. */ +-- +2.25.1 + diff --git a/0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch b/0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch new file mode 100644 index 0000000000000000000000000000000000000000..e6f38b87df5720435e1c220341a4e80b76d14754 --- /dev/null +++ b/0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch @@ -0,0 +1,250 @@ +From a2f5e6f38fe7b5b32a252643b00dd2d7ab0e3fac Mon Sep 17 00:00:00 2001 +From: Richard Sandiford +Date: Tue, 12 May 2020 09:01:10 +0100 +Subject: [PATCH 19/29] [Backport] tree: Add vector_element_bits(_tree) + [PR94980 1/3] + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d17a896da1e898928d337596d029f0ece0039d55 + +A lot of code that wants to know the number of bits in a vector +element gets that information from the element's TYPE_SIZE, +which is always equal to TYPE_SIZE_UNIT * BITS_PER_UNIT. +This doesn't work for SVE and AVX512-style packed boolean vectors, +where several elements can occupy a single byte. + +This patch introduces a new pair of helpers for getting the true +(possibly sub-byte) size. I made a token attempt to convert obvious +element size calculations, but I'm sure I missed some. 
+ +2020-05-12 Richard Sandiford + +gcc/ + PR tree-optimization/94980 + * tree.h (vector_element_bits, vector_element_bits_tree): Declare. + * tree.c (vector_element_bits, vector_element_bits_tree): New. + * match.pd: Use the new functions instead of determining the + vector element size directly from TYPE_SIZE(_UNIT). + * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise. + * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise. + * tree-vect-stmts.c (vect_is_simple_cond): Likewise. + * tree-vect-generic.c (expand_vector_piecewise): Likewise. + (expand_vector_conversion): Likewise. + (expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as + a divisor. Convert the dividend to bits to compensate. + * tree-vect-loop.c (vectorizable_live_operation): Call + vector_element_bits instead of open-coding it. +--- + gcc/ChangeLog | 17 +++++++++++++++++ + gcc/match.pd | 2 +- + gcc/tree-vect-data-refs.c | 2 +- + gcc/tree-vect-generic.c | 19 +++++++------------ + gcc/tree-vect-loop.c | 4 +--- + gcc/tree-vect-patterns.c | 3 +-- + gcc/tree-vect-stmts.c | 3 +-- + gcc/tree.c | 24 ++++++++++++++++++++++++ + gcc/tree.h | 2 ++ + 9 files changed, 55 insertions(+), 21 deletions(-) + +diff --git a/gcc/ChangeLog b/gcc/ChangeLog +index 3b1384e70..07aea9b86 100644 +--- a/gcc/ChangeLog ++++ b/gcc/ChangeLog +@@ -1,3 +1,20 @@ ++2020-05-12 Richard Sandiford ++ ++ PR tree-optimization/94980 ++ * tree.h (vector_element_bits, vector_element_bits_tree): Declare. ++ * tree.c (vector_element_bits, vector_element_bits_tree): New. ++ * match.pd: Use the new functions instead of determining the ++ vector element size directly from TYPE_SIZE(_UNIT). ++ * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise. ++ * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise. ++ * tree-vect-stmts.c (vect_is_simple_cond): Likewise. ++ * tree-vect-generic.c (expand_vector_piecewise): Likewise. ++ (expand_vector_conversion): Likewise. 
++ (expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as ++ a divisor. Convert the dividend to bits to compensate. ++ * tree-vect-loop.c (vectorizable_live_operation): Call ++ vector_element_bits instead of open-coding it. ++ + 2021-04-08 Release Manager + + * GCC 10.3.0 released. +diff --git a/gcc/match.pd b/gcc/match.pd +index 5899eea95..79a0228d2 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -6236,7 +6236,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + } + (if (ins) + (bit_insert { op0; } { ins; } +- { bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); }) ++ { bitsize_int (at * vector_element_bits (type)); }) + (if (changed) + (vec_perm { op0; } { op1; } { op2; })))))))))) + +diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c +index d78b06455..e4466a4f3 100644 +--- a/gcc/tree-vect-data-refs.c ++++ b/gcc/tree-vect-data-refs.c +@@ -3709,7 +3709,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, + tree *offset_vectype_out) + { + unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type)); +- unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))); ++ unsigned int element_bits = vector_element_bits (vectype); + if (element_bits != memory_bits) + /* For now the vector elements must be the same width as the + memory elements. 
*/ +diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c +index c10492034..37c3956a4 100644 +--- a/gcc/tree-vect-generic.c ++++ b/gcc/tree-vect-generic.c +@@ -276,8 +276,7 @@ expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, + tree part_width = TYPE_SIZE (inner_type); + tree index = bitsize_int (0); + int nunits = nunits_for_known_piecewise_op (type); +- int delta = tree_to_uhwi (part_width) +- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type))); ++ int delta = tree_to_uhwi (part_width) / vector_element_bits (type); + int i; + location_t loc = gimple_location (gsi_stmt (*gsi)); + +@@ -357,8 +356,7 @@ expand_vector_addition (gimple_stmt_iterator *gsi, + elem_op_func f, elem_op_func f_parallel, + tree type, tree a, tree b, enum tree_code code) + { +- int parts_per_word = UNITS_PER_WORD +- / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); ++ int parts_per_word = BITS_PER_WORD / vector_element_bits (type); + + if (INTEGRAL_TYPE_P (TREE_TYPE (type)) + && parts_per_word >= 4 +@@ -1733,19 +1731,17 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) + optab optab1 = unknown_optab; + + gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type)); +- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (ret_type)))); +- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (arg_type)))); + if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type)) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type))) + code = FIX_TRUNC_EXPR; + else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type)) + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type))) + code = FLOAT_EXPR; +- if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type))) +- < tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)))) ++ unsigned int ret_elt_bits = vector_element_bits (ret_type); ++ unsigned int arg_elt_bits = vector_element_bits (arg_type); ++ if (ret_elt_bits < arg_elt_bits) + modifier = NARROW; +- else if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type))) +- > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)))) 
++ else if (ret_elt_bits > arg_elt_bits) + modifier = WIDEN; + + if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)) +@@ -1908,8 +1904,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) + tree part_width = TYPE_SIZE (compute_type); + tree index = bitsize_int (0); + int nunits = nunits_for_known_piecewise_op (arg_type); +- int delta = tree_to_uhwi (part_width) +- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))); ++ int delta = tree_to_uhwi (part_width) / arg_elt_bits; + int i; + location_t loc = gimple_location (gsi_stmt (*gsi)); + +diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +index 899b56087..7990e31de 100644 +--- a/gcc/tree-vect-loop.c ++++ b/gcc/tree-vect-loop.c +@@ -8059,9 +8059,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info, + : gimple_get_lhs (stmt); + lhs_type = TREE_TYPE (lhs); + +- bitsize = (VECTOR_BOOLEAN_TYPE_P (vectype) +- ? bitsize_int (TYPE_PRECISION (TREE_TYPE (vectype))) +- : TYPE_SIZE (TREE_TYPE (vectype))); ++ bitsize = vector_element_bits_tree (vectype); + vec_bitsize = TYPE_SIZE (vectype); + + /* Get the vectorized lhs of STMT and the lane to use (counted in bits). 
*/ +diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c +index 84d7ddb17..b076740ef 100644 +--- a/gcc/tree-vect-patterns.c ++++ b/gcc/tree-vect-patterns.c +@@ -4406,8 +4406,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + || dt == vect_constant_def)) + { + tree wide_scalar_type = build_nonstandard_integer_type +- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))), +- TYPE_UNSIGNED (rhs1_type)); ++ (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type)); + tree vectype3 = get_vectype_for_scalar_type (vinfo, + wide_scalar_type); + if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1))) +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index 4636b7ba2..0bdf9a547 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -10717,8 +10717,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node, + && tree_int_cst_lt (TYPE_SIZE (scalar_type), + TYPE_SIZE (TREE_TYPE (vectype)))) + scalar_type = build_nonstandard_integer_type +- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))), +- TYPE_UNSIGNED (scalar_type)); ++ (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type)); + *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, + slp_node); + } +diff --git a/gcc/tree.c b/gcc/tree.c +index 3e6647ae0..9a0cedf10 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -13892,6 +13892,30 @@ vector_type_mode (const_tree t) + return mode; + } + ++/* Return the size in bits of each element of vector type TYPE. */ ++ ++unsigned int ++vector_element_bits (const_tree type) ++{ ++ gcc_checking_assert (VECTOR_TYPE_P (type)); ++ if (VECTOR_BOOLEAN_TYPE_P (type)) ++ return vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (type)), ++ TYPE_VECTOR_SUBPARTS (type)); ++ return tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type))); ++} ++ ++/* Calculate the size in bits of each element of vector type TYPE ++ and return the result as a tree of type bitsizetype. 
*/ ++ ++tree ++vector_element_bits_tree (const_tree type) ++{ ++ gcc_checking_assert (VECTOR_TYPE_P (type)); ++ if (VECTOR_BOOLEAN_TYPE_P (type)) ++ return bitsize_int (vector_element_bits (type)); ++ return TYPE_SIZE (TREE_TYPE (type)); ++} ++ + /* Verify that basic properties of T match TV and thus T can be a variant of + TV. TV should be the more specified variant (i.e. the main variant). */ + +diff --git a/gcc/tree.h b/gcc/tree.h +index bddc6e528..c66207fa0 100644 +--- a/gcc/tree.h ++++ b/gcc/tree.h +@@ -1996,6 +1996,8 @@ class auto_suppress_location_wrappers + + extern machine_mode element_mode (const_tree); + extern machine_mode vector_type_mode (const_tree); ++extern unsigned int vector_element_bits (const_tree); ++extern tree vector_element_bits_tree (const_tree); + + /* The "canonical" type for this type node, which is used by frontends to + compare the type for equality with another type. If two types are +-- +2.25.1 + diff --git a/0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch b/0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch new file mode 100644 index 0000000000000000000000000000000000000000..3063956118642ff8cf635368893dc216d3eeb116 --- /dev/null +++ b/0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch @@ -0,0 +1,1063 @@ +From 3a45b2fc131e4639b05f62d6064bd964d129c19b Mon Sep 17 00:00:00 2001 +From: Martin Liska +Date: Mon, 9 Mar 2020 13:23:03 +0100 +Subject: [PATCH 20/29] [Backport] Lower VEC_COND_EXPR into internal functions. + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=502d63b6d6141597bb18fd23c87736a1b384cf8f + +gcc/ChangeLog: + + * Makefile.in: Add new file. + * expr.c (expand_expr_real_2): Add gcc_unreachable as we should + not meet this condition. + (do_store_flag): Likewise. + * gimplify.c (gimplify_expr): Gimplify first argument of + VEC_COND_EXPR to be a SSA name. + * internal-fn.c (vec_cond_mask_direct): New. + (vec_cond_direct): Likewise. + (vec_condu_direct): Likewise. 
+ (vec_condeq_direct): Likewise. + (expand_vect_cond_optab_fn): New. + (expand_vec_cond_optab_fn): Likewise. + (expand_vec_condu_optab_fn): Likewise. + (expand_vec_condeq_optab_fn): Likewise. + (expand_vect_cond_mask_optab_fn): Likewise. + (expand_vec_cond_mask_optab_fn): Likewise. + (direct_vec_cond_mask_optab_supported_p): Likewise. + (direct_vec_cond_optab_supported_p): Likewise. + (direct_vec_condu_optab_supported_p): Likewise. + (direct_vec_condeq_optab_supported_p): Likewise. + * internal-fn.def (VCOND): New OPTAB. + (VCONDU): Likewise. + (VCONDEQ): Likewise. + (VCOND_MASK): Likewise. + * optabs.c (get_rtx_code): Make it global. + (expand_vec_cond_mask_expr): Removed. + (expand_vec_cond_expr): Removed. + * optabs.h (expand_vec_cond_expr): Likewise. + (vector_compare_rtx): Make it global. + * passes.def: Add new pass_gimple_isel pass. + * tree-cfg.c (verify_gimple_assign_ternary): Add check + for VEC_COND_EXPR about first argument. + * tree-pass.h (make_pass_gimple_isel): New. + * tree-ssa-forwprop.c (pass_forwprop::execute): Prevent + propagation of the first argument of a VEC_COND_EXPR. + * tree-ssa-reassoc.c (ovce_extract_ops): Support SSA_NAME as + first argument of a VEC_COND_EXPR. + (optimize_vec_cond_expr): Likewise. + * tree-vect-generic.c (expand_vector_divmod): Make SSA_NAME + for a first argument of created VEC_COND_EXPR. + (expand_vector_condition): Fix coding style. + * tree-vect-stmts.c (vectorizable_condition): Gimplify + first argument. + * gimple-isel.cc: New file. + +gcc/testsuite/ChangeLog: + + * g++.dg/vect/vec-cond-expr-eh.C: New test. 
+--- + gcc/Makefile.in | 2 + + gcc/expr.c | 25 +- + gcc/gimple-isel.cc | 244 +++++++++++++++++++ + gcc/gimplify.c | 15 +- + gcc/internal-fn.c | 89 +++++++ + gcc/internal-fn.def | 5 + + gcc/optabs.c | 124 +--------- + gcc/optabs.h | 7 +- + gcc/passes.def | 1 + + gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C | 17 ++ + gcc/tree-cfg.c | 2 +- + gcc/tree-pass.h | 1 + + gcc/tree-ssa-forwprop.c | 3 +- + gcc/tree-ssa-reassoc.c | 64 +++-- + gcc/tree-vect-generic.c | 45 ++-- + gcc/tree-vect-stmts.c | 8 +- + 16 files changed, 441 insertions(+), 211 deletions(-) + create mode 100644 gcc/gimple-isel.cc + create mode 100644 gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 2b2bf474a..3f06b8907 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1623,6 +1623,7 @@ OBJS = \ + tree-streamer-out.o \ + tree-tailcall.o \ + tree-vect-generic.o \ ++ gimple-isel.o \ + tree-vect-patterns.o \ + tree-vect-data-refs.o \ + tree-vect-stmts.o \ +@@ -2591,6 +2592,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ + $(srcdir)/dwarf2cfi.c \ + $(srcdir)/dwarf2out.c \ + $(srcdir)/tree-vect-generic.c \ ++ $(srcdir)/gimple-isel.cc \ + $(srcdir)/dojump.c $(srcdir)/emit-rtl.h \ + $(srcdir)/emit-rtl.c $(srcdir)/except.h $(srcdir)/explow.c $(srcdir)/expr.c \ + $(srcdir)/expr.h \ +diff --git a/gcc/expr.c b/gcc/expr.c +index d66fdd4e9..c468b5eb9 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -9286,17 +9286,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, + if (temp != 0) + return temp; + +- /* For vector MIN , expand it a VEC_COND_EXPR +- and similarly for MAX . */ + if (VECTOR_TYPE_P (type)) +- { +- tree t0 = make_tree (type, op0); +- tree t1 = make_tree (type, op1); +- tree comparison = build2 (code == MIN_EXPR ? 
LE_EXPR : GE_EXPR, +- type, t0, t1); +- return expand_vec_cond_expr (type, comparison, t0, t1, +- original_target); +- } ++ gcc_unreachable (); + + /* At this point, a MEM target is no longer useful; we will get better + code without it. */ +@@ -9885,10 +9876,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, + return temp; + } + +- case VEC_COND_EXPR: +- target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target); +- return target; +- + case VEC_DUPLICATE_EXPR: + op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier); + target = expand_vector_broadcast (mode, op0); +@@ -12222,8 +12209,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode) + STRIP_NOPS (arg1); + + /* For vector typed comparisons emit code to generate the desired +- all-ones or all-zeros mask. Conveniently use the VEC_COND_EXPR +- expander for this. */ ++ all-ones or all-zeros mask. */ + if (TREE_CODE (ops->type) == VECTOR_TYPE) + { + tree ifexp = build2 (ops->code, ops->type, arg0, arg1); +@@ -12231,12 +12217,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode) + && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code)) + return expand_vec_cmp_expr (ops->type, ifexp, target); + else +- { +- tree if_true = constant_boolean_node (true, ops->type); +- tree if_false = constant_boolean_node (false, ops->type); +- return expand_vec_cond_expr (ops->type, ifexp, if_true, +- if_false, target); +- } ++ gcc_unreachable (); + } + + /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial +diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc +new file mode 100644 +index 000000000..97f920805 +--- /dev/null ++++ b/gcc/gimple-isel.cc +@@ -0,0 +1,244 @@ ++/* Schedule GIMPLE vector statements. ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "rtl.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "ssa.h" ++#include "expmed.h" ++#include "optabs-tree.h" ++#include "tree-eh.h" ++#include "gimple-iterator.h" ++#include "gimplify-me.h" ++#include "gimplify.h" ++#include "tree-cfg.h" ++ ++/* Expand all VEC_COND_EXPR gimple assignments into calls to internal ++ function based on type of selected expansion. */ ++ ++static gimple * ++gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, ++ hash_map *vec_cond_ssa_name_uses) ++{ ++ tree lhs, op0a = NULL_TREE, op0b = NULL_TREE; ++ enum tree_code code; ++ enum tree_code tcode; ++ machine_mode cmp_op_mode; ++ bool unsignedp; ++ enum insn_code icode; ++ imm_use_iterator imm_iter; ++ ++ /* Only consider code == GIMPLE_ASSIGN. 
*/ ++ gassign *stmt = dyn_cast (gsi_stmt (*gsi)); ++ if (!stmt) ++ return NULL; ++ ++ code = gimple_assign_rhs_code (stmt); ++ if (code != VEC_COND_EXPR) ++ return NULL; ++ ++ tree op0 = gimple_assign_rhs1 (stmt); ++ tree op1 = gimple_assign_rhs2 (stmt); ++ tree op2 = gimple_assign_rhs3 (stmt); ++ lhs = gimple_assign_lhs (stmt); ++ machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); ++ ++ gcc_assert (!COMPARISON_CLASS_P (op0)); ++ if (TREE_CODE (op0) == SSA_NAME) ++ { ++ unsigned int used_vec_cond_exprs = 0; ++ unsigned int *slot = vec_cond_ssa_name_uses->get (op0); ++ if (slot) ++ used_vec_cond_exprs = *slot; ++ else ++ { ++ gimple *use_stmt; ++ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0) ++ { ++ gassign *assign = dyn_cast (use_stmt); ++ if (assign != NULL ++ && gimple_assign_rhs_code (assign) == VEC_COND_EXPR ++ && gimple_assign_rhs1 (assign) == op0) ++ used_vec_cond_exprs++; ++ } ++ vec_cond_ssa_name_uses->put (op0, used_vec_cond_exprs); ++ } ++ ++ gassign *def_stmt = dyn_cast (SSA_NAME_DEF_STMT (op0)); ++ if (def_stmt) ++ { ++ tcode = gimple_assign_rhs_code (def_stmt); ++ op0a = gimple_assign_rhs1 (def_stmt); ++ op0b = gimple_assign_rhs2 (def_stmt); ++ ++ tree op0a_type = TREE_TYPE (op0a); ++ if (used_vec_cond_exprs >= 2 ++ && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type)) ++ != CODE_FOR_nothing) ++ && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode)) ++ { ++ /* Keep the SSA name and use vcond_mask. */ ++ tcode = TREE_CODE (op0); ++ } ++ } ++ else ++ tcode = TREE_CODE (op0); ++ } ++ else ++ tcode = TREE_CODE (op0); ++ ++ if (TREE_CODE_CLASS (tcode) != tcc_comparison) ++ { ++ gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0))); ++ if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0))) ++ != CODE_FOR_nothing) ++ return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2); ++ /* Fake op0 < 0. 
*/ ++ else ++ { ++ gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0))) ++ == MODE_VECTOR_INT); ++ op0a = op0; ++ op0b = build_zero_cst (TREE_TYPE (op0)); ++ tcode = LT_EXPR; ++ } ++ } ++ cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a)); ++ unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a)); ++ ++ ++ gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode)) ++ && known_eq (GET_MODE_NUNITS (mode), ++ GET_MODE_NUNITS (cmp_op_mode))); ++ ++ icode = get_vcond_icode (mode, cmp_op_mode, unsignedp); ++ if (icode == CODE_FOR_nothing) ++ { ++ if (tcode == LT_EXPR ++ && op0a == op0 ++ && TREE_CODE (op0) == VECTOR_CST) ++ { ++ /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR ++ into a constant when only get_vcond_eq_icode is supported. ++ Verify < 0 and != 0 behave the same and change it to NE_EXPR. */ ++ unsigned HOST_WIDE_INT nelts; ++ if (!VECTOR_CST_NELTS (op0).is_constant (&nelts)) ++ { ++ if (VECTOR_CST_STEPPED_P (op0)) ++ gcc_unreachable (); ++ nelts = vector_cst_encoded_nelts (op0); ++ } ++ for (unsigned int i = 0; i < nelts; ++i) ++ if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1) ++ gcc_unreachable (); ++ tcode = NE_EXPR; ++ } ++ if (tcode == EQ_EXPR || tcode == NE_EXPR) ++ { ++ tree tcode_tree = build_int_cst (integer_type_node, tcode); ++ return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1, ++ op2, tcode_tree); ++ } ++ } ++ ++ gcc_assert (icode != CODE_FOR_nothing); ++ tree tcode_tree = build_int_cst (integer_type_node, tcode); ++ return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND, ++ 5, op0a, op0b, op1, op2, tcode_tree); ++} ++ ++ ++ ++/* Iterate all gimple statements and try to expand ++ VEC_COND_EXPR assignments. 
*/ ++ ++static unsigned int ++gimple_expand_vec_cond_exprs (void) ++{ ++ gimple_stmt_iterator gsi; ++ basic_block bb; ++ bool cfg_changed = false; ++ hash_map vec_cond_ssa_name_uses; ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple *g = gimple_expand_vec_cond_expr (&gsi, ++ &vec_cond_ssa_name_uses); ++ if (g != NULL) ++ { ++ tree lhs = gimple_assign_lhs (gsi_stmt (gsi)); ++ gimple_set_lhs (g, lhs); ++ gsi_replace (&gsi, g, false); ++ } ++ } ++ } ++ ++ return cfg_changed ? TODO_cleanup_cfg : 0; ++} ++ ++namespace { ++ ++const pass_data pass_data_gimple_isel = ++{ ++ GIMPLE_PASS, /* type */ ++ "isel", /* name */ ++ OPTGROUP_VEC, /* optinfo_flags */ ++ TV_NONE, /* tv_id */ ++ PROP_cfg, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_update_ssa, /* todo_flags_finish */ ++}; ++ ++class pass_gimple_isel : public gimple_opt_pass ++{ ++public: ++ pass_gimple_isel (gcc::context *ctxt) ++ : gimple_opt_pass (pass_data_gimple_isel, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ return true; ++ } ++ ++ virtual unsigned int execute (function *) ++ { ++ return gimple_expand_vec_cond_exprs (); ++ } ++ ++}; // class pass_gimple_isel ++ ++} // anon namespace ++ ++gimple_opt_pass * ++make_pass_gimple_isel (gcc::context *ctxt) ++{ ++ return new pass_gimple_isel (ctxt); ++} ++ +diff --git a/gcc/gimplify.c b/gcc/gimplify.c +index 89a4ae087..16b2f4328 100644 +--- a/gcc/gimplify.c ++++ b/gcc/gimplify.c +@@ -14272,20 +14272,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, + } + + case VEC_COND_EXPR: +- { +- enum gimplify_status r0, r1, r2; +- +- r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p, +- post_p, is_gimple_condexpr, fb_rvalue); +- r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p, +- post_p, is_gimple_val, fb_rvalue); +- r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 
2), pre_p, +- post_p, is_gimple_val, fb_rvalue); +- +- ret = MIN (MIN (r0, r1), r2); +- recalculate_side_effects (*expr_p); +- } +- break; ++ goto expr_3; + + case VEC_PERM_EXPR: + /* Classified as tcc_expression. */ +diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c +index 5e9aa6072..644f234e0 100644 +--- a/gcc/internal-fn.c ++++ b/gcc/internal-fn.c +@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see + #include "gimple-ssa.h" + #include "tree-phinodes.h" + #include "ssa-iterators.h" ++#include "explow.h" + + /* The names of each internal function, indexed by function number. */ + const char *const internal_fn_name_array[] = { +@@ -107,6 +108,10 @@ init_internal_fns () + #define mask_store_direct { 3, 2, false } + #define store_lanes_direct { 0, 0, false } + #define mask_store_lanes_direct { 0, 0, false } ++#define vec_cond_mask_direct { 0, 0, false } ++#define vec_cond_direct { 0, 0, false } ++#define vec_condu_direct { 0, 0, false } ++#define vec_condeq_direct { 0, 0, false } + #define scatter_store_direct { 3, 1, false } + #define unary_direct { 0, 0, true } + #define binary_direct { 0, 0, true } +@@ -2548,6 +2553,86 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab) + + #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn + ++/* Expand VCOND, VCONDU and VCONDEQ optab internal functions. ++ The expansion of STMT happens based on OPTAB table associated. 
*/ ++ ++static void ++expand_vect_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab) ++{ ++ class expand_operand ops[6]; ++ insn_code icode; ++ tree lhs = gimple_call_lhs (stmt); ++ tree op0a = gimple_call_arg (stmt, 0); ++ tree op0b = gimple_call_arg (stmt, 1); ++ tree op1 = gimple_call_arg (stmt, 2); ++ tree op2 = gimple_call_arg (stmt, 3); ++ enum tree_code tcode = (tree_code) int_cst_value (gimple_call_arg (stmt, 4)); ++ ++ tree vec_cond_type = TREE_TYPE (lhs); ++ tree op_mode = TREE_TYPE (op0a); ++ bool unsignedp = TYPE_UNSIGNED (op_mode); ++ ++ machine_mode mode = TYPE_MODE (vec_cond_type); ++ machine_mode cmp_op_mode = TYPE_MODE (op_mode); ++ ++ icode = convert_optab_handler (optab, mode, cmp_op_mode); ++ rtx comparison ++ = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4); ++ rtx rtx_op1 = expand_normal (op1); ++ rtx rtx_op2 = expand_normal (op2); ++ ++ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ create_output_operand (&ops[0], target, mode); ++ create_input_operand (&ops[1], rtx_op1, mode); ++ create_input_operand (&ops[2], rtx_op2, mode); ++ create_fixed_operand (&ops[3], comparison); ++ create_fixed_operand (&ops[4], XEXP (comparison, 0)); ++ create_fixed_operand (&ops[5], XEXP (comparison, 1)); ++ expand_insn (icode, 6, ops); ++} ++ ++#define expand_vec_cond_optab_fn expand_vect_cond_optab_fn ++#define expand_vec_condu_optab_fn expand_vect_cond_optab_fn ++#define expand_vec_condeq_optab_fn expand_vect_cond_optab_fn ++ ++/* Expand VCOND_MASK optab internal function. ++ The expansion of STMT happens based on OPTAB table associated. 
*/ ++ ++static void ++expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab) ++{ ++ class expand_operand ops[4]; ++ ++ tree lhs = gimple_call_lhs (stmt); ++ tree op0 = gimple_call_arg (stmt, 0); ++ tree op1 = gimple_call_arg (stmt, 1); ++ tree op2 = gimple_call_arg (stmt, 2); ++ tree vec_cond_type = TREE_TYPE (lhs); ++ ++ machine_mode mode = TYPE_MODE (vec_cond_type); ++ machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0)); ++ enum insn_code icode = convert_optab_handler (optab, mode, mask_mode); ++ rtx mask, rtx_op1, rtx_op2; ++ ++ gcc_assert (icode != CODE_FOR_nothing); ++ ++ mask = expand_normal (op0); ++ rtx_op1 = expand_normal (op1); ++ rtx_op2 = expand_normal (op2); ++ ++ mask = force_reg (mask_mode, mask); ++ rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1); ++ ++ rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); ++ create_output_operand (&ops[0], target, mode); ++ create_input_operand (&ops[1], rtx_op1, mode); ++ create_input_operand (&ops[2], rtx_op2, mode); ++ create_input_operand (&ops[3], mask, mask_mode); ++ expand_insn (icode, 4, ops); ++} ++ ++#define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn ++ + static void + expand_ABNORMAL_DISPATCHER (internal_fn, gcall *) + { +@@ -3131,6 +3216,10 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, + #define direct_mask_store_optab_supported_p direct_optab_supported_p + #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p + #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p ++#define direct_vec_cond_mask_optab_supported_p multi_vector_optab_supported_p ++#define direct_vec_cond_optab_supported_p multi_vector_optab_supported_p ++#define direct_vec_condu_optab_supported_p multi_vector_optab_supported_p ++#define direct_vec_condeq_optab_supported_p multi_vector_optab_supported_p + #define direct_scatter_store_optab_supported_p convert_optab_supported_p + #define 
direct_while_optab_supported_p convert_optab_supported_p + #define direct_fold_extract_optab_supported_p direct_optab_supported_p +diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def +index 1d190d492..0c6fc3711 100644 +--- a/gcc/internal-fn.def ++++ b/gcc/internal-fn.def +@@ -136,6 +136,11 @@ DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes) + DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0, + vec_mask_store_lanes, mask_store_lanes) + ++DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond) ++DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu) ++DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq) ++DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask) ++ + DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) + DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW, + check_raw_ptrs, check_ptrs) +diff --git a/gcc/optabs.c b/gcc/optabs.c +index c3751fdf7..64a1a1768 100644 +--- a/gcc/optabs.c ++++ b/gcc/optabs.c +@@ -5454,7 +5454,7 @@ get_rtx_code (enum tree_code tcode, bool unsignedp) + first comparison operand for insn ICODE. Do not generate the + compare instruction itself. */ + +-static rtx ++rtx + vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode, + tree t_op0, tree t_op1, bool unsignedp, + enum insn_code icode, unsigned int opno) +@@ -5821,128 +5821,6 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target) + return tmp; + } + +-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its +- three operands. 
*/ +- +-rtx +-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2, +- rtx target) +-{ +- class expand_operand ops[4]; +- machine_mode mode = TYPE_MODE (vec_cond_type); +- machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0)); +- enum insn_code icode = get_vcond_mask_icode (mode, mask_mode); +- rtx mask, rtx_op1, rtx_op2; +- +- if (icode == CODE_FOR_nothing) +- return 0; +- +- mask = expand_normal (op0); +- rtx_op1 = expand_normal (op1); +- rtx_op2 = expand_normal (op2); +- +- mask = force_reg (mask_mode, mask); +- rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1); +- +- create_output_operand (&ops[0], target, mode); +- create_input_operand (&ops[1], rtx_op1, mode); +- create_input_operand (&ops[2], rtx_op2, mode); +- create_input_operand (&ops[3], mask, mask_mode); +- expand_insn (icode, 4, ops); +- +- return ops[0].value; +-} +- +-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its +- three operands. */ +- +-rtx +-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2, +- rtx target) +-{ +- class expand_operand ops[6]; +- enum insn_code icode; +- rtx comparison, rtx_op1, rtx_op2; +- machine_mode mode = TYPE_MODE (vec_cond_type); +- machine_mode cmp_op_mode; +- bool unsignedp; +- tree op0a, op0b; +- enum tree_code tcode; +- +- if (COMPARISON_CLASS_P (op0)) +- { +- op0a = TREE_OPERAND (op0, 0); +- op0b = TREE_OPERAND (op0, 1); +- tcode = TREE_CODE (op0); +- } +- else +- { +- gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0))); +- if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0))) +- != CODE_FOR_nothing) +- return expand_vec_cond_mask_expr (vec_cond_type, op0, op1, +- op2, target); +- /* Fake op0 < 0. 
*/ +- else +- { +- gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0))) +- == MODE_VECTOR_INT); +- op0a = op0; +- op0b = build_zero_cst (TREE_TYPE (op0)); +- tcode = LT_EXPR; +- } +- } +- cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a)); +- unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a)); +- +- +- gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode)) +- && known_eq (GET_MODE_NUNITS (mode), +- GET_MODE_NUNITS (cmp_op_mode))); +- +- icode = get_vcond_icode (mode, cmp_op_mode, unsignedp); +- if (icode == CODE_FOR_nothing) +- { +- if (tcode == LT_EXPR +- && op0a == op0 +- && TREE_CODE (op0) == VECTOR_CST) +- { +- /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR +- into a constant when only get_vcond_eq_icode is supported. +- Verify < 0 and != 0 behave the same and change it to NE_EXPR. */ +- unsigned HOST_WIDE_INT nelts; +- if (!VECTOR_CST_NELTS (op0).is_constant (&nelts)) +- { +- if (VECTOR_CST_STEPPED_P (op0)) +- return 0; +- nelts = vector_cst_encoded_nelts (op0); +- } +- for (unsigned int i = 0; i < nelts; ++i) +- if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1) +- return 0; +- tcode = NE_EXPR; +- } +- if (tcode == EQ_EXPR || tcode == NE_EXPR) +- icode = get_vcond_eq_icode (mode, cmp_op_mode); +- if (icode == CODE_FOR_nothing) +- return 0; +- } +- +- comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, +- icode, 4); +- rtx_op1 = expand_normal (op1); +- rtx_op2 = expand_normal (op2); +- +- create_output_operand (&ops[0], target, mode); +- create_input_operand (&ops[1], rtx_op1, mode); +- create_input_operand (&ops[2], rtx_op2, mode); +- create_fixed_operand (&ops[3], comparison); +- create_fixed_operand (&ops[4], XEXP (comparison, 0)); +- create_fixed_operand (&ops[5], XEXP (comparison, 1)); +- expand_insn (icode, 6, ops); +- return ops[0].value; +-} +- + /* Generate VEC_SERIES_EXPR , returning a value of mode VMODE. + Use TARGET for the result if nonnull and convenient. 
*/ + +diff --git a/gcc/optabs.h b/gcc/optabs.h +index 5bd19503a..7c2ec257c 100644 +--- a/gcc/optabs.h ++++ b/gcc/optabs.h +@@ -321,9 +321,6 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx, + /* Generate code for vector comparison. */ + extern rtx expand_vec_cmp_expr (tree, tree, rtx); + +-/* Generate code for VEC_COND_EXPR. */ +-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx); +- + /* Generate code for VEC_SERIES_EXPR. */ + extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx); + +@@ -364,5 +361,9 @@ extern void expand_jump_insn (enum insn_code icode, unsigned int nops, + class expand_operand *ops); + + extern enum rtx_code get_rtx_code (enum tree_code tcode, bool unsignedp); ++extern rtx vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode, ++ tree t_op0, tree t_op1, bool unsignedp, ++ enum insn_code icode, unsigned int opno); ++ + + #endif /* GCC_OPTABS_H */ +diff --git a/gcc/passes.def b/gcc/passes.def +index 94554cc1d..5a62819cc 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -403,6 +403,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_cleanup_eh); + NEXT_PASS (pass_lower_resx); + NEXT_PASS (pass_nrv); ++ NEXT_PASS (pass_gimple_isel); + NEXT_PASS (pass_cleanup_cfg_post_optimizing); + NEXT_PASS (pass_warn_function_noreturn); + NEXT_PASS (pass_gen_hsail); +diff --git a/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C +new file mode 100644 +index 000000000..00fe24224 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-fnon-call-exceptions" } */ ++ ++typedef double v2df __attribute__((vector_size(16))); ++ ++v2df foo (v2df a, v2df b, v2df c, v2df d) ++{ ++ try ++ { ++ v2df res = a < b ? c : d; ++ return res; ++ } ++ catch (...) 
++ { ++ return (v2df){}; ++ } ++} +diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c +index 1af59fc6f..d82fe23d8 100644 +--- a/gcc/tree-cfg.c ++++ b/gcc/tree-cfg.c +@@ -4196,7 +4196,7 @@ verify_gimple_assign_ternary (gassign *stmt) + return true; + } + +- if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR) ++ if ((rhs_code == COND_EXPR + ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1)) + || !is_gimple_val (rhs2) + || !is_gimple_val (rhs3)) +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 09dd9b289..1c620b28e 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -631,6 +631,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt); ++extern gimple_opt_pass *make_pass_gimple_isel (gcc::context *ctxt); + + /* Current optimization pass. */ + extern opt_pass *current_pass; +diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c +index 3161d2e39..ba0b55f4a 100644 +--- a/gcc/tree-ssa-forwprop.c ++++ b/gcc/tree-ssa-forwprop.c +@@ -3131,8 +3131,7 @@ pass_forwprop::execute (function *fun) + tree rhs1 = gimple_assign_rhs1 (stmt); + enum tree_code code = gimple_assign_rhs_code (stmt); + +- if (code == COND_EXPR +- || code == VEC_COND_EXPR) ++ if (code == COND_EXPR) + { + /* In this case the entire COND_EXPR is in rhs1. */ + if (forward_propagate_into_cond (&gsi)) +diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c +index af8df8563..5f978ac78 100644 +--- a/gcc/tree-ssa-reassoc.c ++++ b/gcc/tree-ssa-reassoc.c +@@ -3830,7 +3830,8 @@ optimize_range_tests (enum tree_code opcode, + to type of comparison. 
*/ + + static tree_code +-ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) ++ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type, ++ tree *lhs, tree *rhs, gassign **vcond) + { + if (TREE_CODE (var) != SSA_NAME) + return ERROR_MARK; +@@ -3838,6 +3839,8 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) + gassign *stmt = dyn_cast (SSA_NAME_DEF_STMT (var)); + if (stmt == NULL) + return ERROR_MARK; ++ if (vcond) ++ *vcond = stmt; + + /* ??? If we start creating more COND_EXPR, we could perform + this same optimization with them. For now, simplify. */ +@@ -3846,9 +3849,20 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) + + tree cond = gimple_assign_rhs1 (stmt); + tree_code cmp = TREE_CODE (cond); +- if (TREE_CODE_CLASS (cmp) != tcc_comparison) ++ if (cmp != SSA_NAME) + return ERROR_MARK; + ++ gassign *assign = dyn_cast (SSA_NAME_DEF_STMT (cond)); ++ if (assign == NULL ++ || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison) ++ return ERROR_MARK; ++ ++ cmp = gimple_assign_rhs_code (assign); ++ if (lhs) ++ *lhs = gimple_assign_rhs1 (assign); ++ if (rhs) ++ *rhs = gimple_assign_rhs2 (assign); ++ + /* ??? For now, allow only canonical true and false result vectors. + We could expand this to other constants should the need arise, + but at the moment we don't create them. */ +@@ -3869,7 +3883,7 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) + + /* Success! 
*/ + if (rets) +- *rets = stmt; ++ *rets = assign; + if (reti) + *reti = inv; + if (type) +@@ -3893,10 +3907,11 @@ optimize_vec_cond_expr (tree_code opcode, vec *ops) + { + tree elt0 = (*ops)[i]->op; + +- gassign *stmt0; ++ gassign *stmt0, *vcond0; + bool invert; +- tree type; +- tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type); ++ tree type, lhs0, rhs0; ++ tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0, ++ &rhs0, &vcond0); + if (cmp0 == ERROR_MARK) + continue; + +@@ -3904,26 +3919,20 @@ optimize_vec_cond_expr (tree_code opcode, vec *ops) + { + tree &elt1 = (*ops)[j]->op; + +- gassign *stmt1; +- tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL); ++ gassign *stmt1, *vcond1; ++ tree lhs1, rhs1; ++ tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1, ++ &rhs1, &vcond1); + if (cmp1 == ERROR_MARK) + continue; + +- tree cond0 = gimple_assign_rhs1 (stmt0); +- tree x0 = TREE_OPERAND (cond0, 0); +- tree y0 = TREE_OPERAND (cond0, 1); +- +- tree cond1 = gimple_assign_rhs1 (stmt1); +- tree x1 = TREE_OPERAND (cond1, 0); +- tree y1 = TREE_OPERAND (cond1, 1); +- + tree comb; + if (opcode == BIT_AND_EXPR) +- comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1, +- y1); ++ comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0, ++ cmp1, lhs1, rhs1); + else if (opcode == BIT_IOR_EXPR) +- comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1, +- y1); ++ comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0, ++ cmp1, lhs1, rhs1); + else + gcc_unreachable (); + if (comb == NULL) +@@ -3933,19 +3942,22 @@ optimize_vec_cond_expr (tree_code opcode, vec *ops) + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Transforming "); +- print_generic_expr (dump_file, cond0); ++ print_generic_expr (dump_file, gimple_assign_lhs (stmt0)); + fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? 
'&' : '|'); +- print_generic_expr (dump_file, cond1); ++ print_generic_expr (dump_file, gimple_assign_lhs (stmt1)); + fprintf (dump_file, " into "); + print_generic_expr (dump_file, comb); + fputc ('\n', dump_file); + } + +- gimple_assign_set_rhs1 (stmt0, comb); ++ gimple_stmt_iterator gsi = gsi_for_stmt (vcond0); ++ tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE, ++ true, GSI_SAME_STMT); + if (invert) +- std::swap (*gimple_assign_rhs2_ptr (stmt0), +- *gimple_assign_rhs3_ptr (stmt0)); +- update_stmt (stmt0); ++ swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0), ++ gimple_assign_rhs3_ptr (vcond0)); ++ gimple_assign_set_rhs1 (vcond0, exp); ++ update_stmt (vcond0); + + elt1 = error_mark_node; + any_changes = true; +diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c +index 37c3956a4..0ec4412bc 100644 +--- a/gcc/tree-vect-generic.c ++++ b/gcc/tree-vect-generic.c +@@ -693,12 +693,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, + if (addend == NULL_TREE + && expand_vec_cond_expr_p (type, type, LT_EXPR)) + { +- tree zero, cst, cond, mask_type; +- gimple *stmt; ++ tree zero, cst, mask_type, mask; ++ gimple *stmt, *cond; + + mask_type = truth_type_for (type); + zero = build_zero_cst (type); +- cond = build2 (LT_EXPR, mask_type, op0, zero); ++ mask = make_ssa_name (mask_type); ++ cond = gimple_build_assign (mask, LT_EXPR, op0, zero); ++ gsi_insert_before (gsi, cond, GSI_SAME_STMT); + tree_vector_builder vec (type, nunits, 1); + for (i = 0; i < nunits; i++) + vec.quick_push (build_int_cst (TREE_TYPE (type), +@@ -706,8 +708,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, + << shifts[i]) - 1)); + cst = vec.build (); + addend = make_ssa_name (type); +- stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond, +- cst, zero); ++ stmt ++ = gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero); + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); + } + } +@@ -949,21 +951,28 @@ 
expand_vector_condition (gimple_stmt_iterator *gsi) + tree index = bitsize_int (0); + tree comp_width = width; + tree comp_index = index; +- int i; + location_t loc = gimple_location (gsi_stmt (*gsi)); ++ tree_code code = TREE_CODE (a); + +- if (!is_gimple_val (a)) ++ if (code == SSA_NAME) + { +- gcc_assert (COMPARISON_CLASS_P (a)); +- a_is_comparison = true; +- a1 = TREE_OPERAND (a, 0); +- a2 = TREE_OPERAND (a, 1); +- comp_inner_type = TREE_TYPE (TREE_TYPE (a1)); +- comp_width = TYPE_SIZE (comp_inner_type); ++ gimple *assign = SSA_NAME_DEF_STMT (a); ++ if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison) ++ { ++ a_is_comparison = true; ++ a1 = gimple_assign_rhs1 (assign); ++ a2 = gimple_assign_rhs2 (assign); ++ code = gimple_assign_rhs_code (assign); ++ comp_inner_type = TREE_TYPE (TREE_TYPE (a1)); ++ comp_width = vector_element_bits_tree (TREE_TYPE (a1)); ++ } + } + +- if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a))) +- return; ++ if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code)) ++ { ++ gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST); ++ return; ++ } + + /* Handle vector boolean types with bitmasks. 
If there is a comparison + and we can expand the comparison into the vector boolean bitmask, +@@ -986,7 +995,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi) + : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a)))) + { + if (a_is_comparison) +- a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2); ++ a = gimplify_build2 (gsi, code, type, a1, a2); + a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b); + a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a); + a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c); +@@ -1017,7 +1026,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi) + + int nunits = nunits_for_known_piecewise_op (type); + vec_alloc (v, nunits); +- for (i = 0; i < nunits; i++) ++ for (int i = 0; i < nunits; i++) + { + tree aa, result; + tree bb = tree_vec_extract (gsi, inner_type, b, width, index); +@@ -1028,7 +1037,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi) + comp_width, comp_index); + tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, + comp_width, comp_index); +- aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2); ++ aa = fold_build2 (code, cond_type, aa1, aa2); + } + else if (a_is_scalar_bitmask) + { +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index 0bdf9a547..2c2197022 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -11100,8 +11100,12 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + { + vec_cond_rhs = vec_oprnds1[i]; + if (bitop1 == NOP_EXPR) +- vec_compare = build2 (cond_code, vec_cmp_type, +- vec_cond_lhs, vec_cond_rhs); ++ { ++ gimple_seq stmts = NULL; ++ vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type, ++ vec_cond_lhs, vec_cond_rhs); ++ gsi_insert_before (gsi, stmts, GSI_SAME_STMT); ++ } + else + { + new_temp = make_ssa_name (vec_cmp_type); +-- +2.25.1 + diff --git a/0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch b/0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch new file mode 100644 index 
0000000000000000000000000000000000000000..18dbcc57d4178ecfb6471f2118a2c6654ad3beff --- /dev/null +++ b/0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch @@ -0,0 +1,379 @@ +From 21d265af074726b166e08301a2f847c474fcb680 Mon Sep 17 00:00:00 2001 +From: Richard Sandiford +Date: Tue, 30 Nov 2021 09:52:24 +0000 +Subject: [PATCH 21/29] [Backport] gimple-match: Add a gimple_extract_op + function + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=33973fa754de1f95d459bfca66c0d80deec36537 + +code_helper and gimple_match_op seem like generally useful ways +of summing up a gimple_assign or gimple_call (or gimple_cond). +This patch adds a gimple_extract_op function that can be used +for that. + +gcc/ + * gimple-match.h (code_helper): Add functions for querying whether + the code represents an internal_fn or a built_in_function. + Provide explicit conversion operators for both cases. + (gimple_extract_op): Declare. + * gimple-match-head.c (gimple_extract): New function, extracted from... + (gimple_simplify): ...here. + (gimple_extract_op): New function. +--- + gcc/gimple-match-head.c | 219 ++++++++++++++++++++-------------------- + gcc/gimple-match.h | 27 +++++ + 2 files changed, 135 insertions(+), 111 deletions(-) + +diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c +index 9b3e7298d..c1dea1734 100644 +--- a/gcc/gimple-match-head.c ++++ b/gcc/gimple-match-head.c +@@ -884,12 +884,20 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op, + return true; + } + +-/* The main STMT based simplification entry. It is used by the fold_stmt +- and the fold_stmt_to_constant APIs. */ ++/* Common subroutine of gimple_extract_op and gimple_simplify. Try to ++ describe STMT in RES_OP, returning true on success. 
Before recording ++ an operand, call: + +-bool +-gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, +- tree (*valueize)(tree), tree (*top_valueize)(tree)) ++ - VALUEIZE_CONDITION for a COND_EXPR condition ++ - VALUEIZE_OP for every other top-level operand ++ ++ Both routines take a tree argument and returns a tree. */ ++ ++template ++inline bool ++gimple_extract (gimple *stmt, gimple_match_op *res_op, ++ ValueizeOp valueize_op, ++ ValueizeCondition valueize_condition) + { + switch (gimple_code (stmt)) + { +@@ -905,101 +913,50 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, + || code == VIEW_CONVERT_EXPR) + { + tree op0 = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); +- bool valueized = false; +- op0 = do_valueize (op0, top_valueize, valueized); +- res_op->set_op (code, type, op0); +- return (gimple_resimplify1 (seq, res_op, valueize) +- || valueized); ++ res_op->set_op (code, type, valueize_op (op0)); ++ return true; + } + else if (code == BIT_FIELD_REF) + { + tree rhs1 = gimple_assign_rhs1 (stmt); +- tree op0 = TREE_OPERAND (rhs1, 0); +- bool valueized = false; +- op0 = do_valueize (op0, top_valueize, valueized); ++ tree op0 = valueize_op (TREE_OPERAND (rhs1, 0)); + res_op->set_op (code, type, op0, + TREE_OPERAND (rhs1, 1), + TREE_OPERAND (rhs1, 2), + REF_REVERSE_STORAGE_ORDER (rhs1)); +- if (res_op->reverse) +- return valueized; +- return (gimple_resimplify3 (seq, res_op, valueize) +- || valueized); ++ return true; + } +- else if (code == SSA_NAME +- && top_valueize) ++ else if (code == SSA_NAME) + { + tree op0 = gimple_assign_rhs1 (stmt); +- tree valueized = top_valueize (op0); +- if (!valueized || op0 == valueized) +- return false; +- res_op->set_op (TREE_CODE (op0), type, valueized); ++ res_op->set_op (TREE_CODE (op0), type, valueize_op (op0)); + return true; + } + break; + case GIMPLE_UNARY_RHS: + { + tree rhs1 = gimple_assign_rhs1 (stmt); +- bool valueized = false; +- rhs1 = do_valueize (rhs1, top_valueize, 
valueized); +- res_op->set_op (code, type, rhs1); +- return (gimple_resimplify1 (seq, res_op, valueize) +- || valueized); ++ res_op->set_op (code, type, valueize_op (rhs1)); ++ return true; + } + case GIMPLE_BINARY_RHS: + { +- tree rhs1 = gimple_assign_rhs1 (stmt); +- tree rhs2 = gimple_assign_rhs2 (stmt); +- bool valueized = false; +- rhs1 = do_valueize (rhs1, top_valueize, valueized); +- rhs2 = do_valueize (rhs2, top_valueize, valueized); ++ tree rhs1 = valueize_op (gimple_assign_rhs1 (stmt)); ++ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt)); + res_op->set_op (code, type, rhs1, rhs2); +- return (gimple_resimplify2 (seq, res_op, valueize) +- || valueized); ++ return true; + } + case GIMPLE_TERNARY_RHS: + { +- bool valueized = false; + tree rhs1 = gimple_assign_rhs1 (stmt); +- /* If this is a [VEC_]COND_EXPR first try to simplify an +- embedded GENERIC condition. */ +- if (code == COND_EXPR +- || code == VEC_COND_EXPR) +- { +- if (COMPARISON_CLASS_P (rhs1)) +- { +- tree lhs = TREE_OPERAND (rhs1, 0); +- tree rhs = TREE_OPERAND (rhs1, 1); +- lhs = do_valueize (lhs, top_valueize, valueized); +- rhs = do_valueize (rhs, top_valueize, valueized); +- gimple_match_op res_op2 (res_op->cond, TREE_CODE (rhs1), +- TREE_TYPE (rhs1), lhs, rhs); +- if ((gimple_resimplify2 (seq, &res_op2, valueize) +- || valueized) +- && res_op2.code.is_tree_code ()) +- { +- valueized = true; +- if (TREE_CODE_CLASS ((enum tree_code) res_op2.code) +- == tcc_comparison) +- rhs1 = build2 (res_op2.code, TREE_TYPE (rhs1), +- res_op2.ops[0], res_op2.ops[1]); +- else if (res_op2.code == SSA_NAME +- || res_op2.code == INTEGER_CST +- || res_op2.code == VECTOR_CST) +- rhs1 = res_op2.ops[0]; +- else +- valueized = false; +- } +- } +- } +- tree rhs2 = gimple_assign_rhs2 (stmt); +- tree rhs3 = gimple_assign_rhs3 (stmt); +- rhs1 = do_valueize (rhs1, top_valueize, valueized); +- rhs2 = do_valueize (rhs2, top_valueize, valueized); +- rhs3 = do_valueize (rhs3, top_valueize, valueized); ++ if (code == 
COND_EXPR && COMPARISON_CLASS_P (rhs1)) ++ rhs1 = valueize_condition (rhs1); ++ else ++ rhs1 = valueize_op (rhs1); ++ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt)); ++ tree rhs3 = valueize_op (gimple_assign_rhs3 (stmt)); + res_op->set_op (code, type, rhs1, rhs2, rhs3); +- return (gimple_resimplify3 (seq, res_op, valueize) +- || valueized); ++ return true; + } + default: + gcc_unreachable (); +@@ -1013,7 +970,6 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, + && gimple_call_num_args (stmt) >= 1 + && gimple_call_num_args (stmt) <= 5) + { +- bool valueized = false; + combined_fn cfn; + if (gimple_call_internal_p (stmt)) + cfn = as_combined_fn (gimple_call_internal_fn (stmt)); +@@ -1023,7 +979,7 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, + if (!fn) + return false; + +- fn = do_valueize (fn, top_valueize, valueized); ++ fn = valueize_op (fn); + if (TREE_CODE (fn) != ADDR_EXPR + || TREE_CODE (TREE_OPERAND (fn, 0)) != FUNCTION_DECL) + return false; +@@ -1039,47 +995,17 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, + unsigned int num_args = gimple_call_num_args (stmt); + res_op->set_op (cfn, TREE_TYPE (gimple_call_lhs (stmt)), num_args); + for (unsigned i = 0; i < num_args; ++i) +- { +- tree arg = gimple_call_arg (stmt, i); +- res_op->ops[i] = do_valueize (arg, top_valueize, valueized); +- } +- if (internal_fn_p (cfn) +- && try_conditional_simplification (as_internal_fn (cfn), +- res_op, seq, valueize)) +- return true; +- switch (num_args) +- { +- case 1: +- return (gimple_resimplify1 (seq, res_op, valueize) +- || valueized); +- case 2: +- return (gimple_resimplify2 (seq, res_op, valueize) +- || valueized); +- case 3: +- return (gimple_resimplify3 (seq, res_op, valueize) +- || valueized); +- case 4: +- return (gimple_resimplify4 (seq, res_op, valueize) +- || valueized); +- case 5: +- return (gimple_resimplify5 (seq, res_op, valueize) +- || valueized); +- default: +- 
gcc_unreachable (); +- } ++ res_op->ops[i] = valueize_op (gimple_call_arg (stmt, i)); ++ return true; + } + break; + + case GIMPLE_COND: + { +- tree lhs = gimple_cond_lhs (stmt); +- tree rhs = gimple_cond_rhs (stmt); +- bool valueized = false; +- lhs = do_valueize (lhs, top_valueize, valueized); +- rhs = do_valueize (rhs, top_valueize, valueized); ++ tree lhs = valueize_op (gimple_cond_lhs (stmt)); ++ tree rhs = valueize_op (gimple_cond_rhs (stmt)); + res_op->set_op (gimple_cond_code (stmt), boolean_type_node, lhs, rhs); +- return (gimple_resimplify2 (seq, res_op, valueize) +- || valueized); ++ return true; + } + + default: +@@ -1089,6 +1015,77 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, + return false; + } + ++/* Try to describe STMT in RES_OP, returning true on success. ++ For GIMPLE_CONDs, describe the condition that is being tested. ++ For GIMPLE_ASSIGNs, describe the rhs of the assignment. ++ For GIMPLE_CALLs, describe the call. */ ++ ++bool ++gimple_extract_op (gimple *stmt, gimple_match_op *res_op) ++{ ++ auto nop = [](tree op) { return op; }; ++ return gimple_extract (stmt, res_op, nop, nop); ++} ++ ++/* The main STMT based simplification entry. It is used by the fold_stmt ++ and the fold_stmt_to_constant APIs. 
*/ ++ ++bool ++gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq, ++ tree (*valueize)(tree), tree (*top_valueize)(tree)) ++{ ++ bool valueized = false; ++ auto valueize_op = [&](tree op) ++ { ++ return do_valueize (op, top_valueize, valueized); ++ }; ++ auto valueize_condition = [&](tree op) -> tree ++ { ++ bool cond_valueized = false; ++ tree lhs = do_valueize (TREE_OPERAND (op, 0), top_valueize, ++ cond_valueized); ++ tree rhs = do_valueize (TREE_OPERAND (op, 1), top_valueize, ++ cond_valueized); ++ gimple_match_op res_op2 (res_op->cond, TREE_CODE (op), ++ TREE_TYPE (op), lhs, rhs); ++ if ((gimple_resimplify2 (seq, &res_op2, valueize) ++ || cond_valueized) ++ && res_op2.code.is_tree_code ()) ++ { ++ if (TREE_CODE_CLASS ((tree_code) res_op2.code) == tcc_comparison) ++ { ++ valueized = true; ++ return build2 (res_op2.code, TREE_TYPE (op), ++ res_op2.ops[0], res_op2.ops[1]); ++ } ++ else if (res_op2.code == SSA_NAME ++ || res_op2.code == INTEGER_CST ++ || res_op2.code == VECTOR_CST) ++ { ++ valueized = true; ++ return res_op2.ops[0]; ++ } ++ } ++ return valueize_op (op); ++ }; ++ ++ if (!gimple_extract (stmt, res_op, valueize_op, valueize_condition)) ++ return false; ++ ++ if (res_op->code.is_internal_fn ()) ++ { ++ internal_fn ifn = internal_fn (res_op->code); ++ if (try_conditional_simplification (ifn, res_op, seq, valueize)) ++ return true; ++ } ++ ++ if (!res_op->reverse ++ && res_op->num_ops ++ && res_op->resimplify (seq, valueize)) ++ return true; ++ ++ return valueized; ++} + + /* Helper for the autogenerated code, valueize OP. 
*/ + +diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h +index 097898aed..39858c45f 100644 +--- a/gcc/gimple-match.h ++++ b/gcc/gimple-match.h +@@ -33,13 +33,39 @@ public: + code_helper (combined_fn fn) : rep (-(int) fn) {} + operator tree_code () const { return (tree_code) rep; } + operator combined_fn () const { return (combined_fn) -rep; } ++ explicit operator internal_fn () const; ++ explicit operator built_in_function () const; + bool is_tree_code () const { return rep > 0; } + bool is_fn_code () const { return rep < 0; } ++ bool is_internal_fn () const; ++ bool is_builtin_fn () const; + int get_rep () const { return rep; } + private: + int rep; + }; + ++inline code_helper::operator internal_fn () const ++{ ++ return as_internal_fn (combined_fn (*this)); ++} ++ ++inline code_helper::operator built_in_function () const ++{ ++ return as_builtin_fn (combined_fn (*this)); ++} ++ ++inline bool ++code_helper::is_internal_fn () const ++{ ++ return is_fn_code () && internal_fn_p (combined_fn (*this)); ++} ++ ++inline bool ++code_helper::is_builtin_fn () const ++{ ++ return is_fn_code () && builtin_fn_p (combined_fn (*this)); ++} ++ + /* Represents the condition under which an operation should happen, + and the value to use otherwise. The condition applies elementwise + (as for VEC_COND_EXPR) if the values are vectors. 
*/ +@@ -333,6 +359,7 @@ gimple_simplified_result_is_gimple_val (const gimple_match_op *op) + + extern tree (*mprts_hook) (gimple_match_op *); + ++bool gimple_extract_op (gimple *, gimple_match_op *); + bool gimple_simplify (gimple *, gimple_match_op *, gimple_seq *, + tree (*)(tree), tree (*)(tree)); + tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *, +-- +2.25.1 + diff --git a/0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch b/0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch new file mode 100644 index 0000000000000000000000000000000000000000..3d763a430feed0ebaf4caa7eca8ed20dc2de406e --- /dev/null +++ b/0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch @@ -0,0 +1,1004 @@ +From f6b6948de1d836b594ad388388b7121dd7a702cb Mon Sep 17 00:00:00 2001 +From: Richard Sandiford +Date: Tue, 15 Feb 2022 18:09:35 +0000 +Subject: [PATCH 22/29] [Backport] aarch64: Fix subs_compare_2.c regression + [PR100874] +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=8e84b2b37a541b27feea69769fc314d534464ebd + +subs_compare_2.c tests that we can use a SUBS+CSEL sequence for: + +unsigned int +foo (unsigned int a, unsigned int b) +{ + unsigned int x = a - 4; + if (a < 4) + return x; + else + return 0; +} + +As Andrew notes in the PR, this is effectively MIN (x, 4) - 4, +and it is now recognised as such by phiopt. Previously it was +if-converted in RTL instead. + +I tried to look for ways to generalise this to other situations +and to other ?:-style operations, not just max and min. However, +for general ?: we tend to push an outer “- CST” into the arms of +the ?: -- at least if one of them simplifies -- so I didn't find +any useful abstraction. + +This patch therefore adds a pattern specifically for +max/min(a,cst)-cst. I'm not thrilled at having to do this, +but it seems like the least worst fix in the circumstances. 
+Also, max(a,cst)-cst for unsigned a is a useful saturating +subtraction idiom and so is arguably worth its own code +for that reason. + +gcc/ + PR target/100874 + * config/aarch64/aarch64-protos.h (aarch64_maxmin_plus_const): + Declare. + * config/aarch64/aarch64.c (aarch64_maxmin_plus_const): New function. + * config/aarch64/aarch64.md (*aarch64_minmax_plus): New pattern. + +gcc/testsuite/ + * gcc.target/aarch64/max_plus_1.c: New test. + * gcc.target/aarch64/max_plus_2.c: Likewise. + * gcc.target/aarch64/max_plus_3.c: Likewise. + * gcc.target/aarch64/max_plus_4.c: Likewise. + * gcc.target/aarch64/max_plus_5.c: Likewise. + * gcc.target/aarch64/max_plus_6.c: Likewise. + * gcc.target/aarch64/max_plus_7.c: Likewise. + * gcc.target/aarch64/min_plus_1.c: Likewise. + * gcc.target/aarch64/min_plus_2.c: Likewise. + * gcc.target/aarch64/min_plus_3.c: Likewise. + * gcc.target/aarch64/min_plus_4.c: Likewise. + * gcc.target/aarch64/min_plus_5.c: Likewise. + * gcc.target/aarch64/min_plus_6.c: Likewise. + * gcc.target/aarch64/min_plus_7.c: Likewise.
+--- + gcc/config/aarch64/aarch64-protos.h | 1 + + gcc/config/aarch64/aarch64.c | 104 ++++++++++++ + gcc/config/aarch64/aarch64.md | 27 ++++ + gcc/testsuite/gcc.target/aarch64/max_plus_1.c | 149 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/max_plus_2.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/max_plus_3.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/max_plus_4.c | 30 ++++ + gcc/testsuite/gcc.target/aarch64/max_plus_5.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/max_plus_6.c | 9 ++ + gcc/testsuite/gcc.target/aarch64/max_plus_7.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/min_plus_1.c | 149 ++++++++++++++++++ + gcc/testsuite/gcc.target/aarch64/min_plus_2.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/min_plus_3.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/min_plus_4.c | 30 ++++ + gcc/testsuite/gcc.target/aarch64/min_plus_5.c | 35 ++++ + gcc/testsuite/gcc.target/aarch64/min_plus_6.c | 9 ++ + gcc/testsuite/gcc.target/aarch64/min_plus_7.c | 35 ++++ + 17 files changed, 788 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/max_plus_7.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_1.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_2.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_3.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_4.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_5.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_6.c + create mode 100644 gcc/testsuite/gcc.target/aarch64/min_plus_7.c + +diff --git 
a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 226f3a8ff..9b6d309a7 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -696,6 +696,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool, + aarch64_addr_query_type = ADDR_QUERY_M); + machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); + rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); ++bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool); + rtx aarch64_load_tp (rtx); + + void aarch64_expand_compare_and_swap (rtx op[]); +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index f78942b04..85dbd3898 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -3038,6 +3038,110 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y, + return aarch64_gen_compare_reg (code, x, y); + } + ++/* Consider the operation: ++ ++ OPERANDS[0] = CODE (OPERANDS[1], OPERANDS[2]) + OPERANDS[3] ++ ++ where: ++ ++ - CODE is [SU]MAX or [SU]MIN ++ - OPERANDS[2] and OPERANDS[3] are constant integers ++ - OPERANDS[3] is a positive or negative shifted 12-bit immediate ++ - all operands have mode MODE ++ ++ Decide whether it is possible to implement the operation using: ++ ++ SUBS <tmp>, OPERANDS[1], -OPERANDS[3] ++ or ++ ADDS <tmp>, OPERANDS[1], OPERANDS[3] ++ ++ followed by: ++ ++ <insn> OPERANDS[0], <tmp>, [wx]zr, <cond> ++ ++ where <insn> is one of CSEL, CSINV or CSINC. Return true if so. ++ If GENERATE_P is true, also update OPERANDS as follows: ++ ++ OPERANDS[4] = -OPERANDS[3] ++ OPERANDS[5] = the rtl condition representing <cond> ++ OPERANDS[6] = <tmp> ++ OPERANDS[7] = 0 for CSEL, -1 for CSINV or 1 for CSINC. */ ++bool ++aarch64_maxmin_plus_const (rtx_code code, rtx *operands, bool generate_p) ++{ ++ signop sgn = (code == UMAX || code == UMIN ? UNSIGNED : SIGNED); ++ rtx dst = operands[0]; ++ rtx maxmin_op = operands[2]; ++ rtx add_op = operands[3]; ++ machine_mode mode = GET_MODE (dst); ++ ++ /* max (x, y) - z == (x >= y + 1 ?
x : y) - z ++ == (x >= y ? x : y) - z ++ == (x > y ? x : y) - z ++ == (x > y - 1 ? x : y) - z ++ ++ min (x, y) - z == (x <= y - 1 ? x : y) - z ++ == (x <= y ? x : y) - z ++ == (x < y ? x : y) - z ++ == (x < y + 1 ? x : y) - z ++ ++ Check whether z is in { y - 1, y, y + 1 } and pick the form(s) for ++ which x is compared with z. Set DIFF to y - z. Thus the supported ++ combinations are as follows, with DIFF being the value after the ":": ++ ++ max (x, y) - z == x >= y + 1 ? x - (y + 1) : -1 [z == y + 1] ++ == x >= y ? x - y : 0 [z == y] ++ == x > y ? x - y : 0 [z == y] ++ == x > y - 1 ? x - (y - 1) : 1 [z == y - 1] ++ ++ min (x, y) - z == x <= y - 1 ? x - (y - 1) : 1 [z == y - 1] ++ == x <= y ? x - y : 0 [z == y] ++ == x < y ? x - y : 0 [z == y] ++ == x < y + 1 ? x - (y + 1) : -1 [z == y + 1]. */ ++ auto maxmin_val = rtx_mode_t (maxmin_op, mode); ++ auto add_val = rtx_mode_t (add_op, mode); ++ auto sub_val = wi::neg (add_val); ++ auto diff = wi::sub (maxmin_val, sub_val); ++ if (!(diff == 0 ++ || (diff == 1 && wi::gt_p (maxmin_val, sub_val, sgn)) ++ || (diff == -1 && wi::lt_p (maxmin_val, sub_val, sgn)))) ++ return false; ++ ++ if (!generate_p) ++ return true; ++ ++ rtx_code cmp; ++ switch (code) ++ { ++ case SMAX: ++ cmp = diff == 1 ? GT : GE; ++ break; ++ case UMAX: ++ cmp = diff == 1 ? GTU : GEU; ++ break; ++ case SMIN: ++ cmp = diff == -1 ? LT : LE; ++ break; ++ case UMIN: ++ cmp = diff == -1 ? LTU : LEU; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ rtx cc = gen_rtx_REG (CCmode, CC_REGNUM); ++ ++ operands[4] = immed_wide_int_const (sub_val, mode); ++ operands[5] = gen_rtx_fmt_ee (cmp, VOIDmode, cc, const0_rtx); ++ if (can_create_pseudo_p ()) ++ operands[6] = gen_reg_rtx (mode); ++ else ++ operands[6] = dst; ++ operands[7] = immed_wide_int_const (diff, mode); ++ ++ return true; ++} ++ ++ + /* Build the SYMBOL_REF for __tls_get_addr. 
*/ + + static GTY(()) rtx tls_get_addr_libfunc; +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index ee80261f1..7c2562f49 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -4499,6 +4499,33 @@ + } + ) + ++;; Implement MAX/MIN (A, B) - C using SUBS/ADDS followed by CSEL/CSINV/CSINC. ++;; See aarch64_maxmin_plus_const for details about the supported cases. ++(define_insn_and_split "*aarch64_minmax_plus" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (plus:GPI ++ (MAXMIN:GPI ++ (match_operand:GPI 1 "register_operand" "r") ++ (match_operand:GPI 2 "const_int_operand")) ++ (match_operand:GPI 3 "aarch64_plus_immediate"))) ++ (clobber (reg:CC CC_REGNUM))] ++ "aarch64_maxmin_plus_const (, operands, false)" ++ "#" ++ "&& 1" ++ [(parallel ++ [(set (reg:CC CC_REGNUM) ++ (compare:CC (match_dup 1) (match_dup 4))) ++ (set (match_dup 6) ++ (plus:GPI (match_dup 1) (match_dup 3)))]) ++ (set (match_dup 0) ++ (if_then_else:GPI (match_dup 5) (match_dup 6) (match_dup 7)))] ++ { ++ if (!aarch64_maxmin_plus_const (, operands, true)) ++ gcc_unreachable (); ++ } ++ [(set_attr "length" "8")] ++) ++ + ;; ------------------------------------------------------------------- + ;; Logical operations + ;; ------------------------------------------------------------------- +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_1.c b/gcc/testsuite/gcc.target/aarch64/max_plus_1.c +new file mode 100644 +index 000000000..ef336aeec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_1.c +@@ -0,0 +1,149 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (w[0-9]+), w0, #4 ++** csel w0, \1, wzr, g[te] ++** ret ++*/ ++/* ++** f2: ++** adds (w[0-9]+), w0, #4 ++** csel w0, \1, wzr, g[te] ++** ret ++*/ ++/* ++** f3: ++** adds (w[0-9]+), w0, #5 ++** csinc w0, \1, wzr, gt ++** ret ++*/ ++/* ++** f4: ++** adds (w[0-9]+), w0, #3 
++** csinv w0, \1, wzr, ge ++** ret ++*/ ++ ++#ifndef TYPE ++#define TYPE int32_t ++#define TYPE_MIN INT32_MIN ++#define TYPE_MAX INT32_MAX ++#define VALUE -4 ++#endif ++ ++#include ++ ++TYPE __attribute__((noipa)) ++f1 (TYPE x) ++{ ++ return (x > VALUE ? x - VALUE : 0); ++} ++ ++TYPE __attribute__((noipa)) ++f2 (TYPE x) ++{ ++ return (x > VALUE ? x : VALUE) - VALUE; ++} ++ ++TYPE __attribute__((noipa)) ++f3 (TYPE x) ++{ ++ return (x > VALUE ? x : VALUE) - (VALUE - 1); ++} ++ ++TYPE __attribute__((noipa)) ++f4 (TYPE x) ++{ ++ return (x > VALUE ? x : VALUE) - (VALUE + 1); ++} ++ ++TYPE __attribute__((noipa)) ++f5 (TYPE x) ++{ ++ return (x > VALUE ? x : VALUE) - (VALUE + 2); ++} ++ ++TYPE __attribute__((noipa)) ++f6 (TYPE x) ++{ ++ return (x > VALUE ? x : VALUE) - (VALUE - 2); ++} ++ ++int ++main (void) ++{ ++ TYPE max_test = TYPE_MAX; ++ if (TYPE_MIN < 0 && VALUE < 0) ++ max_test += VALUE; ++ ++ if (f1 (TYPE_MIN) != 0) ++ __builtin_abort (); ++ if (f1 (VALUE - 1) != 0) ++ __builtin_abort (); ++ if (f1 (VALUE) != 0) ++ __builtin_abort (); ++ if (f1 (VALUE + 1) != 1) ++ __builtin_abort (); ++ if (f1 (max_test) != max_test - VALUE) ++ __builtin_abort (); ++ ++ if (f2 (TYPE_MIN) != 0) ++ __builtin_abort (); ++ if (f2 (VALUE - 1) != 0) ++ __builtin_abort (); ++ if (f2 (VALUE) != 0) ++ __builtin_abort (); ++ if (f2 (VALUE + 1) != 1) ++ __builtin_abort (); ++ if (f2 (max_test) != max_test - VALUE) ++ __builtin_abort (); ++ ++ if (f3 (TYPE_MIN) != 1) ++ __builtin_abort (); ++ if (f3 (VALUE - 1) != 1) ++ __builtin_abort (); ++ if (f3 (VALUE) != 1) ++ __builtin_abort (); ++ if (f3 (VALUE + 1) != 2) ++ __builtin_abort (); ++ if (f3 (max_test - 1) != max_test - VALUE) ++ __builtin_abort (); ++ ++ if (f4 (TYPE_MIN) != -1) ++ __builtin_abort (); ++ if (f4 (VALUE - 1) != -1) ++ __builtin_abort (); ++ if (f4 (VALUE) != -1) ++ __builtin_abort (); ++ if (f4 (VALUE + 1) != 0) ++ __builtin_abort (); ++ if (f4 (max_test) != max_test - VALUE - 1) ++ __builtin_abort (); ++ ++ if (f5 
(TYPE_MIN) != -2) ++ __builtin_abort (); ++ if (f5 (VALUE - 1) != -2) ++ __builtin_abort (); ++ if (f5 (VALUE) != -2) ++ __builtin_abort (); ++ if (f5 (VALUE + 1) != -1) ++ __builtin_abort (); ++ if (f5 (max_test) != max_test - VALUE - 2) ++ __builtin_abort (); ++ ++ if (f6 (TYPE_MIN) != 2) ++ __builtin_abort (); ++ if (f6 (VALUE - 1) != 2) ++ __builtin_abort (); ++ if (f6 (VALUE) != 2) ++ __builtin_abort (); ++ if (f6 (VALUE + 1) != 3) ++ __builtin_abort (); ++ if (VALUE <= max_test - 2 && f6 (max_test - 2) != max_test - VALUE) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_2.c b/gcc/testsuite/gcc.target/aarch64/max_plus_2.c +new file mode 100644 +index 000000000..a2a1295d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_2.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (x[0-9]+), x0, #4094 ++** csel x0, \1, xzr, g[te] ++** ret ++*/ ++/* ++** f2: ++** adds (x[0-9]+), x0, #4094 ++** csel x0, \1, xzr, g[te] ++** ret ++*/ ++/* ++** f3: ++** adds (x[0-9]+), x0, #4095 ++** csinc x0, \1, xzr, gt ++** ret ++*/ ++/* ++** f4: ++** adds (x[0-9]+), x0, #4093 ++** csinv x0, \1, xzr, ge ++** ret ++*/ ++ ++#define TYPE int64_t ++#define TYPE_MIN INT64_MIN ++#define TYPE_MAX INT64_MAX ++#define VALUE -4094 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_3.c b/gcc/testsuite/gcc.target/aarch64/max_plus_3.c +new file mode 100644 +index 000000000..a9792ecc9 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_3.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (w[0-9]+), w0, #4095 ++** csel w0, \1, wzr, g[te] ++** ret ++*/ ++/* ++** f2: ++** adds (w[0-9]+), w0, #4095 ++** csel w0, \1, wzr, g[te] ++** ret ++*/ ++/* ++** f3: ++** adds 
(w[0-9]+), w0, #4096 ++** csinc w0, \1, wzr, gt ++** ret ++*/ ++/* ++** f4: ++** adds (w[0-9]+), w0, #4094 ++** csinv w0, \1, wzr, ge ++** ret ++*/ ++ ++#define TYPE int32_t ++#define TYPE_MIN INT32_MIN ++#define TYPE_MAX INT32_MAX ++#define VALUE -4095 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_4.c b/gcc/testsuite/gcc.target/aarch64/max_plus_4.c +new file mode 100644 +index 000000000..5090fa101 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_4.c +@@ -0,0 +1,30 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (x[0-9]+), x0, #4096 ++** csel x0, \1, xzr, g[te] ++** ret ++*/ ++/* ++** f2: ++** adds (x[0-9]+), x0, #4096 ++** csel x0, \1, xzr, g[te] ++** ret ++*/ ++/* f3 out of range */ ++/* ++** f4: ++** adds (x[0-9]+), x0, #4095 ++** csinv x0, \1, xzr, ge ++** ret ++*/ ++ ++#define TYPE int64_t ++#define TYPE_MIN INT64_MIN ++#define TYPE_MAX INT64_MAX ++#define VALUE -4096 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_5.c b/gcc/testsuite/gcc.target/aarch64/max_plus_5.c +new file mode 100644 +index 000000000..63f3b3442 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_5.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (w[0-9]+), w0, #4095 ++** csel w0, \1, wzr, (cs|hi) ++** ret ++*/ ++/* ++** f2: ++** adds (w[0-9]+), w0, #4095 ++** csel w0, \1, wzr, (cs|hi) ++** ret ++*/ ++/* ++** f3: ++** adds (w[0-9]+), w0, #4096 ++** csinc w0, \1, wzr, hi ++** ret ++*/ ++/* ++** f4: ++** adds (w[0-9]+), w0, #4094 ++** csinv w0, \1, wzr, cs ++** ret ++*/ ++ ++#define TYPE uint32_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT32_MAX ++#define VALUE (uint32_t)-4095 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_6.c 
b/gcc/testsuite/gcc.target/aarch64/max_plus_6.c +new file mode 100644 +index 000000000..ad592c690 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_6.c +@@ -0,0 +1,9 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++ ++#define TYPE uint64_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT64_MAX ++#define VALUE (uint64_t)-2 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/max_plus_7.c b/gcc/testsuite/gcc.target/aarch64/max_plus_7.c +new file mode 100644 +index 000000000..ac9f27dec +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/max_plus_7.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** adds (x[0-9]+), x0, #3 ++** csel x0, \1, xzr, (cs|hi) ++** ret ++*/ ++/* ++** f2: ++** adds (x[0-9]+), x0, #3 ++** csel x0, \1, xzr, (cs|hi) ++** ret ++*/ ++/* ++** f3: ++** adds (x[0-9]+), x0, #4 ++** csinc x0, \1, xzr, hi ++** ret ++*/ ++/* ++** f4: ++** adds (x[0-9]+), x0, #2 ++** csinv x0, \1, xzr, cs ++** ret ++*/ ++ ++#define TYPE uint64_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT64_MAX ++#define VALUE (uint64_t)-3 ++ ++#include "max_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_1.c b/gcc/testsuite/gcc.target/aarch64/min_plus_1.c +new file mode 100644 +index 000000000..f4c9106df +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_1.c +@@ -0,0 +1,149 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (w[0-9]+), w0, #?4 ++** csel w0, \1, wzr, l[te] ++** ret ++*/ ++/* ++** f2: ++** subs (w[0-9]+), w0, #?4 ++** csel w0, \1, wzr, l[te] ++** ret ++*/ ++/* ++** f3: ++** subs (w[0-9]+), w0, #?3 ++** csinc w0, \1, wzr, le ++** ret ++*/ ++/* ++** f4: ++** subs (w[0-9]+), w0, #?5 ++** csinv w0, \1, wzr, lt ++** ret ++*/ ++ ++#ifndef TYPE ++#define TYPE int32_t ++#define TYPE_MIN 
INT32_MIN ++#define TYPE_MAX INT32_MAX ++#define VALUE 4 ++#endif ++ ++#include ++ ++TYPE __attribute__((noipa)) ++f1 (TYPE x) ++{ ++ return (x < VALUE ? x - VALUE : 0); ++} ++ ++TYPE __attribute__((noipa)) ++f2 (TYPE x) ++{ ++ return (x < VALUE ? x : VALUE) - VALUE; ++} ++ ++TYPE __attribute__((noipa)) ++f3 (TYPE x) ++{ ++ return (x < VALUE ? x : VALUE) - (VALUE - 1); ++} ++ ++TYPE __attribute__((noipa)) ++f4 (TYPE x) ++{ ++ return (x < VALUE ? x : VALUE) - (VALUE + 1); ++} ++ ++TYPE __attribute__((noipa)) ++f5 (TYPE x) ++{ ++ return (x < VALUE ? x : VALUE) - (VALUE + 2); ++} ++ ++TYPE __attribute__((noipa)) ++f6 (TYPE x) ++{ ++ return (x < VALUE ? x : VALUE) - (VALUE - 2); ++} ++ ++int ++main (void) ++{ ++ TYPE min_test = TYPE_MIN; ++ if (TYPE_MIN < 0 && VALUE > 0) ++ min_test += VALUE; ++ ++ if (f1 (min_test) != min_test - VALUE) ++ __builtin_abort (); ++ if (f1 (VALUE - 1) != -1) ++ __builtin_abort (); ++ if (f1 (VALUE) != 0) ++ __builtin_abort (); ++ if (f1 (VALUE + 1) != 0) ++ __builtin_abort (); ++ if (f1 (TYPE_MAX) != 0) ++ __builtin_abort (); ++ ++ if (f2 (min_test) != min_test - VALUE) ++ __builtin_abort (); ++ if (f2 (VALUE - 1) != -1) ++ __builtin_abort (); ++ if (f2 (VALUE) != 0) ++ __builtin_abort (); ++ if (f2 (VALUE + 1) != 0) ++ __builtin_abort (); ++ if (f2 (TYPE_MAX) != 0) ++ __builtin_abort (); ++ ++ if (f3 (min_test) != min_test - VALUE + 1) ++ __builtin_abort (); ++ if (f3 (VALUE - 1) != 0) ++ __builtin_abort (); ++ if (f3 (VALUE) != 1) ++ __builtin_abort (); ++ if (f3 (VALUE + 1) != 1) ++ __builtin_abort (); ++ if (f3 (TYPE_MAX) != 1) ++ __builtin_abort (); ++ ++ if (f4 (min_test + 1) != min_test - VALUE) ++ __builtin_abort (); ++ if (f4 (VALUE - 1) != -2) ++ __builtin_abort (); ++ if (f4 (VALUE) != -1) ++ __builtin_abort (); ++ if (f4 (VALUE + 1) != -1) ++ __builtin_abort (); ++ if (f4 (TYPE_MAX) != -1) ++ __builtin_abort (); ++ ++ if (VALUE >= min_test + 2 && f5 (min_test + 2) != min_test - VALUE) ++ __builtin_abort (); ++ if (f5 (VALUE - 
1) != -3) ++ __builtin_abort (); ++ if (f5 (VALUE) != -2) ++ __builtin_abort (); ++ if (f5 (VALUE + 1) != -2) ++ __builtin_abort (); ++ if (f5 (TYPE_MAX) != -2) ++ __builtin_abort (); ++ ++ if (f6 (min_test) != min_test - VALUE + 2) ++ __builtin_abort (); ++ if (f6 (VALUE - 1) != 1) ++ __builtin_abort (); ++ if (f6 (VALUE) != 2) ++ __builtin_abort (); ++ if (f6 (VALUE + 1) != 2) ++ __builtin_abort (); ++ if (f6 (TYPE_MAX) != 2) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_2.c b/gcc/testsuite/gcc.target/aarch64/min_plus_2.c +new file mode 100644 +index 000000000..bc0141b72 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_2.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (x[0-9]+), x0, #?4094 ++** csel x0, \1, xzr, l[te] ++** ret ++*/ ++/* ++** f2: ++** subs (x[0-9]+), x0, #?4094 ++** csel x0, \1, xzr, l[te] ++** ret ++*/ ++/* ++** f3: ++** subs (x[0-9]+), x0, #?4093 ++** csinc x0, \1, xzr, le ++** ret ++*/ ++/* ++** f4: ++** subs (x[0-9]+), x0, #?4095 ++** csinv x0, \1, xzr, lt ++** ret ++*/ ++ ++#define TYPE int64_t ++#define TYPE_MIN INT64_MIN ++#define TYPE_MAX INT64_MAX ++#define VALUE 4094 ++ ++#include "min_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_3.c b/gcc/testsuite/gcc.target/aarch64/min_plus_3.c +new file mode 100644 +index 000000000..1808e4b0c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_3.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (w[0-9]+), w0, #?4095 ++** csel w0, \1, wzr, l[te] ++** ret ++*/ ++/* ++** f2: ++** subs (w[0-9]+), w0, #?4095 ++** csel w0, \1, wzr, l[te] ++** ret ++*/ ++/* ++** f3: ++** subs (w[0-9]+), w0, #?4094 ++** csinc w0, \1, wzr, le ++** ret ++*/ ++/* ++** f4: ++** subs (w[0-9]+), 
w0, #?4096 ++** csinv w0, \1, wzr, lt ++** ret ++*/ ++ ++#define TYPE int32_t ++#define TYPE_MIN INT32_MIN ++#define TYPE_MAX INT32_MAX ++#define VALUE 4095 ++ ++#include "min_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_4.c b/gcc/testsuite/gcc.target/aarch64/min_plus_4.c +new file mode 100644 +index 000000000..6c581fed6 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_4.c +@@ -0,0 +1,30 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (x[0-9]+), x0, #?4096 ++** csel x0, \1, xzr, l[te] ++** ret ++*/ ++/* ++** f2: ++** subs (x[0-9]+), x0, #?4096 ++** csel x0, \1, xzr, l[te] ++** ret ++*/ ++/* ++** f3: ++** subs (x[0-9]+), x0, #?4095 ++** csinc x0, \1, xzr, le ++** ret ++*/ ++/* f4 out of range */ ++ ++#define TYPE int64_t ++#define TYPE_MIN INT64_MIN ++#define TYPE_MAX INT64_MAX ++#define VALUE 4096 ++ ++#include "min_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_5.c b/gcc/testsuite/gcc.target/aarch64/min_plus_5.c +new file mode 100644 +index 000000000..97542d507 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_5.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (w[0-9]+), w0, #?4095 ++** csel w0, \1, wzr, (cc|ls) ++** ret ++*/ ++/* ++** f2: ++** subs (w[0-9]+), w0, #?4095 ++** csel w0, \1, wzr, (cc|ls) ++** ret ++*/ ++/* ++** f3: ++** subs (w[0-9]+), w0, #?4094 ++** csinc w0, \1, wzr, ls ++** ret ++*/ ++/* ++** f4: ++** subs (w[0-9]+), w0, #?4096 ++** csinv w0, \1, wzr, cc ++** ret ++*/ ++ ++#define TYPE uint32_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT32_MAX ++#define VALUE 4095 ++ ++#include "min_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_6.c b/gcc/testsuite/gcc.target/aarch64/min_plus_6.c +new file mode 100644 +index 000000000..176533cb2 +--- 
/dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_6.c +@@ -0,0 +1,9 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++ ++#define TYPE uint64_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT64_MAX ++#define VALUE 1 ++ ++#include "min_plus_1.c" +diff --git a/gcc/testsuite/gcc.target/aarch64/min_plus_7.c b/gcc/testsuite/gcc.target/aarch64/min_plus_7.c +new file mode 100644 +index 000000000..d6a217a51 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/min_plus_7.c +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 --save-temps" } */ ++/* { dg-final { check-function-bodies "**" "" "" } } */ ++ ++/* ++** f1: ++** subs (x[0-9]+), x0, #?2 ++** csel x0, \1, xzr, (cc|ls) ++** ret ++*/ ++/* ++** f2: ++** subs (x[0-9]+), x0, #?2 ++** csel x0, \1, xzr, (cc|ls) ++** ret ++*/ ++/* ++** f3: ++** subs (x[0-9]+), x0, #?1 ++** csinc x0, \1, xzr, ls ++** ret ++*/ ++/* ++** f4: ++** subs (x[0-9]+), x0, #?3 ++** csinv x0, \1, xzr, cc ++** ret ++*/ ++ ++#define TYPE uint64_t ++#define TYPE_MIN 0 ++#define TYPE_MAX UINT64_MAX ++#define VALUE 2 ++ ++#include "min_plus_1.c" +-- +2.25.1 + diff --git a/0071-PHIOPT-Disable-the-match-A-CST1-0-when-the-CST1-is-n.patch b/0071-PHIOPT-Disable-the-match-A-CST1-0-when-the-CST1-is-n.patch new file mode 100644 index 0000000000000000000000000000000000000000..c308ce0a2dddf48605171858b6e6ed9b560924ed --- /dev/null +++ b/0071-PHIOPT-Disable-the-match-A-CST1-0-when-the-CST1-is-n.patch @@ -0,0 +1,31 @@ +From b57c55b282e7a9a7b2cc0d3843e58fd7998685e6 Mon Sep 17 00:00:00 2001 +From: zhongyunde +Date: Fri, 4 Nov 2022 23:19:44 +0800 +Subject: [PATCH 23/29] [PHIOPT] Disable the match A?CST1:0 when the CST1 is + negative value + +Fix the regression of gcc.target/aarch64/sve/vcond_3.c + +gcc: + * match.pd (A?CST1:CST2): Disable the simplifications A?
(-CST1):0 +--- + gcc/match.pd | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/gcc/match.pd b/gcc/match.pd +index 79a0228d2..fc1a34dd3 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3347,7 +3347,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (if (integer_onep (@1)) + (convert (convert:boolean_type_node @0))) + /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */ +- (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1)) ++ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (TREE_TYPE (@1)) ++ && integer_pow2p (@1)) + (with { + tree shift = build_int_cst (integer_type_node, tree_log2 (@1)); + } +-- +2.25.1 + diff --git a/0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch b/0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch new file mode 100644 index 0000000000000000000000000000000000000000..3816504e90c0389ca88369c876494d4addb1db32 --- /dev/null +++ b/0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch @@ -0,0 +1,1770 @@ +From 6a7b9e30955e0da5258d8c4ab8de611c8a5653a5 Mon Sep 17 00:00:00 2001 +From: liyancheng <412998149@qq.com> +Date: Thu, 3 Nov 2022 20:11:18 +0800 +Subject: [PATCH 24/29] [Struct Reorg] Merge struct_layout pass into + struct_reorg + +1. Merge struct_layout pass into struct_reorg +2. Merge srmode into struct_layout_opt_level +3.
Adapt to all relevant deja tests +--- + gcc/common.opt | 2 +- + gcc/ipa-struct-reorg/ipa-struct-reorg.c | 316 ++++++++---------- + gcc/opts.c | 15 +- + gcc/passes.def | 1 - + gcc/symbol-summary.h | 4 +- + gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c | 2 +- + .../gcc.dg/struct/dfe_ele_minus_verify.c | 2 +- + .../gcc.dg/struct/dfe_extr_board_init.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c | 2 +- + .../gcc.dg/struct/dfe_extr_mv_udc_core.c | 2 +- + .../gcc.dg/struct/dfe_extr_tcp_usrreq.c | 2 +- + .../gcc.dg/struct/dfe_extr_ui_main.c | 2 +- + .../gcc.dg/struct/dfe_mem_ref_offset.c | 2 +- + .../struct/dfe_mul_layer_ptr_record_bug.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c | 2 +- + .../gcc.dg/struct/dfe_ptr_negate_expr.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c | 2 +- + .../struct/rf_DTE_struct_instance_field.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c | 2 +- + .../gcc.dg/struct/rf_check_ptr_layers_bug.c | 2 +- + .../gcc.dg/struct/rf_create_fields_bug.c | 2 +- + .../gcc.dg/struct/rf_create_new_func_bug.c | 2 +- + .../gcc.dg/struct/rf_ele_minus_verify.c | 2 +- + .../gcc.dg/struct/rf_escape_by_base.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c | 2 +- + .../gcc.dg/struct/rf_mem_ref_offset.c | 2 +- + .../struct/rf_mul_layer_ptr_record_bug.c | 2 +- + .../gcc.dg/struct/rf_pass_conflict.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c | 2 +- + .../gcc.dg/struct/rf_ptr_negate_expr.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c | 2 +- + .../gcc.dg/struct/rf_rescusive_type.c | 2 +- + .../struct/rf_rewrite_assign_more_cmp.c | 2 +- + .../gcc.dg/struct/rf_rewrite_cond_bug.c | 2 +- + 
.../gcc.dg/struct/rf_rewrite_cond_more_cmp.c | 2 +- + .../gcc.dg/struct/rf_rewrite_phi_bug.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_visible_func.c | 2 +- + .../gcc.dg/struct/rf_void_ptr_param_func.c | 2 +- + gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c | 2 +- + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 4 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-1.c | 2 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-2.c | 2 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-3.c | 2 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-4.c | 2 +- + .../gcc.dg/struct/w_prof_global_array.c | 2 +- + .../gcc.dg/struct/w_prof_global_var.c | 2 +- + .../gcc.dg/struct/w_prof_local_array.c | 2 +- + .../gcc.dg/struct/w_prof_local_var.c | 2 +- + .../gcc.dg/struct/w_prof_single_str_global.c | 2 +- + gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c | 2 +- + .../gcc.dg/struct/w_ratio_cold_str.c | 2 +- + .../gcc.dg/struct/wo_prof_array_field.c | 2 +- + .../struct/wo_prof_array_through_pointer.c | 2 +- + .../gcc.dg/struct/wo_prof_double_malloc.c | 2 +- + .../gcc.dg/struct/wo_prof_empty_str.c | 2 +- + .../struct/wo_prof_escape_arg_to_local.c | 2 +- + .../struct/wo_prof_escape_substr_array.c | 2 +- + .../gcc.dg/struct/wo_prof_global_array.c | 2 +- + .../gcc.dg/struct/wo_prof_global_var.c | 2 +- + .../gcc.dg/struct/wo_prof_local_array.c | 2 +- + .../gcc.dg/struct/wo_prof_local_var.c | 2 +- + .../gcc.dg/struct/wo_prof_malloc_size_var-1.c | 2 +- + .../gcc.dg/struct/wo_prof_malloc_size_var.c | 2 +- + .../struct/wo_prof_mult_field_peeling.c | 2 +- + .../gcc.dg/struct/wo_prof_single_str_global.c | 2 +- + .../gcc.dg/struct/wo_prof_single_str_local.c | 2 +- + .../gcc.dg/struct/wo_prof_two_strs.c | 2 +- + gcc/timevar.def | 1 - + gcc/tree-pass.h | 1 - + gcc/tree.c | 4 +- + 76 files changed, 222 insertions(+), 260 deletions(-) + +diff --git a/gcc/common.opt b/gcc/common.opt +index be7bfee60..ad147f7a9 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1881,7 +1881,7 @@ Common Ignore + Does nothing. 
Preserved for backward compatibility. + + fipa-reorder-fields +-Common Report Var(flag_ipa_struct_layout) Init(0) Optimization ++Common Report Var(flag_ipa_reorder_fields) Init(0) Optimization + Perform structure fields reorder optimizations. + + fipa-struct-reorg +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +index 54c20ca3f..08cb51fee 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +@@ -232,12 +232,6 @@ is_from_void_ptr_parm (tree ssa_name) + && VOID_POINTER_P (TREE_TYPE (ssa_name))); + } + +-enum srmode +-{ +- NORMAL = 0, +- COMPLETE_STRUCT_RELAYOUT, +- STRUCT_LAYOUT_OPTIMIZE +-}; + + /* Enum the struct layout optimize level, + which should be the same as the option -fstruct-reorg=. */ +@@ -245,16 +239,17 @@ enum srmode + enum struct_layout_opt_level + { + NONE = 0, +- STRUCT_REORG, +- STRUCT_REORDER_FIELDS, +- DEAD_FIELD_ELIMINATION ++ STRUCT_SPLIT = 1 << 0, ++ COMPLETE_STRUCT_RELAYOUT = 1 << 1, ++ STRUCT_REORDER_FIELDS = 1 << 2, ++ DEAD_FIELD_ELIMINATION = 1 << 3 + }; + + static bool is_result_of_mult (tree arg, tree *num, tree struct_size); + bool isptrptr (tree type); + void get_base (tree &base, tree expr); + +-srmode current_mode; ++static unsigned int current_layout_opt_level; + + hash_map replace_type_map; + +@@ -607,7 +602,7 @@ void + srtype::simple_dump (FILE *f) + { + print_generic_expr (f, type); +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + fprintf (f, "(%d)", TYPE_UID (type)); + } +@@ -656,7 +651,7 @@ srfield::create_new_fields (tree newtype[max_split], + tree newfields[max_split], + tree newlast[max_split]) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + create_new_optimized_fields (newtype, newfields, newlast); + return; +@@ -857,7 +852,7 @@ srtype::create_new_type (void) + we are not splitting the struct into two 
clusters, + then just return false and don't change the type. */ + if (!createnewtype && maxclusters == 0 +- && current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ && current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + newtype[0] = type; + return false; +@@ -885,8 +880,7 @@ srtype::create_new_type (void) + sprintf(id, "%d", i); + if (tname) + { +- name = concat (tname, current_mode == STRUCT_LAYOUT_OPTIMIZE +- ? ".slo." : ".reorg.", id, NULL); ++ name = concat (tname, ".reorg.", id, NULL); + TYPE_NAME (newtype[i]) = build_decl (UNKNOWN_LOCATION, TYPE_DECL, + get_identifier (name), newtype[i]); + free (name); +@@ -896,8 +890,7 @@ srtype::create_new_type (void) + for (unsigned i = 0; i < fields.length (); i++) + { + srfield *f = fields[i]; +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION + && !(f->field_access & READ_FIELD)) + continue; + f->create_new_fields (newtype, newfields, newlast); +@@ -921,13 +914,12 @@ srtype::create_new_type (void) + + warn_padded = save_warn_padded; + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && replace_type_map.get (this->newtype[0]) == NULL) + replace_type_map.put (this->newtype[0], this->type); + if (dump_file) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION + && has_dead_field ()) + fprintf (dump_file, "Dead field elimination.\n"); + } +@@ -1046,8 +1038,7 @@ srfunction::create_new_decls (void) + sprintf(id, "%d", j); + if (tname) + { +- name = concat (tname, current_mode == STRUCT_LAYOUT_OPTIMIZE +- ? ".slo." 
: ".reorg.", id, NULL); ++ name = concat (tname, ".reorg.", id, NULL); + new_name = get_identifier (name); + free (name); + } +@@ -1266,7 +1257,7 @@ public: + { + } + +- unsigned execute (enum srmode mode); ++ unsigned execute (unsigned int opt); + void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL); + + // fields +@@ -2796,7 +2787,7 @@ escape_type escape_type_volatile_array_or_ptrptr (tree type) + return escape_volatile; + if (isarraytype (type)) + return escape_array; +- if (isptrptr (type) && (current_mode != STRUCT_LAYOUT_OPTIMIZE)) ++ if (isptrptr (type) && (current_layout_opt_level < STRUCT_REORDER_FIELDS)) + return escape_ptr_ptr; + return does_not_escape; + } +@@ -2817,14 +2808,13 @@ ipa_struct_reorg::record_field_type (tree field, srtype *base_srtype) + field_srfield->type = field_srtype; + field_srtype->add_field_site (field_srfield); + } +- if (field_srtype == base_srtype && current_mode != COMPLETE_STRUCT_RELAYOUT +- && current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ if (field_srtype == base_srtype && current_layout_opt_level == STRUCT_SPLIT) + { + base_srtype->mark_escape (escape_rescusive_type, NULL); + } + /* Types of non-pointer field are difficult to track the correctness + of the rewrite when it used by the escaped type. */ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (field_type) == RECORD_TYPE) + { + field_srtype->mark_escape (escape_instance_field, NULL); +@@ -2859,7 +2849,7 @@ ipa_struct_reorg::record_struct_field_types (tree base_type, + } + /* Types of non-pointer field are difficult to track the correctness + of the rewrite when it used by the escaped type. 
*/ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (field_type) == RECORD_TYPE) + { + base_srtype->mark_escape (escape_instance_field, NULL); +@@ -3043,8 +3033,7 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) + + /* Separate instance is hard to trace in complete struct + relayout optimization. */ +- if ((current_mode == COMPLETE_STRUCT_RELAYOUT +- || current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) + { + e = escape_separate_instance; +@@ -3149,7 +3138,7 @@ ipa_struct_reorg::find_vars (gimple *stmt) + /* Add a safe func mechanism. */ + bool l_find = true; + bool r_find = true; +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + l_find = !(current_function->is_safe_func + && TREE_CODE (lhs) == SSA_NAME +@@ -3195,7 +3184,7 @@ ipa_struct_reorg::find_vars (gimple *stmt) + } + } + } +- else if ((current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ else if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) + && (gimple_assign_rhs_code (stmt) == LE_EXPR + || gimple_assign_rhs_code (stmt) == LT_EXPR + || gimple_assign_rhs_code (stmt) == GE_EXPR +@@ -3206,7 +3195,7 @@ ipa_struct_reorg::find_vars (gimple *stmt) + find_var (gimple_assign_rhs2 (stmt), stmt); + } + /* find void ssa_name from stmt such as: _2 = _1 - old_arcs_1. 
*/ +- else if ((current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ else if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) + && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR + && types_compatible_p ( + TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs1 (stmt))), +@@ -3418,8 +3407,7 @@ ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) + default: + break; + } +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION) ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION) + { + /* Look for loads and stores. */ + walk_stmt_load_store_ops (stmt, this, find_field_p_load, +@@ -3590,11 +3578,12 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + size_def_stmt = SSA_NAME_DEF_STMT (arg); + } + else if (rhs_code == NEGATE_EXPR +- && current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ && current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + return trace_calculate_negate (size_def_stmt, num, struct_size); + } +- else if (rhs_code == NOP_EXPR && current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ else if (rhs_code == NOP_EXPR ++ && current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + return trace_calculate_diff (size_def_stmt, num); + } +@@ -3614,17 +3603,17 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + bool + ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if ((current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) + && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))) + { + return true; + } +- if ((current_mode == COMPLETE_STRUCT_RELAYOUT) ++ if ((current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT) + && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + return true; +- if ((current_mode == NORMAL) ++ if ((current_layout_opt_level == STRUCT_SPLIT) + && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) + || gimple_call_builtin_p 
(stmt, BUILT_IN_MALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) +@@ -3750,7 +3739,7 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple + /* x_1 = y.x_nodes; void *x; + Directly mark the structure pointer type assigned + to the void* variable as escape. */ +- else if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ else if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (side) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (side)) + && SSA_NAME_VAR (side) +@@ -4017,7 +4006,7 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + and doesn't mark escape follow.). */ + /* _1 = MEM[(struct arc_t * *)a_1]. + then base a_1: ssa_name - pointer_type - integer_type. */ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + bool is_int_ptr = POINTER_TYPE_P (TREE_TYPE (base)) + && (TREE_CODE (inner_type (TREE_TYPE (base))) +@@ -4081,7 +4070,7 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + /* Escape the operation of fetching field with pointer offset such as: + *(&(t->right)) = malloc (0); -> MEM[(struct node * *)_1 + 8B] = malloc (0); + */ +- if (current_mode != NORMAL ++ if (current_layout_opt_level > STRUCT_SPLIT + && (TREE_CODE (expr) == MEM_REF) && (offset != 0)) + { + gcc_assert (can_escape); +@@ -4233,7 +4222,7 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) + /* callee_func (_1, _2); + Check the callee func, instead of current func. 
*/ + if (!(free_or_realloc +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && safe_functions.contains ( + node->get_edge (stmt)->callee))) + && VOID_POINTER_P (argtypet)) +@@ -4265,14 +4254,7 @@ ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt) + realpart, imagpart, address, escape_from_base)) + return; + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) +- { +- if (!opt_for_fn (current_function_decl, flag_ipa_struct_layout)) +- { +- type->mark_escape (escape_non_optimize, stmt); +- } +- } +- else ++ if (current_layout_opt_level > NONE) + { + if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) + { +@@ -4379,7 +4361,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, + void + ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + { +- if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if (current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT + && handled_allocation_stmt (stmt)) + { + tree arg0 = gimple_call_arg (stmt, 0); +@@ -4490,7 +4472,7 @@ ipa_struct_reorg::check_definition_call (srdecl *decl, vec &worklist) + check_type_and_push (gimple_call_arg (stmt, 0), decl, worklist, stmt); + } + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + if (!handled_allocation_stmt (stmt)) + { +@@ -4544,7 +4526,8 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) + } + return; + } +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE && SSA_NAME_VAR (ssa_name) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS ++ && SSA_NAME_VAR (ssa_name) + && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (ssa_name)))) + { + type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); +@@ -4631,7 +4614,7 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec< + { + /* In Complete Struct Relayout opti, if lhs type is the same + as rhs type, we could return without any harm. 
*/ +- if (current_mode == COMPLETE_STRUCT_RELAYOUT) ++ if (current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT) + { + return; + } +@@ -4645,7 +4628,7 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec< + if (!get_type_field (other, base, indirect, type1, field, + realpart, imagpart, address, escape_from_base)) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + /* release INTEGER_TYPE cast to struct pointer. */ + bool cast_from_int_ptr = current_function->is_safe_func && base +@@ -4703,7 +4686,8 @@ get_base (tree &base, tree expr) + void + ipa_struct_reorg::check_ptr_layers (tree a_expr, tree b_expr, gimple* stmt) + { +- if (current_mode != STRUCT_LAYOUT_OPTIMIZE || current_function->is_safe_func ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS ++ || current_function->is_safe_func + || !(POINTER_TYPE_P (TREE_TYPE (a_expr))) + || !(POINTER_TYPE_P (TREE_TYPE (b_expr))) + || !handled_type (TREE_TYPE (a_expr)) +@@ -4779,12 +4763,9 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, vec &worklist) + tree rhs2 = gimple_cond_rhs (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_cond_code (stmt); +- if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) +- || (current_mode == COMPLETE_STRUCT_RELAYOUT +- && (code != EQ_EXPR && code != NE_EXPR +- && code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR)) +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if ((current_layout_opt_level == STRUCT_SPLIT ++ && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && (code != EQ_EXPR && code != NE_EXPR + && code != LT_EXPR && code != LE_EXPR + && code != GT_EXPR && code != GE_EXPR))) +@@ -4818,15 +4799,12 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, vec &worklist) + tree rhs2 = gimple_assign_rhs2 (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_assign_rhs_code 
(stmt); +- if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) +- || (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if ((current_layout_opt_level == STRUCT_SPLIT ++ && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && (code != EQ_EXPR && code != NE_EXPR + && code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR)) +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE +- && (code != EQ_EXPR && code != NE_EXPR +- && code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR))) ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -4945,11 +4923,11 @@ ipa_struct_reorg::record_function (cgraph_node *node) + escapes = escape_marked_as_used; + else if (!node->local) + { +- if (current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + escapes = escape_visible_function; + } +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE && node->externally_visible) ++ else if (node->externally_visible) + { + escapes = escape_visible_function; + } +@@ -4959,14 +4937,7 @@ ipa_struct_reorg::record_function (cgraph_node *node) + else if (!tree_versionable_function_p (node->decl)) + escapes = escape_noclonable_function; + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) +- { +- if (!opt_for_fn (node->decl, flag_ipa_struct_layout)) +- { +- escapes = escape_non_optimize; +- } +- } +- else if (current_mode == NORMAL || current_mode == COMPLETE_STRUCT_RELAYOUT) ++ if (current_layout_opt_level > NONE) + { + if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) + { +@@ -4978,10 +4949,10 @@ ipa_struct_reorg::record_function (cgraph_node *node) + gimple_stmt_iterator si; + + /* Add a safe func mechanism. 
*/ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + current_function->is_safe_func = safe_functions.contains (node); +- if (dump_file) ++ if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nfunction %s/%u: is_safe_func = %d\n", + node->name (), node->order, +@@ -5194,7 +5165,7 @@ ipa_struct_reorg::record_accesses (void) + } + + /* Add a safe func mechanism. */ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + record_safe_func_with_void_ptr_parm (); + } +@@ -5392,8 +5363,7 @@ ipa_struct_reorg::propagate_escape_via_empty_with_no_original (void) + void + ipa_struct_reorg::prune_escaped_types (void) + { +- if (current_mode != COMPLETE_STRUCT_RELAYOUT +- && current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level == STRUCT_SPLIT) + { + /* Detect recusive types and mark them as escaping. */ + detect_cycles (); +@@ -5401,7 +5371,7 @@ ipa_struct_reorg::prune_escaped_types (void) + mark them as escaping. */ + propagate_escape (); + } +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + propagate_escape_via_original (); + propagate_escape_via_empty_with_no_original (); +@@ -5461,7 +5431,7 @@ ipa_struct_reorg::prune_escaped_types (void) + if (function->args.is_empty () + && function->decls.is_empty () + && function->globals.is_empty () +- && current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ && current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + delete function; + functions.ordered_remove (i); +@@ -5489,7 +5459,7 @@ ipa_struct_reorg::prune_escaped_types (void) + /* The escape type is not deleted in STRUCT_LAYOUT_OPTIMIZE, + Then the type that contains the escaped type fields + can find complete information. 
*/ +- if (current_mode != STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < types.length ();) + { +@@ -5539,7 +5509,7 @@ ipa_struct_reorg::create_new_types (void) + for (unsigned i = 0; i < types.length (); i++) + newtypes += types[i]->create_new_type (); + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < types.length (); i++) + { +@@ -5561,14 +5531,31 @@ ipa_struct_reorg::create_new_types (void) + } + } + +- if (dump_file) ++ if (current_layout_opt_level == STRUCT_SPLIT) + { +- if (newtypes) +- fprintf (dump_file, "\nNumber of structures to transform is %d\n", newtypes); +- else +- fprintf (dump_file, "\nNo structures to transform.\n"); ++ if (dump_file) ++ { ++ if (newtypes) ++ fprintf (dump_file, "\nNumber of structures to transform in" ++ " struct split is %d\n", newtypes); ++ else ++ fprintf (dump_file, "\nNo structures to transform in" ++ " struct split.\n"); ++ } ++ } ++ else ++ { ++ if (dump_file) ++ { ++ if (newtypes) ++ fprintf (dump_file, "\nNumber of structures to transform" ++ " is %d\n", newtypes); ++ else ++ fprintf (dump_file, "\nNo structures to transform.\n"); ++ } + } + ++ + return newtypes != 0; + } + +@@ -5663,8 +5650,7 @@ ipa_struct_reorg::create_new_args (cgraph_node *new_node) + char *name = NULL; + if (tname) + { +- name = concat (tname, current_mode == STRUCT_LAYOUT_OPTIMIZE +- ? ".slo.0" : ".reorg.0", NULL); ++ name = concat (tname, ".reorg.0", NULL); + new_name = get_identifier (name); + free (name); + } +@@ -5751,9 +5737,7 @@ ipa_struct_reorg::create_new_functions (void) + } + statistics_counter_event (NULL, "Create new function", 1); + new_node = node->create_version_clone_with_body ( +- vNULL, NULL, NULL, NULL, NULL, +- current_mode == STRUCT_LAYOUT_OPTIMIZE +- ? 
"slo" : "struct_reorg"); ++ vNULL, NULL, NULL, NULL, NULL, "struct_reorg"); + new_node->can_change_signature = node->can_change_signature; + new_node->make_local (); + f->newnode = new_node; +@@ -5871,7 +5855,7 @@ ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_ + newbase1 = build_fold_addr_expr (newbase1); + if (indirect) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + /* Supports the MEM_REF offset. + _1 = MEM[(struct arc *)ap_1 + 72B].flow; +@@ -5927,8 +5911,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + { + bool remove = false; + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION + && remove_dead_field_stmt (gimple_assign_lhs (stmt))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -5964,10 +5947,10 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + return remove; + } + +- if ((current_mode != STRUCT_LAYOUT_OPTIMIZE ++ if ((current_layout_opt_level < STRUCT_REORDER_FIELDS + && (gimple_assign_rhs_code (stmt) == EQ_EXPR + || gimple_assign_rhs_code (stmt) == NE_EXPR)) +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) + == tcc_comparison))) + { +@@ -5977,7 +5960,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + tree newrhs2[max_split]; + tree_code rhs_code = gimple_assign_rhs_code (stmt); + tree_code code = rhs_code == EQ_EXPR ? 
BIT_AND_EXPR : BIT_IOR_EXPR; +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && rhs_code != EQ_EXPR && rhs_code != NE_EXPR) + { + code = rhs_code; +@@ -6024,8 +6007,9 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + _6 = _4 + _5; + _5 = (long unsigned int) _3; + _3 = _1 - old_2. */ +- if (current_mode != STRUCT_LAYOUT_OPTIMIZE +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE && (num != NULL))) ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS ++ && (num != NULL))) + { + num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); + } +@@ -6053,7 +6037,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + } + + /* Support POINTER_DIFF_EXPR rewriting. */ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR) + { + tree rhs1 = gimple_assign_rhs1 (stmt); +@@ -6240,7 +6224,8 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + srfunction *f = find_function (node); + + /* Add a safe func mechanism. */ +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE && f && f->is_safe_func) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS ++ && f && f->is_safe_func) + { + tree expr = gimple_call_arg (stmt, 0); + tree newexpr[max_split]; +@@ -6367,9 +6352,9 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) + tree_code rhs_code = gimple_cond_code (stmt); + + /* Handle only equals or not equals conditionals. 
*/ +- if ((current_mode != STRUCT_LAYOUT_OPTIMIZE ++ if ((current_layout_opt_level < STRUCT_REORDER_FIELDS + && (rhs_code != EQ_EXPR && rhs_code != NE_EXPR)) +- || (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE_CLASS (rhs_code) != tcc_comparison)) + return false; + tree lhs = gimple_cond_lhs (stmt); +@@ -6429,7 +6414,7 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) + bool + ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + /* Delete debug gimple now. */ + return true; +@@ -6593,7 +6578,7 @@ ipa_struct_reorg::rewrite_functions (void) + then don't rewrite any accesses. */ + if (!create_new_types ()) + { +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < functions.length (); i++) + { +@@ -6612,7 +6597,7 @@ ipa_struct_reorg::rewrite_functions (void) + return 0; + } + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE && dump_file) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS && dump_file) + { + fprintf (dump_file, "=========== all created newtypes: ===========\n\n"); + dump_newtypes (dump_file); +@@ -6622,13 +6607,13 @@ ipa_struct_reorg::rewrite_functions (void) + { + retval = TODO_remove_functions; + create_new_functions (); +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + prune_escaped_types (); + } + } + +- if (current_mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < functions.length (); i++) + { +@@ -6794,13 +6779,13 @@ ipa_struct_reorg::execute_struct_relayout (void) + } + + unsigned int +-ipa_struct_reorg::execute (enum srmode mode) ++ipa_struct_reorg::execute (unsigned int opt) + { + unsigned int ret = 0; + +- if 
(mode == NORMAL || mode == STRUCT_LAYOUT_OPTIMIZE) ++ if (opt != COMPLETE_STRUCT_RELAYOUT) + { +- current_mode = mode; ++ current_layout_opt_level = opt; + /* If there is a top-level inline-asm, + the pass immediately returns. */ + if (symtab->first_asm_symbol ()) +@@ -6809,20 +6794,20 @@ ipa_struct_reorg::execute (enum srmode mode) + } + record_accesses (); + prune_escaped_types (); +- if (current_mode == NORMAL) ++ if (opt == STRUCT_SPLIT) + { + analyze_types (); + } + + ret = rewrite_functions (); + } +- else if (mode == COMPLETE_STRUCT_RELAYOUT) ++ else // do COMPLETE_STRUCT_RELAYOUT + { + if (dump_file) + { + fprintf (dump_file, "\n\nTry Complete Struct Relayout:\n"); + } +- current_mode = COMPLETE_STRUCT_RELAYOUT; ++ current_layout_opt_level = COMPLETE_STRUCT_RELAYOUT; + if (symtab->first_asm_symbol ()) + { + return 0; +@@ -6861,67 +6846,48 @@ public: + virtual unsigned int execute (function *) + { + unsigned int ret = 0; +- ret = ipa_struct_reorg ().execute (NORMAL); +- if (!ret) ++ unsigned int ret_reorg = 0; ++ unsigned int level = 0; ++ switch (struct_layout_optimize_level) + { +- ret = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT); ++ case 3: level |= DEAD_FIELD_ELIMINATION; ++ // FALLTHRU ++ case 2: level |= STRUCT_REORDER_FIELDS; ++ // FALLTHRU ++ case 1: ++ level |= COMPLETE_STRUCT_RELAYOUT; ++ level |= STRUCT_SPLIT; ++ break; ++ case 0: break; ++ default: gcc_unreachable (); + } +- return ret; +- } + +-}; // class pass_ipa_struct_reorg +- +-bool +-pass_ipa_struct_reorg::gate (function *) +-{ +- return (optimize >= 3 +- && flag_ipa_struct_reorg +- /* Don't bother doing anything if the program has errors. */ +- && !seen_error () +- && flag_lto_partition == LTO_PARTITION_ONE +- /* Only enable struct optimizations in C since other +- languages' grammar forbid. */ +- && lang_c_p () +- /* Only enable struct optimizations in lto or whole_program. 
*/ +- && (in_lto_p || flag_whole_program)); +-} ++ /* Preserved for backward compatibility, reorder fields needs run before ++ struct split and complete struct relayout. */ ++ if (flag_ipa_reorder_fields && level < STRUCT_REORDER_FIELDS) ++ ret = ipa_struct_reorg ().execute (STRUCT_REORDER_FIELDS); + +-const pass_data pass_data_ipa_struct_layout = +-{ +- SIMPLE_IPA_PASS, // type +- "struct_layout", // name +- OPTGROUP_NONE, // optinfo_flags +- TV_IPA_STRUCT_LAYOUT, // tv_id +- 0, // properties_required +- 0, // properties_provided +- 0, // properties_destroyed +- 0, // todo_flags_start +- 0, // todo_flags_finish +-}; ++ if (level >= STRUCT_REORDER_FIELDS) ++ ret = ipa_struct_reorg ().execute (level); + +-class pass_ipa_struct_layout : public simple_ipa_opt_pass +-{ +-public: +- pass_ipa_struct_layout (gcc::context *ctxt) +- : simple_ipa_opt_pass (pass_data_ipa_struct_layout, ctxt) +- {} ++ if (level >= COMPLETE_STRUCT_RELAYOUT) ++ { ++ /* Preserved for backward compatibility. */ ++ ret_reorg = ipa_struct_reorg ().execute (STRUCT_SPLIT); ++ if (!ret_reorg) ++ ret_reorg = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT); ++ } + +- /* opt_pass methods: */ +- virtual bool gate (function *); +- virtual unsigned int execute (function *) +- { +- unsigned int ret = 0; +- ret = ipa_struct_reorg ().execute (STRUCT_LAYOUT_OPTIMIZE); +- return ret; ++ return ret | ret_reorg; + } + +-}; // class pass_ipa_struct_layout ++}; // class pass_ipa_struct_reorg + + bool +-pass_ipa_struct_layout::gate (function *) ++pass_ipa_struct_reorg::gate (function *) + { + return (optimize >= 3 +- && flag_ipa_struct_layout ++ && flag_ipa_struct_reorg + /* Don't bother doing anything if the program has errors. 
*/ + && !seen_error () + && flag_lto_partition == LTO_PARTITION_ONE +@@ -6939,9 +6905,3 @@ make_pass_ipa_struct_reorg (gcc::context *ctxt) + { + return new pass_ipa_struct_reorg (ctxt); + } +- +-simple_ipa_opt_pass * +-make_pass_ipa_struct_layout (gcc::context *ctxt) +-{ +- return new pass_ipa_struct_layout (ctxt); +-} +diff --git a/gcc/opts.c b/gcc/opts.c +index c3877c24e..f12b13599 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -2696,15 +2696,20 @@ common_handle_option (struct gcc_options *opts, + break; + + case OPT_fipa_struct_reorg_: +- opts->x_struct_layout_optimize_level = value; +- if (value > 1) +- { +- SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_struct_layout, value); +- } + /* No break here - do -fipa-struct-reorg processing. */ + /* FALLTHRU. */ + case OPT_fipa_struct_reorg: + opts->x_flag_ipa_struct_reorg = value; ++ if (value && !opts->x_struct_layout_optimize_level) ++ { ++ /* Using the -fipa-struct-reorg option is equivalent to using ++ -fipa-struct-reorg=1. */ ++ opts->x_struct_layout_optimize_level = 1; ++ } ++ break; ++ ++ case OPT_fipa_reorder_fields: ++ SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_struct_reorg, value); + break; + + case OPT_fprofile_generate_: +diff --git a/gcc/passes.def b/gcc/passes.def +index 94554cc1d..f3b6048d8 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -175,7 +175,6 @@ along with GCC; see the file COPYING3. If not see + INSERT_PASSES_AFTER (all_late_ipa_passes) + NEXT_PASS (pass_materialize_all_clones); + NEXT_PASS (pass_ipa_pta); +- NEXT_PASS (pass_ipa_struct_layout); + /* FIXME: this should a normal IP pass */ + NEXT_PASS (pass_ipa_struct_reorg); + NEXT_PASS (pass_omp_simd_clone); +diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h +index f62222a96..1e7341b24 100644 +--- a/gcc/symbol-summary.h ++++ b/gcc/symbol-summary.h +@@ -61,7 +61,7 @@ protected: + { + /* In structure optimizatons, we call new to ensure that + the allocated memory is initialized to 0. 
*/ +- if (flag_ipa_struct_layout || flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg) + return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T () + : new T (); + /* Call gcc_internal_because we do not want to call finalizer for +@@ -77,7 +77,7 @@ protected: + ggc_delete (item); + else + { +- if (flag_ipa_struct_layout || flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg) + delete item; + else + m_allocator.remove (item); +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +index 4261d2352..afa181e07 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +@@ -83,4 +83,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +index 42d38c63a..c87db2aba 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +@@ -57,4 +57,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c +index 4e52564b6..d217f7bd8 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c +@@ -74,4 +74,4 @@ LBF_DFU_If_Needed (void) + } + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c +index 
894e9f460..e56bf467b 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c +@@ -74,4 +74,4 @@ claw_snd_conn_req (struct net_device *dev, __u8 link) + return rc; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +index 13a226ee8..c86c4bb3c 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +@@ -53,4 +53,4 @@ dtrace_bcmp (const void *s1, const void *s2, size_t len) + return (0); + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +index 1fff2cb9d..8484d29d2 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +@@ -159,4 +159,4 @@ gc_gray_mark (mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) + return children; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +index 0f577667c..300b2dac4 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +@@ -123,4 +123,4 @@ hpsa_cmd_dev_match (struct ctlr_info *h, struct CommandList *c, + return match; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git 
a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c +index 9801f87f1..9397b98ea 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c +@@ -79,4 +79,4 @@ ep0_reset (struct mv_udc *udc) + } + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +index 5570c762e..0ae75e13e 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +@@ -55,4 +55,4 @@ tcp_usr_listen (struct socket *so, struct proc *p) + COMMON_END (PRU_LISTEN); + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +index 50ab9cc24..512fb37a7 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +@@ -58,4 +58,4 @@ UI_LoadMods () + } + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +index 53583fe82..0dea5517c 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +@@ -55,4 +55,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git 
a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +index fd675ec2e..00bd911c1 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +@@ -27,4 +27,4 @@ main() { + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +index 600e7908b..0cfa6554e 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +@@ -68,4 +68,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +index f411364a7..4a7069244 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +index a4e723763..b91efe10f 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c 
b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +index 882a695b0..1b6a462e2 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +@@ -72,4 +72,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +index 20ecee545..346c71264 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +@@ -91,4 +91,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c +index ad879fc11..8eb16c8d6 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c +@@ -21,4 +21,4 @@ main() + { + g(); + } +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +index f0c9d8f39..7d7641f01 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +@@ -79,4 +79,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +index fa5e6c2d0..63fb3f828 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +@@ -53,4 +53,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +index 2966869e7..8c431e15f 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +@@ -57,4 +57,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c +index b74b9e5e9..efc95a4cd 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c 
+@@ -80,4 +80,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +index cf85c6109..75fc10575 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +@@ -69,4 +69,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +index 61fd9f755..9fb06877b 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +@@ -55,4 +55,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c +index 2c115da02..e8eb0eaa0 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c +@@ -27,4 +27,4 @@ main() { + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git 
a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +index c7646d8b7..bd535afd0 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +@@ -106,4 +106,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c +index 01c000375..11393a197 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c +@@ -84,4 +84,4 @@ main () + return cnt; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c +index f962163fe..d601fae64 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c +@@ -68,4 +68,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c +index 6558b1797..4d5f25aa1 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c +index 6d528ed5b..b3891fde9 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c +@@ -31,4 +31,4 @@ main () + printf (" Tree.\n"); + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c +index e95cf2e5d..4df79e4f0 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c +index cb4054522..49d2106d1 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c +@@ -55,4 +55,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c +index 38bddbae5..f71c7894f 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c +@@ -54,4 +54,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c +index 86034f042..721cee2c6 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c +@@ -62,4 +62,4 @@ main () + return 0; + } 
+ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c +index aae7c4bc9..3871d3d99 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c +@@ -69,4 +69,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c +index 8672e7552..5ad206433 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c +@@ -55,4 +55,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c +index 2d67434a0..a002f9889 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c +@@ -78,4 +78,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */ +\ No newline at end of file +diff --git 
a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c +index a8cf2b63c..f77a062bd 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c +@@ -89,4 +89,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c +index b6cba3c34..cba6225a5 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c +@@ -51,4 +51,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_layout" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c +index fb135ef0b..e3d219fe1 100644 +--- a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c ++++ b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c +@@ -22,4 +22,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +index ac5585813..2eebef768 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -64,8 +64,6 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout + "" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program" + + # -fipa-struct-reorg=2 +-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \ +- "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] \ + "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] \ +@@ -80,6 +78,8 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \ + "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout.c]] \ + "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \ ++ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" + + # -fipa-struct-reorg=3 + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +index 23444fe8b..a73ff8e7e 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +@@ -27,4 +27,4 @@ int main() + return g (); + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c +index 44babd35b..d7ab7d21c 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c +@@ -26,4 +26,4 @@ int main() + assert (f(1, 2) == 3); + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 2" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +index 2d1f95c99..9e5b192eb 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +@@ -25,4 +25,4 @@ int main() + f (NULL, NULL, 1); + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c +index e5a8a6c84..27b4b56e0 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c +@@ -56,4 +56,4 @@ main (void) + return 0; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c +index 733413a94..9e0f84da8 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c +@@ -26,4 +26,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c +index 0ef686e74..c868347e3 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c +@@ -39,4 +39,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c +index 23a53be53..185ff3125 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c +@@ -34,4 +34,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c +index 0cbb172f2..6294fb2a2 100644 +--- 
a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c +@@ -37,4 +37,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c +index f900b1349..3ca4e0e71 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c +@@ -28,4 +28,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c +index 13b4cdc70..ac99b9e62 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c +@@ -61,4 +61,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 2" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c +index dcc545964..afa145a57 100644 +--- a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c ++++ b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c +@@ -40,4 +40,4 @@ main () + + /*--------------------------------------------------------------------------*/ + /* Arrays are not handled. 
*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c +index 6d6375fc1..7fa6ae275 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c +@@ -23,4 +23,4 @@ int main() + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c +index 9d3213408..b3bde5836 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c +@@ -35,4 +35,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c +index d79992a53..f2bb82b94 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c +@@ -26,4 +26,4 @@ int main() + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of 
structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c +index ee9b0d765..0685cf8fe 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c +@@ -41,4 +41,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c +index 9ebb2b4cc..1a0a5a9c6 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c +@@ -40,5 +40,5 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ + +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c +index 60d2466e1..9533538c4 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c +@@ -30,4 +30,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c 
b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c +index 1c5a3aa15..100a93868 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c +@@ -29,4 +29,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c +index a0d1467fe..669d0b886 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c +@@ -42,4 +42,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c +index 6c24e1c8b..ce6c1544c 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c +@@ -37,4 +37,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c +index 8f2f8143f..eca2ebf32 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c ++++ 
b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c +@@ -40,4 +40,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c +index 98bf01a6d..6f8f94d7d 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c +@@ -44,4 +44,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c +index 66b0f967c..2ca729d1f 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c +@@ -44,4 +44,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c +index d28bcfb02..6000b2919 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c +@@ -39,4 +39,4 @@ main () + + /*--------------------------------------------------------------------------*/ + /* Two more 
fields structure is not splitted. */ +-/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split." "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c +index 37a6a43a8..f4a103409 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c +@@ -31,4 +31,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c +index ca9a8efcf..0c97173eb 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c +@@ -31,4 +31,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform in struct split" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c +index cba92e995..bc8eacc77 100644 +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c +@@ -64,4 +64,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in struct split is 2" "struct_reorg" } } */ +diff --git 
a/gcc/timevar.def b/gcc/timevar.def +index e9866ebf0..2814b14f2 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -80,7 +80,6 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") +-DEFTIMEVAR (TV_IPA_STRUCT_LAYOUT , "ipa struct layout optimization") + DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 09dd9b289..2b4864b89 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -510,7 +510,6 @@ extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); +-extern simple_ipa_opt_pass *make_pass_ipa_struct_layout (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context + *ctxt); +diff --git a/gcc/tree.c b/gcc/tree.c +index 84a440b35..8bbd54e0d 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -5222,7 +5222,7 @@ fld_simplified_type_name (tree type) + /* Simplify type will cause that struct A and struct A within + struct B are different type pointers, so skip it in structure + optimizations. 
*/ +- if ((flag_ipa_struct_layout || flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg + && lang_c_p () + && flag_lto_partition == LTO_PARTITION_ONE + && (in_lto_p || flag_whole_program)) +@@ -5469,7 +5469,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld) + /* Simplify type will cause that struct A and struct A within + struct B are different type pointers, so skip it in structure + optimizations. */ +- if ((flag_ipa_struct_layout || flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg + && lang_c_p () + && flag_lto_partition == LTO_PARTITION_ONE + && (in_lto_p || flag_whole_program)) +-- +2.25.1 + diff --git a/0073-PHIOPT-Add-A-B-op-CST-B-match-and-simplify-optimizat.patch b/0073-PHIOPT-Add-A-B-op-CST-B-match-and-simplify-optimizat.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a63b9cb3efeba9643d69533c0f5a74a63e25ff5 --- /dev/null +++ b/0073-PHIOPT-Add-A-B-op-CST-B-match-and-simplify-optimizat.patch @@ -0,0 +1,89 @@ +From 9af03694082c462bee86c167c78717089a93a188 Mon Sep 17 00:00:00 2001 +From: zhongyunde +Date: Sat, 5 Nov 2022 13:22:33 +0800 +Subject: [PATCH 25/29] [PHIOPT] Add A ? B op CST : B match and simplify + optimizations + + Refer to commit b6bdd7a4, use pattern match to implement simple + A ? B op CST : B (where CST is a power of 2) simplifications. + Fixes the 1st issue of https://gitee.com/openeuler/gcc/issues/I5TSG0?from=project-issue. + + gcc/ + * match.pd (A ? B op CST : B): Add simplifications for A ? B op POW2 : B + + gcc/testsuite/ + * gcc.dg/pr107190.c: New test. 
+--- + gcc/match.pd | 21 +++++++++++++++++++++ + gcc/testsuite/gcc.dg/pr107190.c | 27 +++++++++++++++++++++++++++ + 2 files changed, 48 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/pr107190.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index fc1a34dd3..5c5b5f89e 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3383,6 +3383,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++(if (canonicalize_math_p ()) ++/* These patterns are mostly used by PHIOPT to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. */ ++/* a ? x op C : x -> x op (a << log2(C)) when C is power of 2. */ ++ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate) ++ (simplify ++ (cond @0 (op:s @1 integer_pow2p@2) @1) ++ /* powerof2cst */ ++ (if (INTEGRAL_TYPE_P (type)) ++ (with { ++ tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); ++ } ++ (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; }))) ++ ) ++ ) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. 
*/ + /* This pattern implements two kinds simplification: +diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c +new file mode 100644 +index 000000000..235b2761a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr107190.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */ ++ ++# define BN_BITS4 32 ++# define BN_MASK2 (0xffffffffffffffffL) ++# define BN_MASK2l (0xffffffffL) ++# define BN_MASK2h (0xffffffff00000000L) ++# define BN_MASK2h1 (0xffffffff80000000L) ++# define LBITS(a) ((a)&BN_MASK2l) ++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) ++# define L2HBITS(a) (((a)< +Date: Wed, 9 Nov 2022 17:04:13 +0800 +Subject: [PATCH 26/29] [FORWPROP] Fold series of instructions into mul + + Merge the low part of series instructions into mul + + gcc/ + * match.pd: Add simplifcations for low part of mul + * common.opt: Add new option fmerge-mull enable with -O2 + * opts.c: default_options_table + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: New test. +--- + gcc/common.opt | 4 +++ + gcc/match.pd | 27 ++++++++++++++++++++ + gcc/opts.c | 1 + + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++ + 4 files changed, 66 insertions(+) + create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C + +diff --git a/gcc/common.opt b/gcc/common.opt +index ad147f7a9..6a7f66624 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2069,6 +2069,10 @@ fmerge-debug-strings + Common Report Var(flag_merge_debug_strings) Init(1) + Attempt to merge identical debug strings across compilation units. + ++fmerge-mull ++Common Report Var(flag_merge_mull) Init(0) Optimization ++Attempt to merge series instructions into mul. ++ + fmessage-length= + Common RejectNegative Joined UInteger + -fmessage-length= Limit diagnostics to characters per line. 0 suppresses line-wrapping. 
+diff --git a/gcc/match.pd b/gcc/match.pd +index 5c5b5f89e..f6c5befd7 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3404,6 +3404,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP1 to fold some operations into more ++ simple IR. The following scenario should be matched: ++ In0Lo = In0(D) & 4294967295; ++ In0Hi = In0(D) >> 32; ++ In1Lo = In1(D) & 4294967295; ++ In1Hi = In1(D) >> 32; ++ Addc = In0Lo * In1Hi + In0Hi * In1Lo; ++ addc32 = Addc << 32; ++ ResLo = In0Lo * In1Lo + addc32 (only for mask 0xffffffff, shift 32) */ ++(simplify ++ (plus:c (mult @4 @5) ++ (lshift ++ (plus:c ++ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) ++ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) ++ INTEGER_CST@3 ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) ++ && TYPE_PRECISION (type) == 64 && compare_tree_int (@2, 0xffffffff) == 0 && compare_tree_int (@3, 32) == 0) ++ (mult (convert:type @0) (convert:type @1)) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. */ + /* This pattern implements two kinds simplification: +diff --git a/gcc/opts.c b/gcc/opts.c +index f12b13599..751965e46 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -511,6 +511,7 @@ static const struct default_options default_options_table[] = + { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP }, + { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, ++ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 }, + + /* -O2 and above optimizations, but not -Os or -Og. 
*/ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +new file mode 100644 +index 000000000..2a3b74604 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++ ++# define BN_BITS4 32 ++# define BN_MASK2 (0xffffffffffffffffL) ++# define BN_MASK2l (0xffffffffL) ++# define BN_MASK2h (0xffffffff00000000L) ++# define BN_MASK2h1 (0xffffffff80000000L) ++# define LBITS(a) ((a)&BN_MASK2l) ++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) ++# define L2HBITS(a) (((a)< +Date: Fri, 11 Nov 2022 11:30:37 +0800 +Subject: [PATCH 27/29] [FORWPROP] Fold series of instructions into umulh + + Merge the high part of series instructions into umulh + + gcc/ + * match.pd: Add simplifcations for high part of umulh + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4 +--- + gcc/match.pd | 56 ++++++++++++++++++++++++++ + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++- + 2 files changed, 59 insertions(+), 2 deletions(-) + +diff --git a/gcc/match.pd b/gcc/match.pd +index f6c5befd7..433682afb 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3404,6 +3404,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP4 to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. */ ++/* Mul64 is defined as a multiplication algorithm which compute two 64-bit ++ integers to one 128-bit integer. Try to match the high part of mul pattern ++ after the low part of mul pattern is simplified. 
The following scenario ++ should be matched: ++ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { ++ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2 ++ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3 ++ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2 ++ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3 ++ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6 ++ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult @4 @7) @8 ++ AddH = (Addc >> 32) + In0Hi * In1Hi -- (plus@11 (rshift @9 @3) (mult @5 @7)) ++ addc32 = Addc << 32; -- lshift@10 @9 @3 ++ ResLo = In0(D) * In1(D); -- mult @0 @1 ++ ResHi = ((long unsigned int) (addc32 > ResLo)) + ++ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH; ++ } */ ++(simplify ++ (plus:c ++ (plus:c ++ (convert ++ (gt (lshift@10 @9 @3) ++ (mult:c @0 @1))) ++ (lshift ++ (convert ++ (gt @8 @9)) ++ @3)) ++ (plus:c@11 ++ (rshift ++ (plus:c@9 ++ (mult:c (bit_and@4 SSA_NAME@0 @2) @7) ++ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) ++ @3) ++ (mult:c (rshift@5 SSA_NAME@0 @3) ++ (rshift@7 SSA_NAME@1 INTEGER_CST@3)) ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) ++ && TYPE_PRECISION (type) == 64 && TYPE_UNSIGNED (type) && compare_tree_int (@2, 0xffffffff) == 0 && compare_tree_int (@3, 32) == 0) ++ (with { ++ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); ++ tree shift = build_int_cst (integer_type_node, 64); ++ } ++ (convert:type (rshift ++ (mult (convert:i128_type @0) ++ (convert:i128_type @1)) ++ { shift; }))) ++ ) ++) ++#endif ++ + #if GIMPLE + /* These patterns are mostly used by FORWPROP1 to fold some operations into more + simple IR. 
The following scenario should be matched: +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +index 2a3b74604..f61cf5e6f 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */ + + # define BN_BITS4 32 + # define BN_MASK2 (0xffffffffffffffffL) +@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1, + retHi = m11; + } + +-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */ ++/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */ ++/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */ +-- +2.25.1 + diff --git a/0076-Struct-Reorg-Fix-speccpu2006-462-double-free-I60YUV.patch b/0076-Struct-Reorg-Fix-speccpu2006-462-double-free-I60YUV.patch new file mode 100644 index 0000000000000000000000000000000000000000..8ff62a525f1e5fb105f60860c87dcf91456e708f --- /dev/null +++ b/0076-Struct-Reorg-Fix-speccpu2006-462-double-free-I60YUV.patch @@ -0,0 +1,38 @@ +From b669b4512e8425f4d752ef76bf61097cf40d9b35 Mon Sep 17 00:00:00 2001 +From: zgat <1071107108@qq.com> +Date: Thu, 17 Nov 2022 02:55:48 +0000 +Subject: [PATCH 28/29] [Struct Reorg] Fix speccpu2006 462 double free #I60YUV + modify gcc/tree.c. speccpu 462 runs normally after the modification + +Signed-off-by: zgat <1071107108@qq.com> +--- + gcc/tree.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/gcc/tree.c b/gcc/tree.c +index 2a532d15a..a61788651 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -5224,8 +5224,7 @@ fld_simplified_type_name (tree type) + optimizations. 
*/ + if (flag_ipa_struct_reorg + && lang_c_p () +- && flag_lto_partition == LTO_PARTITION_ONE +- && (in_lto_p || flag_whole_program)) ++ && flag_lto_partition == LTO_PARTITION_ONE) + return TYPE_NAME (type); + + if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL) +@@ -5471,8 +5470,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld) + optimizations. */ + if (flag_ipa_struct_reorg + && lang_c_p () +- && flag_lto_partition == LTO_PARTITION_ONE +- && (in_lto_p || flag_whole_program)) ++ && flag_lto_partition == LTO_PARTITION_ONE) + return t; + if (POINTER_TYPE_P (t)) + return fld_incomplete_type_of (t, fld); +-- +2.25.1 + diff --git a/0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch b/0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch new file mode 100644 index 0000000000000000000000000000000000000000..94e16cbc41dcb81ecf1e156d67802222a8c98ac7 --- /dev/null +++ b/0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch @@ -0,0 +1,1193 @@ +From 0445301c09926a20d5e02809b2cd35bddc9fa50e Mon Sep 17 00:00:00 2001 +From: liyancheng <412998149@qq.com> +Date: Wed, 9 Nov 2022 21:00:04 +0800 +Subject: [PATCH 29/29] [Struct Reorg] Add Safe Structure Pointer Compression + +Safe structure pointer compression allows safely compressing pointers +stored in structure to reduce the size of structure. +Add flag -fipa-struct-reorg=4 to enable safe structure pointer compression. +--- + gcc/common.opt | 5 +- + gcc/ipa-struct-reorg/ipa-struct-reorg.c | 905 +++++++++++++++++++++++- + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 4 + + gcc/params.opt | 4 + + 4 files changed, 877 insertions(+), 41 deletions(-) + +diff --git a/gcc/common.opt b/gcc/common.opt +index 6a7f66624..c9b099817 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1889,8 +1889,9 @@ Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization + Perform structure layout optimizations. 
+ + fipa-struct-reorg= +-Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 3) +--fipa-struct-reorg=[0,1,2,3] adding none, struct-reorg, reorder-fields, dfe optimizations. ++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 4) ++-fipa-struct-reorg=[0,1,2,3,4] adding none, struct-reorg, reorder-fields, ++dfe, safe-pointer-compression optimizations. + + fipa-extend-auto-profile + Common Report Var(flag_ipa_extend_auto_profile) +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +index 08cb51fee..3550411dc 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +@@ -83,6 +83,7 @@ along with GCC; see the file COPYING3. If not see + #include "gimple-iterator.h" + #include "gimple-walk.h" + #include "cfg.h" ++#include "cfghooks.h" /* For split_block. */ + #include "ssa.h" + #include "tree-dfa.h" + #include "fold-const.h" +@@ -145,7 +146,27 @@ namespace { + using namespace struct_reorg; + using namespace struct_relayout; + +-/* Return true iff TYPE is stdarg va_list type. */ ++static void ++set_var_attributes (tree var) ++{ ++ if (!var) ++ return; ++ gcc_assert (TREE_CODE (var) == VAR_DECL); ++ ++ DECL_ARTIFICIAL (var) = 1; ++ DECL_EXTERNAL (var) = 0; ++ TREE_STATIC (var) = 1; ++ TREE_PUBLIC (var) = 0; ++ TREE_USED (var) = 1; ++ DECL_CONTEXT (var) = NULL_TREE; ++ TREE_THIS_VOLATILE (var) = 0; ++ TREE_ADDRESSABLE (var) = 0; ++ TREE_READONLY (var) = 0; ++ if (is_global_var (var)) ++ set_decl_tls_model (var, TLS_MODEL_NONE); ++} ++ ++/* Return true if TYPE is stdarg va_list type. 
*/ + + static inline bool + is_va_list_type (tree type) +@@ -242,9 +263,15 @@ enum struct_layout_opt_level + STRUCT_SPLIT = 1 << 0, + COMPLETE_STRUCT_RELAYOUT = 1 << 1, + STRUCT_REORDER_FIELDS = 1 << 2, +- DEAD_FIELD_ELIMINATION = 1 << 3 ++ DEAD_FIELD_ELIMINATION = 1 << 3, ++ POINTER_COMPRESSION_SAFE = 1 << 4 + }; + ++/* Defines the target pointer size of compressed pointer, which should be 8, ++ 16, 32. */ ++ ++static int compressed_size = 32; ++ + static bool is_result_of_mult (tree arg, tree *num, tree struct_size); + bool isptrptr (tree type); + void get_base (tree &base, tree expr); +@@ -366,7 +393,10 @@ srtype::srtype (tree type) + : type (type), + chain_type (false), + escapes (does_not_escape), ++ pc_gptr (NULL_TREE), + visited (false), ++ pc_candidate (false), ++ has_legal_alloc_num (false), + has_alloc_array (0) + { + for (int i = 0; i < max_split; i++) +@@ -447,6 +477,31 @@ srtype::mark_escape (escape_type e, gimple *stmt) + } + } + ++/* Create a global header for compressed struct. */ ++ ++void ++srtype::create_global_ptr_for_pc () ++{ ++ if (!pc_candidate || pc_gptr != NULL_TREE) ++ return; ++ ++ const char *type_name = get_type_name (type); ++ gcc_assert (type_name != NULL); ++ ++ char *gptr_name = concat (type_name, "_pc", NULL); ++ tree new_name = get_identifier (gptr_name); ++ tree new_type = build_pointer_type (newtype[0]); ++ tree new_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, new_name, new_type); ++ set_var_attributes (new_var); ++ pc_gptr = new_var; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nType: %s has create global header for pointer" ++ " compression: %s\n", type_name, gptr_name); ++ ++ free (gptr_name); ++} ++ + /* Add FIELD to the list of fields that use this type. 
*/ + + void +@@ -790,20 +845,31 @@ srfield::create_new_optimized_fields (tree newtype[max_split], + fields.safe_push (field); + } + +- DECL_NAME (field) = DECL_NAME (fielddecl); + if (type == NULL) + { ++ DECL_NAME (field) = DECL_NAME (fielddecl); + /* Common members do not need to reconstruct. + Otherwise, int* -> int** or void* -> void**. */ + TREE_TYPE (field) = nt; ++ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); ++ } ++ else if (type->pc_candidate) ++ { ++ const char *old_name = IDENTIFIER_POINTER (DECL_NAME (fielddecl)); ++ char *new_name = concat (old_name, "_pc", NULL); ++ DECL_NAME (field) = get_identifier (new_name); ++ free (new_name); ++ TREE_TYPE (field) = make_unsigned_type (compressed_size); ++ SET_DECL_ALIGN (field, compressed_size); + } + else + { +- TREE_TYPE (field) +- = reconstruct_complex_type (TREE_TYPE (fielddecl), nt); ++ DECL_NAME (field) = DECL_NAME (fielddecl); ++ TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt); ++ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); + } ++ + DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl); +- SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); + DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl); + TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl); + DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl); +@@ -923,6 +989,10 @@ srtype::create_new_type (void) + && has_dead_field ()) + fprintf (dump_file, "Dead field elimination.\n"); + } ++ ++ if (pc_candidate && pc_gptr == NULL_TREE) ++ create_global_ptr_for_pc (); ++ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Created %d types:\n", maxclusters); +@@ -1341,6 +1411,30 @@ public: + void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); + unsigned execute_struct_relayout (void); + bool remove_dead_field_stmt (tree lhs); ++ ++ // Pointer compression methods: ++ void check_and_prune_struct_for_pointer_compression (void); ++ void 
try_rewrite_with_pointer_compression (gassign *, gimple_stmt_iterator *, ++ tree, tree, tree &, tree &); ++ bool safe_void_cmp_p (tree, srtype *); ++ bool pc_candidate_st_type_p (tree); ++ bool pc_candidate_tree_p (tree); ++ bool pc_type_conversion_candidate_p (tree); ++ bool pc_direct_rewrite_chance_p (tree, tree &); ++ bool compress_candidate_with_check (gimple_stmt_iterator *, tree, tree &); ++ bool compress_candidate (gassign *, gimple_stmt_iterator *, tree, tree &); ++ bool decompress_candidate_with_check (gimple_stmt_iterator *, tree, tree &); ++ bool decompress_candidate (gimple_stmt_iterator *, tree, tree, tree &, ++ tree &); ++ srtype *get_compression_candidate_type (tree); ++ tree compress_ptr_to_offset (tree, srtype *, gimple_stmt_iterator *); ++ tree decompress_offset_to_ptr (tree, srtype *, gimple_stmt_iterator *); ++ basic_block create_bb_for_compress_candidate (basic_block, tree, srtype *, ++ tree &); ++ basic_block create_bb_for_decompress_candidate (basic_block, tree, srtype *, ++ tree &); ++ basic_block create_bb_for_compress_nullptr (basic_block, tree &); ++ basic_block create_bb_for_decompress_nullptr (basic_block, tree, tree &); + }; + + struct ipa_struct_relayout +@@ -1391,29 +1485,6 @@ namespace { + + /* Methods for ipa_struct_relayout. */ + +-static void +-set_var_attributes (tree var) +-{ +- if (!var) +- { +- return; +- } +- gcc_assert (TREE_CODE (var) == VAR_DECL); +- +- DECL_ARTIFICIAL (var) = 1; +- DECL_EXTERNAL (var) = 0; +- TREE_STATIC (var) = 1; +- TREE_PUBLIC (var) = 0; +- TREE_USED (var) = 1; +- DECL_CONTEXT (var) = NULL; +- TREE_THIS_VOLATILE (var) = 0; +- TREE_ADDRESSABLE (var) = 0; +- TREE_READONLY (var) = 0; +- if (is_global_var (var)) +- { +- set_decl_tls_model (var, TLS_MODEL_NONE); +- } +-} + + tree + ipa_struct_relayout::create_new_vars (tree type, const char *name) +@@ -3135,6 +3206,19 @@ ipa_struct_reorg::find_vars (gimple *stmt) + records the right value _1 declaration. 
*/ + find_var (gimple_assign_rhs1 (stmt), stmt); + ++ /* Pointer types from non-zero pointer need to be escaped in pointer ++ compression and complete relayout. ++ e.g _1->t = (struct *) 0x400000. */ ++ if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT ++ && TREE_CODE (lhs) == COMPONENT_REF ++ && TREE_CODE (TREE_TYPE (lhs)) == POINTER_TYPE ++ && TREE_CODE (rhs) == INTEGER_CST ++ && !integer_zerop (rhs)) ++ { ++ mark_type_as_escape (inner_type (TREE_TYPE (lhs)), ++ escape_cast_int, stmt); ++ } ++ + /* Add a safe func mechanism. */ + bool l_find = true; + bool r_find = true; +@@ -3603,14 +3687,15 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + bool + ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) ++ if ((current_layout_opt_level & STRUCT_REORDER_FIELDS) + && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))) + { + return true; + } +- if ((current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT) ++ if ((current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT ++ || current_layout_opt_level & POINTER_COMPRESSION_SAFE) + && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + return true; + if ((current_layout_opt_level == STRUCT_SPLIT) +@@ -3737,15 +3822,20 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple + } + } + /* x_1 = y.x_nodes; void *x; +- Directly mark the structure pointer type assigned +- to the void* variable as escape. */ ++ Mark the structure pointer type assigned ++ to the void* variable as escape. Unless the void* is only used to compare ++ with variables of the same type. 
*/ + else if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (side) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (side)) + && SSA_NAME_VAR (side) + && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (side)))) + { +- mark_type_as_escape (TREE_TYPE (other), escape_cast_void, stmt); ++ if (current_layout_opt_level < POINTER_COMPRESSION_SAFE ++ || !safe_void_cmp_p (side, type)) ++ { ++ mark_type_as_escape (TREE_TYPE (other), escape_cast_void, stmt); ++ } + } + + check_ptr_layers (side, other, stmt); +@@ -4361,7 +4451,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, + void + ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + { +- if (current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT ++ if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && handled_allocation_stmt (stmt)) + { + tree arg0 = gimple_call_arg (stmt, 0); +@@ -4388,6 +4478,22 @@ ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + ? type->has_alloc_array + : type->has_alloc_array + 1; + } ++ if (current_layout_opt_level & POINTER_COMPRESSION_SAFE ++ && TREE_CODE (arg0) == INTEGER_CST) ++ { ++ /* Only known size during compilation can be optimized ++ at this level. 
*/ ++ unsigned HOST_WIDE_INT max_alloc_size = 0; ++ switch (compressed_size) ++ { ++ case 8: max_alloc_size = 0xff; break; // max of uint8 ++ case 16: max_alloc_size = 0xffff; break; // max of uint16 ++ case 32: max_alloc_size = 0xffffffff; break; // max of uint32 ++ default: gcc_unreachable (); break; ++ } ++ if (tree_to_uhwi (arg0) < max_alloc_size) ++ type->has_legal_alloc_num = true; ++ } + } + } + +@@ -4530,7 +4636,11 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) + && SSA_NAME_VAR (ssa_name) + && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (ssa_name)))) + { +- type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); ++ if (current_layout_opt_level < POINTER_COMPRESSION_SAFE ++ || !safe_void_cmp_p (ssa_name, type)) ++ { ++ type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); ++ } + } + gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); + +@@ -5509,6 +5619,8 @@ ipa_struct_reorg::create_new_types (void) + for (unsigned i = 0; i < types.length (); i++) + newtypes += types[i]->create_new_type (); + ++ /* Some new types may not have been created at create_new_type (), so ++ recreate new type for all struct fields. 
*/ + if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < types.length (); i++) +@@ -5519,9 +5631,18 @@ ipa_struct_reorg::create_new_types (void) + for (unsigned j = 0; j < fields->length (); j++) + { + tree field = (*fields)[j]; +- TREE_TYPE (field) +- = reconstruct_complex_type (TREE_TYPE (field), +- types[i]->newtype[0]); ++ if (types[i]->pc_candidate) ++ { ++ TREE_TYPE (field) ++ = make_unsigned_type (compressed_size); ++ SET_DECL_ALIGN (field, compressed_size); ++ } ++ else ++ { ++ TREE_TYPE (field) ++ = reconstruct_complex_type (TREE_TYPE (field), ++ types[i]->newtype[0]); ++ } + } + } + } +@@ -5906,6 +6027,556 @@ ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_ + return true; + } + ++/* Emit a series of gimples to compress the pointer to the index relative to ++ the global header. The basic blocks where gsi is located must have at least ++ one stmt. */ ++ ++tree ++ipa_struct_reorg::compress_ptr_to_offset (tree xhs, srtype *type, ++ gimple_stmt_iterator *gsi) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nCompress candidate pointer:\n"); ++ print_generic_expr (dump_file, xhs); ++ fprintf (dump_file, "\nto offset:\n"); ++ } ++ ++ /* Emit gimple _X1 = ptr - gptr. */ ++ tree pointer_addr = fold_convert (long_unsigned_type_node, xhs); ++ tree gptr_addr = fold_convert (long_unsigned_type_node, type->pc_gptr); ++ tree step1 = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, ++ pointer_addr, gptr_addr); ++ ++ /* Emit gimple _X2 = _X1 / sizeof (struct). */ ++ tree step2 = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_unsigned_type_node, ++ step1, TYPE_SIZE_UNIT (type->newtype[0])); ++ ++ /* Emit gimple _X3 = _X2 + 1. */ ++ tree step3 = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, ++ step2, build_one_cst (long_unsigned_type_node)); ++ ++ /* Emit _X4 = (compressed_size) _X3. 
*/ ++ tree step4 = gimplify_build1 (gsi, NOP_EXPR, ++ make_unsigned_type (compressed_size), step3); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_generic_expr (dump_file, step3); ++ fprintf (dump_file, "\n"); ++ } ++ return step4; ++} ++ ++/* Emit a series of gimples to decompress the index into the original ++ pointer. The basic blocks where gsi is located must have at least ++ one stmt. */ ++ ++tree ++ipa_struct_reorg::decompress_offset_to_ptr (tree xhs, srtype *type, ++ gimple_stmt_iterator *gsi) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nDecompress candidate offset:\n"); ++ print_generic_expr (dump_file, xhs); ++ fprintf (dump_file, "\nto pointer:\n"); ++ } ++ ++ /* Emit _X1 = xhs - 1. */ ++ tree offset = fold_convert (long_unsigned_type_node, xhs); ++ tree step1 = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, ++ offset, ++ build_one_cst (long_unsigned_type_node)); ++ ++ /* Emit _X2 = _X1 * sizeof (struct). */ ++ tree step2 = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, ++ step1, TYPE_SIZE_UNIT (type->newtype[0])); ++ ++ /* Emit _X3 = phead + _X2. */ ++ tree gptr_addr = fold_convert (long_unsigned_type_node, type->pc_gptr); ++ tree step3 = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, ++ gptr_addr, step2); ++ ++ /* Emit _X4 = (struct *) _X3. */ ++ tree step4 = gimplify_build1 (gsi, NOP_EXPR, TREE_TYPE (type->pc_gptr), ++ step3); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_generic_expr (dump_file, step3); ++ fprintf (dump_file, "\n"); ++ } ++ return step4; ++} ++ ++/* Return the compression candidate srtype of SSA_NAME or COMPONENT_REF. 
*/ ++ ++srtype * ++ipa_struct_reorg::get_compression_candidate_type (tree xhs) ++{ ++ if (xhs == NULL_TREE) ++ return NULL; ++ ++ if (TREE_CODE (xhs) == SSA_NAME || TREE_CODE (xhs) == COMPONENT_REF) ++ { ++ srtype *access_type = find_type (inner_type (TREE_TYPE (xhs))); ++ if (access_type != NULL && access_type->pc_candidate) ++ return access_type; ++ } ++ return NULL; ++} ++ ++/* True if the input type is the candidate type for pointer compression. */ ++ ++bool ++ipa_struct_reorg::pc_candidate_st_type_p (tree type) ++{ ++ if (type == NULL_TREE) ++ return false; ++ ++ if (TREE_CODE (type) == POINTER_TYPE) ++ { ++ if (TREE_CODE (TREE_TYPE (type)) == RECORD_TYPE) ++ { ++ srtype *access_type = find_type (TREE_TYPE (type)); ++ if (access_type != NULL && access_type->pc_candidate) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* True if the input xhs is a candidate for pointer compression. */ ++ ++bool ++ipa_struct_reorg::pc_candidate_tree_p (tree xhs) ++{ ++ if (xhs == NULL_TREE) ++ return false; ++ ++ if (TREE_CODE (xhs) == COMPONENT_REF) ++ { ++ srtype *base_type = find_type (TREE_TYPE (TREE_OPERAND (xhs, 0))); ++ if (base_type == NULL || base_type->has_escaped ()) ++ return false; ++ ++ return pc_candidate_st_type_p (TREE_TYPE (xhs)); ++ } ++ return false; ++} ++ ++/* True if xhs is a component_ref that base has escaped but uses a compression ++ candidate type. */ ++ ++bool ++ipa_struct_reorg::pc_type_conversion_candidate_p (tree xhs) ++{ ++ if (xhs == NULL_TREE) ++ return false; ++ ++ if (TREE_CODE (xhs) == COMPONENT_REF) ++ { ++ srtype *base_type = find_type (TREE_TYPE (TREE_OPERAND (xhs, 0))); ++ if (base_type != NULL && base_type->has_escaped ()) ++ return pc_candidate_st_type_p (TREE_TYPE (xhs)); ++ ++ } ++ return false; ++} ++ ++/* Creates a new basic block with zero for compressed null pointers. 
*/ ++ ++basic_block ++ipa_struct_reorg::create_bb_for_compress_nullptr (basic_block last_bb, ++ tree &phi) ++{ ++ basic_block new_bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (new_bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ ++ /* Emit phi = 0. */ ++ gimple_stmt_iterator gsi = gsi_last_bb (new_bb); ++ phi = make_ssa_name (make_unsigned_type (compressed_size)); ++ tree rhs = build_int_cst (make_unsigned_type (compressed_size), 0); ++ gimple *new_stmt = gimple_build_assign (phi, rhs); ++ gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nCreate bb %d for compress nullptr:\n", ++ new_bb->index); ++ gimple_dump_bb (dump_file, new_bb, 0, dump_flags); ++ } ++ return new_bb; ++} ++ ++/* Create a new basic block to compress the pointer to the index relative to ++ the allocated memory pool header. */ ++ ++basic_block ++ipa_struct_reorg::create_bb_for_compress_candidate (basic_block last_bb, ++ tree new_rhs, srtype *type, ++ tree &phi) ++{ ++ basic_block new_bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (new_bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ ++ gimple_stmt_iterator gsi = gsi_last_bb (new_bb); ++ /* compress_ptr_to_offset () needs at least one stmt in target bb. */ ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ phi = compress_ptr_to_offset (new_rhs, type, &gsi); ++ /* Remove the NOP created above. */ ++ gsi_remove (&gsi, true); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nCreate bb %d for compress candidate:\n", ++ new_bb->index); ++ gimple_dump_bb (dump_file, new_bb, 0, dump_flags); ++ } ++ return new_bb; ++} ++ ++/* Compression can be simplified by these following cases: ++ 1. if rhs is NULL, uses zero to represent it. ++ 2. 
if new_rhs has been converted into INTEGER_TYPE in the previous stmt, ++ just use it here. For example: ++ _1 = t->s ++ -> tt->s = _1. */ ++ ++bool ++ipa_struct_reorg::pc_direct_rewrite_chance_p (tree rhs, tree &new_rhs) ++{ ++ if (integer_zerop (rhs)) ++ { ++ new_rhs = build_int_cst (make_unsigned_type (compressed_size), 0); ++ return true; ++ } ++ else if (new_rhs && TREE_CODE (TREE_TYPE (new_rhs)) == INTEGER_TYPE) ++ { ++ return true; ++ } ++ return false; ++} ++ ++/* Perform pointer compression with check. The conversion will be as shown in ++ the following example: ++ Orig bb: ++ bb <1>: ++ _1->t = _2 ++ ++ will be transformed to: ++ bb <1>: ++ _3 = _2 ++ if (_2 == NULL) ++ goto bb <2> ++ else ++ goto bb <3> ++ ++ bb <2>: ++ _3 = 0 ++ goto bb <4> ++ ++ bb <3>: ++ ... ++ _4 = compress (_2) ++ goto bb <4> ++ ++ bb <4>: ++ _5 = PHI (_3, _4) ++ _1->t = _5 ++ The gsi will move to the beginning of split dst bb <4>, _1->t = _5 will be ++ emitted by rewrite_assign (). */ ++ ++bool ++ipa_struct_reorg::compress_candidate_with_check (gimple_stmt_iterator *gsi, ++ tree rhs, tree &new_rhs) ++{ ++ tree cond_lhs = make_ssa_name (TREE_TYPE (new_rhs)); ++ gimple *assign_stmt = gimple_build_assign (cond_lhs, new_rhs); ++ gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); ++ ++ /* Insert cond stmt. */ ++ tree rhs_pointer_type = build_pointer_type (TREE_TYPE (new_rhs)); ++ gcond *cond = gimple_build_cond (EQ_EXPR, cond_lhs, ++ build_int_cst (rhs_pointer_type, 0), ++ NULL_TREE, NULL_TREE); ++ gimple_set_location (cond, UNKNOWN_LOCATION); ++ gsi_insert_before (gsi, cond, GSI_SAME_STMT); ++ ++ gimple* cur_stmt = as_a (cond); ++ edge e = split_block (cur_stmt->bb, cur_stmt); ++ basic_block split_src_bb = e->src; ++ basic_block split_dst_bb = e->dest; ++ ++ /* Create bb for nullptr. */ ++ tree phi1 = NULL_TREE; ++ basic_block true_bb = create_bb_for_compress_nullptr (split_src_bb, phi1); ++ ++ /* Create bb for compression. 
*/ ++ srtype *type = get_compression_candidate_type (rhs); ++ gcc_assert (type != NULL); ++ tree phi2 = NULL_TREE; ++ basic_block false_bb = create_bb_for_compress_candidate (true_bb, new_rhs, ++ type, phi2); ++ ++ /* Rebuild and reset cfg. */ ++ remove_edge_raw (e); ++ ++ edge etrue = make_edge (split_src_bb, true_bb, EDGE_TRUE_VALUE); ++ etrue->probability = profile_probability::unlikely (); ++ true_bb->count = etrue->count (); ++ ++ edge efalse = make_edge (split_src_bb, false_bb, EDGE_FALSE_VALUE); ++ efalse->probability = profile_probability::likely (); ++ false_bb->count = efalse->count (); ++ ++ edge e1 = make_single_succ_edge (true_bb, split_dst_bb, EDGE_FALLTHRU); ++ edge e2 = make_single_succ_edge (false_bb, split_dst_bb, EDGE_FALLTHRU); ++ ++ tree phi = make_ssa_name (make_unsigned_type (compressed_size)); ++ gphi *phi_node = create_phi_node (phi, split_dst_bb); ++ add_phi_arg (phi_node, phi1, e1, UNKNOWN_LOCATION); ++ add_phi_arg (phi_node, phi2, e2, UNKNOWN_LOCATION); ++ ++ if (dom_info_available_p (CDI_DOMINATORS)) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, split_dst_bb, split_src_bb); ++ set_immediate_dominator (CDI_DOMINATORS, true_bb, split_src_bb); ++ set_immediate_dominator (CDI_DOMINATORS, false_bb, split_src_bb); ++ } ++ *gsi = gsi_start_bb (split_dst_bb); ++ new_rhs = phi; ++ return true; ++} ++ ++/* If there is a direct rewrite chance or simplification opportunity, perform ++ the simplified compression rewrite. Otherwise, create a cond expression and ++ two basic blocks to implement pointer compression. */ ++ ++bool ++ipa_struct_reorg::compress_candidate (gassign *stmt, gimple_stmt_iterator *gsi, ++ tree rhs, tree &new_rhs) ++{ ++ if (pc_direct_rewrite_chance_p (rhs, new_rhs)) ++ return true; ++ ++ return compress_candidate_with_check (gsi, rhs, new_rhs); ++} ++ ++/* Create a new basic block to decompress the index to null pointer. 
*/ ++ ++basic_block ++ipa_struct_reorg::create_bb_for_decompress_nullptr (basic_block last_bb, ++ tree new_rhs, ++ tree &phi_node) ++{ ++ basic_block new_bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (new_bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator gsi = gsi_last_bb (new_bb); ++ tree rhs_pointer_type = build_pointer_type (TREE_TYPE (new_rhs)); ++ phi_node = make_ssa_name (rhs_pointer_type); ++ gimple *new_stmt = gimple_build_assign (phi_node, ++ build_int_cst (rhs_pointer_type, 0)); ++ gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nCreate bb %d for decompress nullptr:\n", ++ new_bb->index); ++ gimple_dump_bb (dump_file, new_bb, 0, dump_flags); ++ } ++ return new_bb; ++} ++ ++/* Create a new basic block to decompress the index into original pointer. */ ++ ++basic_block ++ipa_struct_reorg::create_bb_for_decompress_candidate (basic_block last_bb, ++ tree lhs, srtype *type, ++ tree &phi_node) ++{ ++ basic_block new_bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (new_bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator gsi = gsi_last_bb (new_bb); ++ /* decompress_offset_to_ptr () needs at least one stmt in target bb. */ ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ phi_node = decompress_offset_to_ptr (lhs, type, &gsi); ++ /* Remove the NOP created above. */ ++ gsi_remove (&gsi, true); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nCreate bb %d for decompress candidate:\n", ++ new_bb->index); ++ gimple_dump_bb (dump_file, new_bb, 0, dump_flags); ++ } ++ return new_bb; ++} ++ ++/* Perform pointer decompression with check. 
The conversion will be as shown ++ in the following example: ++ Orig bb: ++ bb <1>: ++ _1 = _2->t ++ ++ will be transformed to: ++ bb <1>: ++ _3 = _2->t ++ if (_3 == 0) ++ goto bb <2> ++ else ++ goto bb <3> ++ ++ bb <2>: ++ _4 = NULL ++ goto bb <4> ++ ++ bb <3>: ++ ... ++ _5 = decompress (_3) ++ goto bb <4> ++ ++ bb <4>: ++ _6 = PHI (_4, _5) ++ _1 = _6 ++ The gsi will move to the beginning of split dst bb <4>, _1 = _6 will be ++ emitted by rewrite_assign (). */ ++ ++bool ++ipa_struct_reorg::decompress_candidate_with_check (gimple_stmt_iterator *gsi, ++ tree rhs, tree &new_rhs) ++{ ++ /* Insert cond stmt. */ ++ tree cond_lhs = make_ssa_name (TREE_TYPE (new_rhs)); ++ gassign *cond_assign = gimple_build_assign (cond_lhs, new_rhs); ++ gsi_insert_before (gsi, cond_assign, GSI_SAME_STMT); ++ ++ tree pc_type = make_unsigned_type (compressed_size); ++ gcond *cond = gimple_build_cond (EQ_EXPR, cond_lhs, ++ build_int_cst (pc_type, 0), ++ NULL_TREE, NULL_TREE); ++ gimple_set_location (cond, UNKNOWN_LOCATION); ++ gsi_insert_before (gsi, cond, GSI_SAME_STMT); ++ ++ /* Split bb. */ ++ gimple* cur_stmt = as_a (cond); ++ edge e = split_block (cur_stmt->bb, cur_stmt); ++ basic_block split_src_bb = e->src; ++ basic_block split_dst_bb = e->dest; ++ ++ /* Create bb for decompress nullptr. */ ++ tree phi1 = NULL_TREE; ++ basic_block true_bb = create_bb_for_decompress_nullptr (split_src_bb, ++ new_rhs, phi1); ++ ++ /* Create bb for decompression candidate. */ ++ tree phi2 = NULL_TREE; ++ srtype *type = get_compression_candidate_type (rhs); ++ gcc_assert (type != NULL); ++ basic_block false_bb = create_bb_for_decompress_candidate (true_bb, cond_lhs, ++ type, phi2); ++ ++ /* Refresh and reset cfg. 
*/ ++ remove_edge_raw (e); ++ ++ edge etrue = make_edge (split_src_bb, true_bb, EDGE_TRUE_VALUE); ++ etrue->probability = profile_probability::unlikely (); ++ true_bb->count = etrue->count (); ++ ++ edge efalse = make_edge (split_src_bb, false_bb, EDGE_FALSE_VALUE); ++ efalse->probability = profile_probability::likely (); ++ false_bb->count = efalse->count (); ++ ++ edge e1 = make_single_succ_edge (true_bb, split_dst_bb, EDGE_FALLTHRU); ++ edge e2 = make_single_succ_edge (false_bb, split_dst_bb, EDGE_FALLTHRU); ++ ++ tree phi = make_ssa_name (build_pointer_type (TREE_TYPE (cond_lhs))); ++ gphi *phi_node = create_phi_node (phi, split_dst_bb); ++ add_phi_arg (phi_node, phi1, e1, UNKNOWN_LOCATION); ++ add_phi_arg (phi_node, phi2, e2, UNKNOWN_LOCATION); ++ ++ if (dom_info_available_p (CDI_DOMINATORS)) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, split_dst_bb, split_src_bb); ++ set_immediate_dominator (CDI_DOMINATORS, true_bb, split_src_bb); ++ set_immediate_dominator (CDI_DOMINATORS, false_bb, split_src_bb); ++ } ++ *gsi = gsi_start_bb (split_dst_bb); ++ new_rhs = phi; ++ return true; ++} ++ ++/* If there is a simplification opportunity, perform the simplified ++ decompression rewrite. Otherwise, create a cond expression and two basic ++ blocks to implement pointer decompression. */ ++ ++bool ++ipa_struct_reorg::decompress_candidate (gimple_stmt_iterator *gsi, ++ tree lhs, tree rhs, tree &new_lhs, ++ tree &new_rhs) ++{ ++ // TODO: simplifiy check and rewrite will be pushed in next PR. ++ return decompress_candidate_with_check (gsi, rhs, new_rhs); ++} ++ ++/* Try to perform pointer compression and decompression. */ ++ ++void ++ipa_struct_reorg::try_rewrite_with_pointer_compression (gassign *stmt, ++ gimple_stmt_iterator ++ *gsi, tree lhs, ++ tree rhs, tree &new_lhs, ++ tree &new_rhs) ++{ ++ bool l = pc_candidate_tree_p (lhs); ++ bool r = pc_candidate_tree_p (rhs); ++ if (!l && !r) ++ { ++ tree tmp_rhs = new_rhs == NULL_TREE ? 
rhs : new_rhs; ++ if (pc_type_conversion_candidate_p (lhs)) ++ { ++ /* Transfer MEM[(struct *)_1].files = _4; ++ to MEM[(struct *)_1].files = (struct *)_4; */ ++ new_rhs = fold_convert (TREE_TYPE (lhs), tmp_rhs); ++ } ++ else if (pc_type_conversion_candidate_p (rhs)) ++ { ++ /* Transfer _4 = MEM[(struct *)_1].nodes; ++ to _4 = (new_struct *) MEM[(struct *)_1].nodes; */ ++ new_rhs = fold_convert (TREE_TYPE (new_lhs), tmp_rhs); ++ } ++ } ++ else if (l && r) ++ gcc_unreachable (); ++ else if (l) ++ { ++ if (!compress_candidate (stmt, gsi, rhs, new_rhs)) ++ gcc_unreachable (); ++ } ++ else if (r) ++ { ++ if (!decompress_candidate (gsi, lhs, rhs, new_lhs, new_rhs)) ++ gcc_unreachable (); ++ } ++} ++ + bool + ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + { +@@ -6109,6 +6780,9 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + fprintf (dump_file, "replaced with:\n"); + for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++) + { ++ if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE) ++ try_rewrite_with_pointer_compression (stmt, gsi, lhs, rhs, ++ newlhs[i], newrhs[i]); + gimple *newstmt = gimple_build_assign (newlhs[i] ? newlhs[i] : lhs, newrhs[i] ? newrhs[i] : rhs); + if (dump_file && (dump_flags & TDF_DETAILS)) + { +@@ -6183,6 +6857,13 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + gcc_assert (false); + gimple_call_set_lhs (g, decl->newdecl[i]); + gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ if (type->pc_candidate) ++ { ++ /* Init global header for pointer compression. 
*/ ++ gassign *gptr ++ = gimple_build_assign (type->pc_gptr, decl->newdecl[i]); ++ gsi_insert_before (gsi, gptr, GSI_SAME_STMT); ++ } + } + return true; + } +@@ -6649,6 +7330,12 @@ ipa_struct_reorg::rewrite_functions (void) + push_cfun (DECL_STRUCT_FUNCTION (node->decl)); + current_function = f; + ++ if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE) ++ { ++ calculate_dominance_info (CDI_DOMINATORS); ++ loop_optimizer_init (0); ++ } ++ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nBefore rewrite: %dth_%s\n", +@@ -6724,6 +7411,9 @@ ipa_struct_reorg::rewrite_functions (void) + + free_dominance_info (CDI_DOMINATORS); + ++ if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE) ++ loop_optimizer_finalize (); ++ + if (dump_file) + { + fprintf (dump_file, "\nAfter rewrite: %dth_%s\n", +@@ -6758,6 +7448,10 @@ ipa_struct_reorg::execute_struct_relayout (void) + { + continue; + } ++ if (get_type_name (types[i]->type) == NULL) ++ { ++ continue; ++ } + retval |= ipa_struct_relayout (type, this).execute (); + } + +@@ -6778,6 +7472,132 @@ ipa_struct_reorg::execute_struct_relayout (void) + return retval; + } + ++ ++/* True if the var with void type is only used to compare with the same ++ target type. */ ++ ++bool ++ipa_struct_reorg::safe_void_cmp_p (tree var, srtype *type) ++{ ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ if (is_gimple_debug (use_stmt)) ++ continue; ++ ++ if (gimple_code (use_stmt) == GIMPLE_COND) ++ { ++ tree lhs = gimple_cond_lhs (use_stmt); ++ tree rhs = gimple_cond_rhs (use_stmt); ++ tree xhs = lhs == var ? rhs : lhs; ++ if (types_compatible_p (inner_type (TREE_TYPE (xhs)), type->type)) ++ continue; ++ ++ } ++ return false; ++ } ++ return true; ++} ++ ++/* Mark the structure that should perform pointer compression. 
*/ ++ ++void ++ipa_struct_reorg::check_and_prune_struct_for_pointer_compression (void) ++{ ++ unsigned pc_transform_num = 0; ++ ++ if (dump_file) ++ fprintf (dump_file, "\nMark the structure that should perform pointer" ++ " compression:\n"); ++ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ srtype *type = types[i]; ++ if (dump_file) ++ print_generic_expr (dump_file, type->type); ++ ++ if (type->has_escaped ()) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has escaped by %s, skip compression.\n", ++ type->escape_reason ()); ++ continue; ++ } ++ if (TYPE_FIELDS (type->type) == NULL) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has zero field, skip compression.\n"); ++ continue; ++ } ++ if (type->chain_type) ++ { ++ if (dump_file) ++ fprintf (dump_file, " is chain_type, skip compression.\n"); ++ continue; ++ } ++ if (type->has_alloc_array != 1) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has alloc number: %d, skip compression.\n", ++ type->has_alloc_array); ++ continue; ++ } ++ if (get_type_name (type->type) == NULL) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has empty struct name," ++ " skip compression.\n"); ++ continue; ++ } ++ if ((current_layout_opt_level & POINTER_COMPRESSION_SAFE) ++ && !type->has_legal_alloc_num) ++ { ++ if (dump_file) ++ fprintf (dump_file, " has illegal struct array size," ++ " skip compression.\n"); ++ continue; ++ } ++ pc_transform_num++; ++ type->pc_candidate = true; ++ if (dump_file) ++ fprintf (dump_file, " attemps to do pointer compression.\n"); ++ } ++ ++ if (dump_file) ++ { ++ if (pc_transform_num) ++ fprintf (dump_file, "\nNumber of structures to transform in " ++ "pointer compression is %d\n", pc_transform_num); ++ else ++ fprintf (dump_file, "\nNo structures to transform in " ++ "pointer compression.\n"); ++ } ++} ++ ++/* Init pointer size from parameter param_pointer_compression_size. 
*/ ++ ++static void ++init_pointer_size_for_pointer_compression (void) ++{ ++ switch (param_pointer_compression_size) ++ { ++ case 8: ++ compressed_size = 8; // bit width of uint8 ++ break; ++ case 16: ++ compressed_size = 16; // bit width of uint16 ++ break; ++ case 32: ++ compressed_size = 32; // bit width of uint32 ++ break; ++ default: ++ error ("Invalid pointer compression size, using the following param: " ++ "\"--param compressed-pointer-size=[8,16,32]\""); ++ } ++} ++ + unsigned int + ipa_struct_reorg::execute (unsigned int opt) + { +@@ -6798,6 +7618,8 @@ ipa_struct_reorg::execute (unsigned int opt) + { + analyze_types (); + } ++ if (opt >= POINTER_COMPRESSION_SAFE) ++ check_and_prune_struct_for_pointer_compression (); + + ret = rewrite_functions (); + } +@@ -6850,6 +7672,8 @@ public: + unsigned int level = 0; + switch (struct_layout_optimize_level) + { ++ case 4: level |= POINTER_COMPRESSION_SAFE; ++ // FALLTHRU + case 3: level |= DEAD_FIELD_ELIMINATION; + // FALLTHRU + case 2: level |= STRUCT_REORDER_FIELDS; +@@ -6862,6 +7686,9 @@ public: + default: gcc_unreachable (); + } + ++ if (level & POINTER_COMPRESSION_SAFE) ++ init_pointer_size_for_pointer_compression (); ++ + /* Preserved for backward compatibility, reorder fields needs run before + struct split and complete struct relayout. 
*/ + if (flag_ipa_reorder_fields && level < STRUCT_REORDER_FIELDS) +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +index 936c0fa6f..d88799982 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -122,7 +122,10 @@ private: + public: + + tree newtype[max_split]; ++ tree pc_gptr; + bool visited; ++ bool pc_candidate; ++ bool has_legal_alloc_num; + int has_alloc_array; + + // Constructors +@@ -144,6 +147,7 @@ public: + void analyze (void); + bool has_dead_field (void); + void mark_escape (escape_type, gimple *stmt); ++ void create_global_ptr_for_pc (); + bool has_escaped (void) + { + return escapes != does_not_escape; +diff --git a/gcc/params.opt b/gcc/params.opt +index 9d1faa7ab..1d355819c 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -984,4 +984,8 @@ High execution rate loops to be analyzed in prefetch (in%). + Common Joined UInteger Var(param_prefetch_func_counts_threshold) Init(100) Param Optimization + Threshold functions of cache miss counts to be analyzed in prefetching. + ++-param=compressed-pointer-size= ++Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization ++Target size of compressed pointer, which should be 8, 16 or 32. ++ + ; This comment is to ensure we retain the blank line above. 
+-- +2.25.1 + diff --git a/0078-libsanitizer-cherry-pick-9cf13067cb5088626ba7-from-u.patch b/0078-libsanitizer-cherry-pick-9cf13067cb5088626ba7-from-u.patch new file mode 100644 index 0000000000000000000000000000000000000000..a7a34d700f5fdccf7544162583dc128e590b5591 --- /dev/null +++ b/0078-libsanitizer-cherry-pick-9cf13067cb5088626ba7-from-u.patch @@ -0,0 +1,42 @@ +From 2701442d0cf6292f6624443c15813d6d1a3562fe Mon Sep 17 00:00:00 2001 +From: Martin Liska +Date: Mon, 11 Jul 2022 22:03:14 +0200 +Subject: [PATCH] libsanitizer: cherry-pick 9cf13067cb5088626ba7 from upstream + +9cf13067cb5088626ba7ee1ec4c42ec59c7995a0 [sanitizer] Remove #include to resolve fsconfig_command/mount_attr conflict with glibc 2.36 +--- + .../sanitizer_platform_limits_posix.cpp | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp +index 8ed3e92d270..97fd07acf9d 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp ++++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp +@@ -73,7 +73,9 @@ + #include + #include + #include ++#if SANITIZER_ANDROID + #include ++#endif + #include + #include + #include +@@ -869,10 +871,10 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); + unsigned IOCTL_EVIOCGPROP = IOCTL_NOT_PRESENT; + unsigned IOCTL_EVIOCSKEYCODE_V2 = IOCTL_NOT_PRESENT; + #endif +- unsigned IOCTL_FS_IOC_GETFLAGS = FS_IOC_GETFLAGS; +- unsigned IOCTL_FS_IOC_GETVERSION = FS_IOC_GETVERSION; +- unsigned IOCTL_FS_IOC_SETFLAGS = FS_IOC_SETFLAGS; +- unsigned IOCTL_FS_IOC_SETVERSION = FS_IOC_SETVERSION; ++ unsigned IOCTL_FS_IOC_GETFLAGS = _IOR('f', 1, long); ++ unsigned IOCTL_FS_IOC_GETVERSION = _IOR('v', 1, long); ++ unsigned IOCTL_FS_IOC_SETFLAGS = _IOW('f', 2, long); ++ unsigned IOCTL_FS_IOC_SETVERSION = _IOW('v', 2, long); + unsigned IOCTL_GIO_CMAP = GIO_CMAP; + unsigned IOCTL_GIO_FONT = GIO_FONT; 
+ unsigned IOCTL_GIO_UNIMAP = GIO_UNIMAP; +-- +2.33.0 + diff --git a/gcc.spec b/gcc.spec index 21b83b42a74e6e937dcb13a2a1c9caaa28bb8ea6..f405deef7ca9e288a5d46dbb9b5205e3c32e081c 100644 --- a/gcc.spec +++ b/gcc.spec @@ -61,7 +61,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: gcc Version: %{gcc_version} -Release: 16 +Release: 17 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org @@ -162,6 +162,36 @@ Patch45: 0045-Transposed-SLP-Enable-Transposed-SLP.patch Patch46: 0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch Patch47: 0047-DFE-Fix-the-bug-caused-by-inconsistent-types.patch Patch48: 0048-Struct-Reorg-Type-simplify-limitation-when-in-struct.patch +Patch49: 0049-build-Add-some-file-right-to-executable.patch +Patch50: 0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch +Patch51: 0051-Backport-phiopt-Fix-up-conditional_replacement-PR993.patch +Patch52: 0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch +Patch53: 0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch +Patch54: 0054-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch +Patch55: 0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch +Patch56: 0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch +Patch57: 0057-Backport-Add-support-for-__builtin_bswap128.patch +Patch58: 0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch +Patch59: 0059-Backport-Add-a-couple-of-A-CST1-CST2-match-and-simpl.patch +Patch60: 0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch +Patch61: 0061-Backport-Replace-conditional_replacement-with-match-.patch +Patch62: 0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch +Patch63: 0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch +Patch64: 0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch +Patch65: 0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch +Patch66: 
0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch +Patch67: 0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch +Patch68: 0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch +Patch69: 0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch +Patch70: 0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch +Patch71: 0071-PHIOPT-Disable-the-match-A-CST1-0-when-the-CST1-is-n.patch +Patch72: 0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch +Patch73: 0073-PHIOPT-Add-A-B-op-CST-B-match-and-simplify-optimizat.patch +Patch74: 0074-FORWPROP-Fold-series-of-instructions-into-mul.patch +Patch75: 0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch +Patch76: 0076-Struct-Reorg-Fix-speccpu2006-462-double-free-I60YUV.patch +Patch77: 0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch +Patch78: 0078-libsanitizer-cherry-pick-9cf13067cb5088626ba7-from-u.patch %global gcc_target_platform %{_arch}-linux-gnu @@ -664,6 +694,36 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch46 -p1 %patch47 -p1 %patch48 -p1 +%patch49 -p1 +%patch50 -p1 +%patch51 -p1 +%patch52 -p1 +%patch53 -p1 +%patch54 -p1 +%patch55 -p1 +%patch56 -p1 +%patch57 -p1 +%patch58 -p1 +%patch59 -p1 +%patch60 -p1 +%patch61 -p1 +%patch62 -p1 +%patch63 -p1 +%patch64 -p1 +%patch65 -p1 +%patch66 -p1 +%patch67 -p1 +%patch68 -p1 +%patch69 -p1 +%patch70 -p1 +%patch71 -p1 +%patch72 -p1 +%patch73 -p1 +%patch74 -p1 +%patch75 -p1 +%patch76 -p1 +%patch77 -p1 +%patch78 -p1 %build @@ -2684,6 +2744,12 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Wed Nov 23 2022 liyancheng <412998149@qq.com> - 10.3.1-17 +- Type:Sync +- ID:NA +- SUG:NA +- DESC:Sync patch from openeuler/gcc + * Fri Sep 16 2022 eastb233 - 10.3.1-16 - Type:Sync - ID:NA