From c5a8a72bcec0338c515455122aa1cbbcc35c1a6f Mon Sep 17 00:00:00 2001 From: "Cui,Lili" Date: Fri, 11 Aug 2023 10:21:14 +0800 Subject: [PATCH 1/8] [Sync] Add attribute hot judgement for INLINE_HINT_known_hot hint. We set up INLINE_HINT_known_hot hint only when we have profile feedback, now add function attribute judgement for it, when both caller and callee have __attribute__((hot)), we will also set up INLINE_HINT_known_hot hint for it. With this patch applied, ADL Multi-copy: 538.imagic_r 16.7% ICX Multi-copy: 538.imagic_r 15.2% CLX Multi-copy: 538.imagic_r 12.7% Znver3 Multi-copy: 538.imagic_r 10.6% Arm Multi-copy: 538.imagic_r 13.4% gcc/ChangeLog * ipa-inline-analysis.cc (do_estimate_edge_time): Add function attribute judgement for INLINE_HINT_known_hot hint. gcc/testsuite/ChangeLog: * gcc.dg/ipa/inlinehint-6.c: New test. (cherry picked from commit 8cf4a6a8c34172f371c1d9e6e375970b361f7007) --- ...t-judgement-for-INLINE_HINT_known_ho.patch | 124 ++++++++++++++++++ gcc.spec | 8 +- 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch diff --git a/0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch b/0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch new file mode 100644 index 0000000..34d0165 --- /dev/null +++ b/0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch @@ -0,0 +1,124 @@ +From 355eb8e20327242442d139fb052d3a3befde3dd7 Mon Sep 17 00:00:00 2001 +From: "Cui,Lili" +Date: Tue, 1 Nov 2022 09:16:49 +0800 +Subject: [PATCH] Add attribute hot judgement for INLINE_HINT_known_hot + hint. + +We set up INLINE_HINT_known_hot hint only when we have profile feedback, +now add function attribute judgement for it, when both caller and callee +have __attribute__((hot)), we will also set up INLINE_HINT_known_hot hint +for it. 
+ +With this patch applied, +ADL Multi-copy: 538.imagic_r 16.7% +ICX Multi-copy: 538.imagic_r 15.2% +CLX Multi-copy: 538.imagic_r 12.7% +Znver3 Multi-copy: 538.imagic_r 10.6% +Arm Multi-copy: 538.imagic_r 13.4% + +gcc/ChangeLog + + * ipa-inline-analysis.cc (do_estimate_edge_time): Add function attribute + judgement for INLINE_HINT_known_hot hint. + +gcc/testsuite/ChangeLog: + + * gcc.dg/ipa/inlinehint-6.c: New test. +--- + gcc/ipa-inline-analysis.cc | 13 ++++--- + gcc/testsuite/gcc.dg/ipa/inlinehint-6.c | 47 +++++++++++++++++++++++++ + 2 files changed, 56 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/ipa/inlinehint-6.c + +diff --git a/gcc/ipa-inline-analysis.cc b/gcc/ipa-inline-analysis.cc +index 11d8d09ee..16ac24cfc 100644 +--- a/gcc/ipa-inline-analysis.cc ++++ b/gcc/ipa-inline-analysis.cc +@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see + #include "ipa-utils.h" + #include "cfgexpand.h" + #include "gimplify.h" ++#include "attribs.h" + + /* Cached node/edge growths. */ + fast_call_summary *edge_growth_cache = NULL; +@@ -249,15 +250,19 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) + hints = estimates.hints; + } + +- /* When we have profile feedback, we can quite safely identify hot +- edges and for those we disable size limits. Don't do that when +- probability that caller will call the callee is low however, since it ++ /* When we have profile feedback or function attribute, we can quite safely ++ identify hot edges and for those we disable size limits. Don't do that ++ when probability that caller will call the callee is low however, since it + may hurt optimization of the caller's hot path. */ +- if (edge->count.ipa ().initialized_p () && edge->maybe_hot_p () ++ if ((edge->count.ipa ().initialized_p () && edge->maybe_hot_p () + && (edge->count.ipa ().apply_scale (2, 1) + > (edge->caller->inlined_to + ? 
edge->caller->inlined_to->count.ipa () + : edge->caller->count.ipa ()))) ++ || (lookup_attribute ("hot", DECL_ATTRIBUTES (edge->caller->decl)) ++ != NULL ++ && lookup_attribute ("hot", DECL_ATTRIBUTES (edge->callee->decl)) ++ != NULL)) + hints |= INLINE_HINT_known_hot; + + gcc_checking_assert (size >= 0); +diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c +new file mode 100644 +index 000000000..1f3be641c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-6.c +@@ -0,0 +1,47 @@ ++/* { dg-options "-O3 -c -fdump-ipa-inline-details -fno-early-inlining -fno-ipa-cp" } */ ++/* { dg-add-options bind_pic_locally } */ ++ ++#define size_t long long int ++ ++struct A ++{ ++ size_t f1, f2, f3, f4; ++}; ++struct C ++{ ++ struct A a; ++ size_t b; ++}; ++struct C x; ++ ++__attribute__((hot)) struct C callee (struct A *a, struct C *c) ++{ ++ c->a=(*a); ++ ++ if((c->b + 7) & 17) ++ { ++ c->a.f1 = c->a.f2 + c->a.f1; ++ c->a.f2 = c->a.f3 - c->a.f2; ++ c->a.f3 = c->a.f2 + c->a.f3; ++ c->a.f4 = c->a.f2 - c->a.f4; ++ c->b = c->a.f2; ++ ++ } ++ return *c; ++} ++ ++__attribute__((hot)) struct C caller (size_t d, size_t e, size_t f, size_t g, struct C *c) ++{ ++ struct A a; ++ a.f1 = 1 + d; ++ a.f2 = e; ++ a.f3 = 12 + f; ++ a.f4 = 68 + g; ++ if (c->b > 0) ++ return callee (&a, c); ++ else ++ return *c; ++} ++ ++/* { dg-final { scan-ipa-dump "known_hot" "inline" } } */ ++ +-- +2.31.1 + diff --git a/gcc.spec b/gcc.spec index 15da1b7..ee134e3 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. 
-%global gcc_release 4 +%global gcc_release 5 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -139,6 +139,7 @@ Provides: gcc(major) = %{gcc_major} Patch0: 0000-Version-Set-version-to-12.3.1.patch Patch1: 0001-CONFIG-Regenerate-configure-file.patch Patch2: 0002-libquadmath-Enable-libquadmath-on-kunpeng.patch +Patch3: 0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. @@ -609,6 +610,7 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch0 -p1 %patch1 -p1 %patch2 -p1 +%patch3 -p1 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE @@ -2712,6 +2714,10 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Fri Aug 11 2023 Cui,Lili 12.3.1-5 +- Type:Sync +- Add attribute hot judgement for INLINE_HINT_known_hot hint. + * Mon Jul 17 2023 huangxiaoquan 12.3.1-4 - Type:SPEC - DESC:Enable libquadmath on kunpeng -- Gitee From 0322d6d1d91c5fcf42c8a7384ddeb6f6e0cf9849 Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Fri, 11 Aug 2023 10:24:40 +0800 Subject: [PATCH 2/8] [Sync] Enable small loop unrolling for O2 Modern processors has multiple way instruction decoders For x86, icelake/zen3 has 5 uops, so for small loop with <= 4 instructions (usually has 3 uops with a cmp/jmp pair that can be macro-fused), the decoder would have 2 uops bubble for each iteration and the pipeline could not be fully utilized. Therefore, this patch enables loop unrolling for small size loop at O2 to fullfill the decoder as much as possible. It turns on rtl loop unrolling when targetm.loop_unroll_adjust exists and O2 plus speed only. In x86 backend the default behavior is to unroll small loops with less than 4 insns by 1 time. This improves 548.exchange2 by 9% on icelake and 7.4% on zen3 with 0.9% codesize increment. For other benchmarks the variants are minor and overall codesize increased by 0.2%. 
The kernel image size increased by 0.06%, with no impact on eembc. gcc/ChangeLog: * common/config/i386/i386-common.cc (ix86_optimization_table): Enable small loop unroll at O2 by default. * config/i386/i386.cc (ix86_loop_unroll_adjust): Adjust unroll factor if -munroll-only-small-loops enabled and -funroll-loops/ -funroll-all-loops are disabled. * config/i386/i386.h (struct processor_costs): Add 2 fields small_unroll_ninsns and small_unroll_factor. * config/i386/i386.opt: Add -munroll-only-small-loops. * doc/invoke.texi: Document -munroll-only-small-loops. * loop-init.cc (pass_rtl_unroll_loops::gate): Enable rtl loop unrolling for -O2-speed and above if target hook loop_unroll_adjust exists. (pass_rtl_unroll_loops::execute): Set UAP_UNROLL flag when target hook loop_unroll_adjust exists. * config/i386/x86-tune-costs.h: Update all processor costs with small_unroll_ninsns = 4 and small_unroll_factor = 2. gcc/testsuite/ChangeLog: * gcc.dg/guality/loop-1.c: Add additional option -mno-unroll-only-small-loops. * gcc.target/i386/pr86270.c: Add -mno-unroll-only-small-loops. * gcc.target/i386/pr93002.c: Likewise. 
(cherry picked from commit 79c6c64bfe6cd9e6def74b1486b6978bfb922d4f) --- 0004-Enable-small-loop-unrolling-for-O2.patch | 490 ++++++++++++++++++ gcc.spec | 8 +- 2 files changed, 497 insertions(+), 1 deletion(-) create mode 100644 0004-Enable-small-loop-unrolling-for-O2.patch diff --git a/0004-Enable-small-loop-unrolling-for-O2.patch b/0004-Enable-small-loop-unrolling-for-O2.patch new file mode 100644 index 0000000..3913fcf --- /dev/null +++ b/0004-Enable-small-loop-unrolling-for-O2.patch @@ -0,0 +1,490 @@ +From 1070bc24f53e851cae55320e26715cc594efcd2f Mon Sep 17 00:00:00 2001 +From: Hongyu Wang +Date: Thu, 8 Sep 2022 16:52:02 +0800 +Subject: [PATCH] Enable small loop unrolling for O2 + +Modern processors has multiple way instruction decoders +For x86, icelake/zen3 has 5 uops, so for small loop with <= 4 +instructions (usually has 3 uops with a cmp/jmp pair that can be +macro-fused), the decoder would have 2 uops bubble for each iteration +and the pipeline could not be fully utilized. + +Therefore, this patch enables loop unrolling for small size loop at O2 +to fullfill the decoder as much as possible. It turns on rtl loop +unrolling when targetm.loop_unroll_adjust exists and O2 plus speed only. +In x86 backend the default behavior is to unroll small loops with less +than 4 insns by 1 time. + +This improves 548.exchange2 by 9% on icelake and 7.4% on zen3 with +0.9% codesize increment. For other benchmarks the variants are minor +and overall codesize increased by 0.2%. + +The kernel image size increased by 0.06%, and no impact on eembc. + +gcc/ChangeLog: + + * common/config/i386/i386-common.cc (ix86_optimization_table): + Enable small loop unroll at O2 by default. + * config/i386/i386.cc (ix86_loop_unroll_adjust): Adjust unroll + factor if -munroll-only-small-loops enabled and -funroll-loops/ + -funroll-all-loops are disabled. + * config/i386/i386.h (struct processor_costs): Add 2 field + small_unroll_ninsns and small_unroll_factor. 
+ * config/i386/i386.opt: Add -munroll-only-small-loops. + * doc/invoke.texi: Document -munroll-only-small-loops. + * loop-init.cc (pass_rtl_unroll_loops::gate): Enable rtl + loop unrolling for -O2-speed and above if target hook + loop_unroll_adjust exists. + (pass_rtl_unroll_loops::execute): Set UAP_UNROLL flag + when target hook loop_unroll_adjust exists. + * config/i386/x86-tune-costs.h: Update all processor costs + with small_unroll_ninsns = 4 and small_unroll_factor = 2. + +gcc/testsuite/ChangeLog: + + * gcc.dg/guality/loop-1.c: Add additional option + -mno-unroll-only-small-loops. + * gcc.target/i386/pr86270.c: Add -mno-unroll-only-small-loops. + * gcc.target/i386/pr93002.c: Likewise. +--- + gcc/common/config/i386/i386-common.cc | 1 + + gcc/config/i386/i386.cc | 18 ++++++++ + gcc/config/i386/i386.h | 5 +++ + gcc/config/i386/i386.opt | 4 ++ + gcc/config/i386/x86-tune-costs.h | 58 +++++++++++++++++++++++++ + gcc/doc/invoke.texi | 11 ++++- + gcc/loop-init.cc | 10 +++-- + gcc/testsuite/gcc.dg/guality/loop-1.c | 2 + + gcc/testsuite/gcc.target/i386/pr86270.c | 2 +- + gcc/testsuite/gcc.target/i386/pr93002.c | 2 +- + 10 files changed, 107 insertions(+), 6 deletions(-) + +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index e2594cae4..cdd5caa55 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1687,6 +1687,7 @@ static const struct default_options ix86_option_optimization_table[] = + /* The STC algorithm produces the smallest code at -Os, for x86. */ + { OPT_LEVELS_2_PLUS, OPT_freorder_blocks_algorithm_, NULL, + REORDER_BLOCKS_ALGORITHM_STC }, ++ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 }, + /* Turn off -fschedule-insns by default. It tends to make the + problem with not enough registers even worse. 
*/ + { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 }, +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index 9a9ff3b34..e56004300 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -23570,6 +23570,24 @@ ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop) + unsigned i; + unsigned mem_count = 0; + ++ /* Unroll small size loop when unroll factor is not explicitly ++ specified. */ ++ if (!(flag_unroll_loops ++ || flag_unroll_all_loops ++ || loop->unroll)) ++ { ++ nunroll = 1; ++ ++ /* Any explicit -f{no-}unroll-{all-}loops turns off ++ -munroll-only-small-loops. */ ++ if (ix86_unroll_only_small_loops ++ && !OPTION_SET_P (flag_unroll_loops) ++ && loop->ninsns <= ix86_cost->small_unroll_ninsns) ++ nunroll = ix86_cost->small_unroll_factor; ++ ++ return nunroll; ++ } ++ + if (!TARGET_ADJUST_UNROLL) + return nunroll; + +diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h +index fce0b3564..688aaabd3 100644 +--- a/gcc/config/i386/i386.h ++++ b/gcc/config/i386/i386.h +@@ -219,6 +219,11 @@ struct processor_costs { + const char *const align_jump; /* Jump alignment. */ + const char *const align_label; /* Label alignment. */ + const char *const align_func; /* Function alignment. */ ++ ++ const unsigned small_unroll_ninsns; /* Insn count limit for small loop ++ to be unrolled. */ ++ const unsigned small_unroll_factor; /* Unroll factor for small loop to ++ be unrolled. */ + }; + + extern const struct processor_costs *ix86_cost; +diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt +index a3675e515..fc1b944ac 100644 +--- a/gcc/config/i386/i386.opt ++++ b/gcc/config/i386/i386.opt +@@ -1214,3 +1214,7 @@ Do not use GOT to access external symbols. + -param=x86-stlf-window-ninsns= + Target Joined UInteger Var(x86_stlf_window_ninsns) Init(64) Param + Instructions number above which STFL stall penalty can be compensated. 
++ ++munroll-only-small-loops ++Target Var(ix86_unroll_only_small_loops) Init(0) Save ++Enable conservative small loop unrolling. +diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h +index f105d57ca..db4c2da34 100644 +--- a/gcc/config/i386/x86-tune-costs.h ++++ b/gcc/config/i386/x86-tune-costs.h +@@ -135,6 +135,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* Processor costs (relative to an add) */ +@@ -244,6 +246,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */ + "4", /* Jump alignment. */ + NULL, /* Label alignment. */ + "4", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs i486_memcpy[2] = { +@@ -354,6 +358,8 @@ struct processor_costs i486_cost = { /* 486 specific costs */ + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs pentium_memcpy[2] = { +@@ -462,6 +468,8 @@ struct processor_costs pentium_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static const +@@ -563,6 +571,8 @@ struct processor_costs lakemont_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes +@@ -679,6 +689,8 @@ struct processor_costs pentiumpro_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. 
*/ + }; + + static stringop_algs geode_memcpy[2] = { +@@ -786,6 +798,8 @@ struct processor_costs geode_cost = { + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs k6_memcpy[2] = { +@@ -896,6 +910,8 @@ struct processor_costs k6_cost = { + "32:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "32", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* For some reason, Athlon deals better with REP prefix (relative to loops) +@@ -1007,6 +1023,8 @@ struct processor_costs athlon_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* K8 has optimized REP instruction for medium sized blocks, but for very +@@ -1127,6 +1145,8 @@ struct processor_costs k8_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for +@@ -1255,6 +1275,8 @@ struct processor_costs amdfam10_cost = { + "32:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "32", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* BDVER has optimized REP instruction for medium sized blocks, but for +@@ -1376,6 +1398,8 @@ const struct processor_costs bdver_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + +@@ -1529,6 +1553,8 @@ struct processor_costs znver1_cost = { + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. 
*/ + }; + + /* ZNVER2 has optimized REP instruction for medium sized blocks, but for +@@ -1686,6 +1712,8 @@ struct processor_costs znver2_cost = { + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + struct processor_costs znver3_cost = { +@@ -1818,6 +1846,8 @@ struct processor_costs znver3_cost = { + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* This table currently replicates znver3_cost table. */ +@@ -1952,6 +1982,8 @@ struct processor_costs znver4_cost = { + "16", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* skylake_cost should produce code tuned for Skylake familly of CPUs. */ +@@ -2076,6 +2108,8 @@ struct processor_costs skylake_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* icelake_cost should produce code tuned for Icelake family of CPUs. +@@ -2202,6 +2236,8 @@ struct processor_costs icelake_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* alderlake_cost should produce code tuned for alderlake family of CPUs. */ +@@ -2322,6 +2358,8 @@ struct processor_costs alderlake_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* BTVER1 has optimized REP instruction for medium sized blocks, but for +@@ -2435,6 +2473,8 @@ const struct processor_costs btver1_cost = { + "16:8:8", /* Jump alignment. 
*/ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs btver2_memcpy[2] = { +@@ -2545,6 +2585,8 @@ const struct processor_costs btver2_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "11", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs pentium4_memcpy[2] = { +@@ -2654,6 +2696,8 @@ struct processor_costs pentium4_cost = { + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs nocona_memcpy[2] = { +@@ -2766,6 +2810,8 @@ struct processor_costs nocona_cost = { + NULL, /* Jump alignment. */ + NULL, /* Label alignment. */ + NULL, /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs atom_memcpy[2] = { +@@ -2876,6 +2922,8 @@ struct processor_costs atom_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs slm_memcpy[2] = { +@@ -2986,6 +3034,8 @@ struct processor_costs slm_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs tremont_memcpy[2] = { +@@ -3110,6 +3160,8 @@ struct processor_costs tremont_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + static stringop_algs intel_memcpy[2] = { +@@ -3220,6 +3272,8 @@ struct processor_costs intel_cost = { + "16:8:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. 
*/ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* Generic should produce code tuned for Core-i7 (and newer chips) +@@ -3339,6 +3393,8 @@ struct processor_costs generic_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + + /* core_cost should produce code tuned for Core familly of CPUs. */ +@@ -3465,5 +3521,7 @@ struct processor_costs core_cost = { + "16:11:8", /* Jump alignment. */ + "0:0:8", /* Label alignment. */ + "16", /* Func alignment. */ ++ 4, /* Small unroll limit. */ ++ 2, /* Small unroll factor. */ + }; + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index ff8cd032f..16f4b367e 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1449,7 +1449,8 @@ See RS/6000 and PowerPC Options. + -mgeneral-regs-only -mcall-ms2sysv-xlogues -mrelax-cmpxchg-loop @gol + -mindirect-branch=@var{choice} -mfunction-return=@var{choice} @gol + -mindirect-branch-register -mharden-sls=@var{choice} @gol +--mindirect-branch-cs-prefix -mneeded -mno-direct-extern-access} ++-mindirect-branch-cs-prefix -mneeded -mno-direct-extern-access @gol ++-munroll-only-small-loops} + + @emph{x86 Windows Options} + @gccoptlist{-mconsole -mcygwin -mno-cygwin -mdll @gol +@@ -33183,6 +33184,14 @@ treat access to protected symbols as local symbols. The default is + @option{-mno-direct-extern-access} and executable compiled with + @option{-mdirect-extern-access} may not be binary compatible if + protected symbols are used in shared libraries and executable. ++ ++@item -munroll-only-small-loops ++@opindex munroll-only-small-loops ++@opindex mno-unroll-only-small-loops ++Controls conservative small loop unrolling. It is default enabled by ++O2, and unrolls loop with less than 4 insns by 1 time. Explicit ++-f[no-]unroll-[all-]loops would disable this flag to avoid any ++unintended unrolling behavior that user does not want. 
+ @end table + + @node x86 Windows Options +diff --git a/gcc/loop-init.cc b/gcc/loop-init.cc +index 1e4f6cfd7..f1c717041 100644 +--- a/gcc/loop-init.cc ++++ b/gcc/loop-init.cc +@@ -565,9 +565,12 @@ public: + {} + + /* opt_pass methods: */ +- virtual bool gate (function *) ++ virtual bool gate (function *fun) + { +- return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll); ++ return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll ++ || (targetm.loop_unroll_adjust ++ && optimize >= 2 ++ && optimize_function_for_speed_p (fun))); + } + + virtual unsigned int execute (function *); +@@ -583,7 +586,8 @@ pass_rtl_unroll_loops::execute (function *fun) + if (dump_file) + df_dump (dump_file); + +- if (flag_unroll_loops) ++ if (flag_unroll_loops ++ || targetm.loop_unroll_adjust) + flags |= UAP_UNROLL; + if (flag_unroll_all_loops) + flags |= UAP_UNROLL_ALL; +diff --git a/gcc/testsuite/gcc.dg/guality/loop-1.c b/gcc/testsuite/gcc.dg/guality/loop-1.c +index 1b1f6d322..a32ea445a 100644 +--- a/gcc/testsuite/gcc.dg/guality/loop-1.c ++++ b/gcc/testsuite/gcc.dg/guality/loop-1.c +@@ -1,5 +1,7 @@ + /* { dg-do run } */ + /* { dg-options "-fno-tree-scev-cprop -fno-tree-vectorize -g" } */ ++/* { dg-additional-options "-mno-unroll-only-small-loops" { target ia32 } } */ ++ + + #include "../nop.h" + +diff --git a/gcc/testsuite/gcc.target/i386/pr86270.c b/gcc/testsuite/gcc.target/i386/pr86270.c +index 81841ef5b..cbc9fbb04 100644 +--- a/gcc/testsuite/gcc.target/i386/pr86270.c ++++ b/gcc/testsuite/gcc.target/i386/pr86270.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2" } */ ++/* { dg-options "-O2 -mno-unroll-only-small-loops" } */ + + int *a; + long len; +diff --git a/gcc/testsuite/gcc.target/i386/pr93002.c b/gcc/testsuite/gcc.target/i386/pr93002.c +index 0248fcc00..f75a847f7 100644 +--- a/gcc/testsuite/gcc.target/i386/pr93002.c ++++ b/gcc/testsuite/gcc.target/i386/pr93002.c +@@ -1,6 +1,6 @@ + /* PR target/93002 */ + /* { dg-do compile } */ +-/* 
{ dg-options "-O2" } */ ++/* { dg-options "-O2 -mno-unroll-only-small-loops" } */ + /* { dg-final { scan-assembler-not "cmp\[^\n\r]*-1" } } */ + + volatile int sink; +-- +2.31.1 + diff --git a/gcc.spec b/gcc.spec index ee134e3..95314d0 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. -%global gcc_release 5 +%global gcc_release 6 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -140,6 +140,7 @@ Patch0: 0000-Version-Set-version-to-12.3.1.patch Patch1: 0001-CONFIG-Regenerate-configure-file.patch Patch2: 0002-libquadmath-Enable-libquadmath-on-kunpeng.patch Patch3: 0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch +Patch4: 0004-Enable-small-loop-unrolling-for-O2.patch # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. @@ -611,6 +612,7 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch1 -p1 %patch2 -p1 %patch3 -p1 +%patch4 -p1 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE @@ -2714,6 +2716,10 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Fri Aug 11 2023 Hongyu Wang 12.3.1-6 +- Type:Sync +- Enable small loop unrolling for O2. + * Fri Aug 11 2023 Cui,Lili 12.3.1-5 - Type:Sync - Add attribute hot judgement for INLINE_HINT_known_hot hint. -- Gitee From 24eb7b6504792bcb15d67bd1be238aadaf066789 Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Fri, 11 Aug 2023 10:26:25 +0800 Subject: [PATCH 3/8] [Sync] i386: Only enable small loop unrolling in backend [PR 107692] Followed by the discussion in pr107692, -munroll-only-small-loops Does not turns on/off -funroll-loops, and current check in pass_rtl_unroll_loops::gate would cause -fno-unroll-loops do not take effect. 
Revert the change about targetm.loop_unroll_adjust and apply the backend option change to strictly follow the rule that -funroll-loops takes full control of loop unrolling, and -munroll-only-small-loops just changes its behavior to unroll small size loops. gcc/ChangeLog: PR target/107692 * common/config/i386/i386-common.cc (ix86_optimization_table): Enable loop unroll at O2, disable -fweb and -frename-registers by default. * config/i386/i386-options.cc (ix86_override_options_after_change): Disable small loop unroll when funroll-loops enabled, reset cunroll_grow_size when it is not explicitly enabled. (ix86_option_override_internal): Call ix86_override_options_after_change instead of calling ix86_recompute_optlev_based_flags and ix86_default_align separately. * config/i386/i386.cc (ix86_loop_unroll_adjust): Adjust unroll factor if -munroll-only-small-loops enabled. * loop-init.cc (pass_rtl_unroll_loops::gate): Do not enable loop unrolling for -O2-speed. (pass_rtl_unroll_loops::execute): Remove targetm.loop_unroll_adjust check. gcc/testsuite/ChangeLog: PR target/107692 * gcc.dg/guality/loop-1.c: Remove additional option for ia32. * gcc.target/i386/pr86270.c: Add -fno-unroll-loops. * gcc.target/i386/pr93002.c: Likewise. 
(cherry picked from commit ad4ee2e6e9bc79e159a22ef422bf3be74060e47d) --- ...-small-loop-unrolling-in-backend-PR-.patch | 230 ++++++++++++++++++ gcc.spec | 8 +- 2 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch diff --git a/0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch b/0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch new file mode 100644 index 0000000..9e89306 --- /dev/null +++ b/0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch @@ -0,0 +1,230 @@ +From 96898a9cd8c159625848247bd2f3a09e5c12fcfa Mon Sep 17 00:00:00 2001 +From: Hongyu Wang +Date: Sat, 19 Nov 2022 09:38:00 +0800 +Subject: [PATCH] i386: Only enable small loop unrolling in backend [PR + 107692] + +Followed by the discussion in pr107692, -munroll-only-small-loops +Does not turns on/off -funroll-loops, and current check in +pass_rtl_unroll_loops::gate would cause -fno-unroll-loops do not take +effect. Revert the change about targetm.loop_unroll_adjust and apply +the backend option change to strictly follow the rule that +-funroll-loops takes full control of loop unrolling, and +munroll-only-small-loops just change its behavior to unroll small size +loops. + +gcc/ChangeLog: + + PR target/107692 + * common/config/i386/i386-common.cc (ix86_optimization_table): + Enable loop unroll O2, disable -fweb and -frename-registers + by default. + * config/i386/i386-options.cc + (ix86_override_options_after_change): + Disable small loop unroll when funroll-loops enabled, reset + cunroll_grow_size when it is not explicitly enabled. + (ix86_option_override_internal): Call + ix86_override_options_after_change instead of calling + ix86_recompute_optlev_based_flags and ix86_default_align + separately. + * config/i386/i386.cc (ix86_loop_unroll_adjust): Adjust unroll + factor if -munroll-only-small-loops enabled. 
+ * loop-init.cc (pass_rtl_unroll_loops::gate): Do not enable + loop unrolling for -O2-speed. + (pass_rtl_unroll_loops::execute): Rmove + targetm.loop_unroll_adjust check. + +gcc/testsuite/ChangeLog: + + PR target/107692 + * gcc.dg/guality/loop-1.c: Remove additional option for ia32. + * gcc.target/i386/pr86270.c: Add -fno-unroll-loops. + * gcc.target/i386/pr93002.c: Likewise. +--- + gcc/common/config/i386/i386-common.cc | 8 ++++++ + gcc/config/i386/i386-options.cc | 34 ++++++++++++++++++++++--- + gcc/config/i386/i386.cc | 18 ++++--------- + gcc/loop-init.cc | 10 +++----- + gcc/testsuite/gcc.dg/guality/loop-1.c | 2 -- + gcc/testsuite/gcc.target/i386/pr86270.c | 2 +- + gcc/testsuite/gcc.target/i386/pr93002.c | 2 +- + 7 files changed, 48 insertions(+), 28 deletions(-) + +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index cdd5caa55..f650e255f 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1687,7 +1687,15 @@ static const struct default_options ix86_option_optimization_table[] = + /* The STC algorithm produces the smallest code at -Os, for x86. */ + { OPT_LEVELS_2_PLUS, OPT_freorder_blocks_algorithm_, NULL, + REORDER_BLOCKS_ALGORITHM_STC }, ++ ++ /* Turn on -funroll-loops with -munroll-only-small-loops to enable small ++ loop unrolling at -O2. */ ++ { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 }, + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 }, ++ /* Turns off -frename-registers and -fweb which are enabled by ++ funroll-loops. */ ++ { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 }, ++ { OPT_LEVELS_ALL, OPT_fweb, NULL, 0 }, + /* Turn off -fschedule-insns by default. It tends to make the + problem with not enough registers even worse. 
*/ + { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 }, +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 099cec4b6..ff44ad4e0 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -1816,8 +1816,37 @@ ix86_recompute_optlev_based_flags (struct gcc_options *opts, + void + ix86_override_options_after_change (void) + { ++ /* Default align_* from the processor table. */ + ix86_default_align (&global_options); ++ + ix86_recompute_optlev_based_flags (&global_options, &global_options_set); ++ ++ /* Disable unrolling small loops when there's explicit ++ -f{,no}unroll-loop. */ ++ if ((OPTION_SET_P (flag_unroll_loops)) ++ || (OPTION_SET_P (flag_unroll_all_loops) ++ && flag_unroll_all_loops)) ++ { ++ if (!OPTION_SET_P (ix86_unroll_only_small_loops)) ++ ix86_unroll_only_small_loops = 0; ++ /* Re-enable -frename-registers and -fweb if funroll-loops ++ enabled. */ ++ if (!OPTION_SET_P (flag_web)) ++ flag_web = flag_unroll_loops; ++ if (!OPTION_SET_P (flag_rename_registers)) ++ flag_rename_registers = flag_unroll_loops; ++ /* -fcunroll-grow-size default follws -f[no]-unroll-loops. */ ++ if (!OPTION_SET_P (flag_cunroll_grow_size)) ++ flag_cunroll_grow_size = flag_unroll_loops ++ || flag_peel_loops ++ || optimize >= 3; ++ } ++ else ++ { ++ if (!OPTION_SET_P (flag_cunroll_grow_size)) ++ flag_cunroll_grow_size = flag_peel_loops || optimize >= 3; ++ } ++ + } + + /* Clear stack slot assignments remembered from previous functions. 
+@@ -2329,7 +2358,7 @@ ix86_option_override_internal (bool main_args_p, + + set_ix86_tune_features (opts, ix86_tune, opts->x_ix86_dump_tunes); + +- ix86_recompute_optlev_based_flags (opts, opts_set); ++ ix86_override_options_after_change (); + + ix86_tune_cost = processor_cost_table[ix86_tune]; + /* TODO: ix86_cost should be chosen at instruction or function granuality +@@ -2360,9 +2389,6 @@ ix86_option_override_internal (bool main_args_p, + || TARGET_64BIT_P (opts->x_ix86_isa_flags)) + opts->x_ix86_regparm = REGPARM_MAX; + +- /* Default align_* from the processor table. */ +- ix86_default_align (opts); +- + /* Provide default for -mbranch-cost= value. */ + SET_OPTION_IF_UNSET (opts, opts_set, ix86_branch_cost, + ix86_tune_cost->branch_cost); +diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc +index e56004300..462dce10e 100644 +--- a/gcc/config/i386/i386.cc ++++ b/gcc/config/i386/i386.cc +@@ -23572,20 +23572,12 @@ ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop) + + /* Unroll small size loop when unroll factor is not explicitly + specified. */ +- if (!(flag_unroll_loops +- || flag_unroll_all_loops +- || loop->unroll)) ++ if (ix86_unroll_only_small_loops && !loop->unroll) + { +- nunroll = 1; +- +- /* Any explicit -f{no-}unroll-{all-}loops turns off +- -munroll-only-small-loops. 
*/ +- if (ix86_unroll_only_small_loops +- && !OPTION_SET_P (flag_unroll_loops) +- && loop->ninsns <= ix86_cost->small_unroll_ninsns) +- nunroll = ix86_cost->small_unroll_factor; +- +- return nunroll; ++ if (loop->ninsns <= ix86_cost->small_unroll_ninsns) ++ return MIN (nunroll, ix86_cost->small_unroll_factor); ++ else ++ return 1; + } + + if (!TARGET_ADJUST_UNROLL) +diff --git a/gcc/loop-init.cc b/gcc/loop-init.cc +index f1c717041..1e4f6cfd7 100644 +--- a/gcc/loop-init.cc ++++ b/gcc/loop-init.cc +@@ -565,12 +565,9 @@ public: + {} + + /* opt_pass methods: */ +- virtual bool gate (function *fun) ++ virtual bool gate (function *) + { +- return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll +- || (targetm.loop_unroll_adjust +- && optimize >= 2 +- && optimize_function_for_speed_p (fun))); ++ return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll); + } + + virtual unsigned int execute (function *); +@@ -586,8 +583,7 @@ pass_rtl_unroll_loops::execute (function *fun) + if (dump_file) + df_dump (dump_file); + +- if (flag_unroll_loops +- || targetm.loop_unroll_adjust) ++ if (flag_unroll_loops) + flags |= UAP_UNROLL; + if (flag_unroll_all_loops) + flags |= UAP_UNROLL_ALL; +diff --git a/gcc/testsuite/gcc.dg/guality/loop-1.c b/gcc/testsuite/gcc.dg/guality/loop-1.c +index a32ea445a..1b1f6d322 100644 +--- a/gcc/testsuite/gcc.dg/guality/loop-1.c ++++ b/gcc/testsuite/gcc.dg/guality/loop-1.c +@@ -1,7 +1,5 @@ + /* { dg-do run } */ + /* { dg-options "-fno-tree-scev-cprop -fno-tree-vectorize -g" } */ +-/* { dg-additional-options "-mno-unroll-only-small-loops" { target ia32 } } */ +- + + #include "../nop.h" + +diff --git a/gcc/testsuite/gcc.target/i386/pr86270.c b/gcc/testsuite/gcc.target/i386/pr86270.c +index cbc9fbb04..98b012caf 100644 +--- a/gcc/testsuite/gcc.target/i386/pr86270.c ++++ b/gcc/testsuite/gcc.target/i386/pr86270.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mno-unroll-only-small-loops" } */ ++/* { dg-options "-O2 
-fno-unroll-loops" } */ + + int *a; + long len; +diff --git a/gcc/testsuite/gcc.target/i386/pr93002.c b/gcc/testsuite/gcc.target/i386/pr93002.c +index f75a847f7..7e2d869e1 100644 +--- a/gcc/testsuite/gcc.target/i386/pr93002.c ++++ b/gcc/testsuite/gcc.target/i386/pr93002.c +@@ -1,6 +1,6 @@ + /* PR target/93002 */ + /* { dg-do compile } */ +-/* { dg-options "-O2 -mno-unroll-only-small-loops" } */ ++/* { dg-options "-O2 -fno-unroll-loops" } */ + /* { dg-final { scan-assembler-not "cmp\[^\n\r]*-1" } } */ + + volatile int sink; +-- +2.31.1 + diff --git a/gcc.spec b/gcc.spec index 95314d0..aae0ee3 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. -%global gcc_release 6 +%global gcc_release 7 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -141,6 +141,7 @@ Patch1: 0001-CONFIG-Regenerate-configure-file.patch Patch2: 0002-libquadmath-Enable-libquadmath-on-kunpeng.patch Patch3: 0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch Patch4: 0004-Enable-small-loop-unrolling-for-O2.patch +Patch5: 0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. @@ -613,6 +614,7 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch2 -p1 %patch3 -p1 %patch4 -p1 +%patch5 -p1 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE @@ -2716,6 +2718,10 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Fri Aug 11 2023 Hongyu Wang 12.3.1-7 +- Type:Sync +- i386: Only enable small loop unrolling in backend [PR 107692]. + * Fri Aug 11 2023 Hongyu Wang 12.3.1-6 - Type:Sync - Enable small loop unrolling for O2. 
-- Gitee From 5e9724992d967ef598d70c92c5c936ebbf828946 Mon Sep 17 00:00:00 2001 From: h00564365 Date: Tue, 29 Aug 2023 11:38:59 +0800 Subject: [PATCH 4/8] [Sync] Sync patch from openeuler/gcc Sync patch from openeuler/gcc - 20230829 --- ...> 0001-Version-Set-version-to-12.3.1.patch | 2 +- ...ckport-inline-subword-atomic-patches.patch | 2042 ++++++ ...003-CONFIG-Regenerate-configure-file.patch | 2 +- ...admath-Enable-libquadmath-on-kunpeng.patch | 2 +- ...-B-op-CST-B-match-and-simplify-optim.patch | 89 + ...Fold-series-of-instructions-into-mul.patch | 130 + ...ld-series-of-instructions-into-umulh.patch | 105 + ...ble-mull64-transformation-by-default.patch | 66 + 0010-Version-Clear-DATESTAMP_s.patch | 26 + ...t-judgement-for-INLINE_HINT_known_ho.patch | 4 +- ...2-Enable-small-loop-unrolling-for-O2.patch | 4 +- ...-small-loop-unrolling-in-backend-PR-.patch | 4 +- ...are-Add-a-new-optimization-for-array.patch | 1981 ++++++ ...tructure-reorganization-optimization.patch | 6170 +++++++++++++++++ ...Relayout-Complete-Structure-Relayout.patch | 2056 ++++++ ...-bugfix-for-structure-reorganization.patch | 489 ++ ...-optimization-opportunity-for-ccmp-i.patch | 342 + ...-fp-model-Enable-fp-model-on-kunpeng.patch | 405 ++ ...-simdmath-Enable-simdmath-on-kunpeng.patch | 317 + ...orderFields-Structure-reorder-fields.patch | 5739 +++++++++++++++ ...ad-Field-Elimination-in-Struct-Reorg.patch | 1753 +++++ gcc.spec | 52 +- 22 files changed, 21762 insertions(+), 18 deletions(-) rename 0000-Version-Set-version-to-12.3.1.patch => 0001-Version-Set-version-to-12.3.1.patch (87%) create mode 100644 0002-RISCV-Backport-inline-subword-atomic-patches.patch rename 0001-CONFIG-Regenerate-configure-file.patch => 0003-CONFIG-Regenerate-configure-file.patch (95%) rename 0002-libquadmath-Enable-libquadmath-on-kunpeng.patch => 0004-libquadmath-Enable-libquadmath-on-kunpeng.patch (98%) create mode 100644 0006-MULL64-1-3-Add-A-B-op-CST-B-match-and-simplify-optim.patch create mode 100644 
0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch create mode 100644 0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch create mode 100644 0009-MULL64-Disable-mull64-transformation-by-default.patch create mode 100644 0010-Version-Clear-DATESTAMP_s.patch rename 0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch => 0011-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch (97%) rename 0004-Enable-small-loop-unrolling-for-O2.patch => 0012-Enable-small-loop-unrolling-for-O2.patch (99%) rename 0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch => 0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch (99%) create mode 100644 0014-Array-widen-compare-Add-a-new-optimization-for-array.patch create mode 100644 0015-Backport-Structure-reorganization-optimization.patch create mode 100644 0016-CompleteStructRelayout-Complete-Structure-Relayout.patch create mode 100644 0017-StructReorg-Some-bugfix-for-structure-reorganization.patch create mode 100644 0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch create mode 100644 0019-fp-model-Enable-fp-model-on-kunpeng.patch create mode 100644 0020-simdmath-Enable-simdmath-on-kunpeng.patch create mode 100644 0021-StructReorderFields-Structure-reorder-fields.patch create mode 100644 0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch diff --git a/0000-Version-Set-version-to-12.3.1.patch b/0001-Version-Set-version-to-12.3.1.patch similarity index 87% rename from 0000-Version-Set-version-to-12.3.1.patch rename to 0001-Version-Set-version-to-12.3.1.patch index 9817172..e5e920e 100644 --- a/0000-Version-Set-version-to-12.3.1.patch +++ b/0001-Version-Set-version-to-12.3.1.patch @@ -1,7 +1,7 @@ From 73ee6351353b036f466ba1aab9a9e7d7865bf972 Mon Sep 17 00:00:00 2001 From: eastb233 Date: Tue, 11 Jul 2023 16:07:51 +0800 -Subject: [PATCH] [Version] Set version to 12.3.1 +Subject: [PATCH 01/22] [Version] Set version to 12.3.1 --- gcc/BASE-VER | 2 +- diff --git 
a/0002-RISCV-Backport-inline-subword-atomic-patches.patch b/0002-RISCV-Backport-inline-subword-atomic-patches.patch new file mode 100644 index 0000000..d3d2c33 --- /dev/null +++ b/0002-RISCV-Backport-inline-subword-atomic-patches.patch @@ -0,0 +1,2042 @@ +From 123615a0aac59a731516ef11c1fe433d39b6573a Mon Sep 17 00:00:00 2001 +From: laokz +Date: Tue, 11 Jul 2023 21:03:14 +0800 +Subject: [PATCH 02/22] [RISCV] Backport inline subword atomic patches + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f797260adaf52bee0ec0e16190bbefbe1bfc3692 + +203f3060dd363361b172f7295f42bb6bf5ac0b3b +--- + gcc/config/riscv/linux.h | 10 - + gcc/config/riscv/riscv-protos.h | 2 + + gcc/config/riscv/riscv.cc | 49 ++ + gcc/config/riscv/riscv.opt | 4 + + gcc/config/riscv/sync.md | 301 +++++++++ + gcc/doc/invoke.texi | 10 +- + .../gcc.target/riscv/inline-atomics-1.c | 18 + + .../gcc.target/riscv/inline-atomics-2.c | 9 + + .../gcc.target/riscv/inline-atomics-3.c | 569 ++++++++++++++++++ + .../gcc.target/riscv/inline-atomics-4.c | 566 +++++++++++++++++ + .../gcc.target/riscv/inline-atomics-5.c | 87 +++ + .../gcc.target/riscv/inline-atomics-6.c | 87 +++ + .../gcc.target/riscv/inline-atomics-7.c | 69 +++ + .../gcc.target/riscv/inline-atomics-8.c | 69 +++ + libgcc/config/riscv/atomic.c | 2 + + 15 files changed, 1841 insertions(+), 11 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-1.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-2.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-3.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-4.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-5.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-6.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-7.c + create mode 100644 gcc/testsuite/gcc.target/riscv/inline-atomics-8.c + +diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h +index 
38803723b..b5c6c5027 100644 +--- a/gcc/config/riscv/linux.h ++++ b/gcc/config/riscv/linux.h +@@ -35,16 +35,6 @@ along with GCC; see the file COPYING3. If not see + #undef MUSL_DYNAMIC_LINKER + #define MUSL_DYNAMIC_LINKER "/lib/ld-musl-riscv" XLEN_SPEC MUSL_ABI_SUFFIX ".so.1" + +-/* Because RISC-V only has word-sized atomics, it requries libatomic where +- others do not. So link libatomic by default, as needed. */ +-#undef LIB_SPEC +-#ifdef LD_AS_NEEDED_OPTION +-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC \ +- " %{pthread:" LD_AS_NEEDED_OPTION " -latomic " LD_NO_AS_NEEDED_OPTION "}" +-#else +-#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC " -latomic " +-#endif +- + #define ICACHE_FLUSH_FUNC "__riscv_flush_icache" + + #define CPP_SPEC "%{pthread:-D_REENTRANT}" +diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h +index 65bb85f55..3b039e00d 100644 +--- a/gcc/config/riscv/riscv-protos.h ++++ b/gcc/config/riscv/riscv-protos.h +@@ -74,6 +74,8 @@ extern bool riscv_expand_block_move (rtx, rtx, rtx); + extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *); + extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *); + extern bool riscv_gpr_save_operation_p (rtx); ++extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *); ++extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *); + + /* Routines implemented in riscv-c.cc. */ + void riscv_cpu_cpp_builtins (cpp_reader *); +diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc +index 4939d9964..9cf79beba 100644 +--- a/gcc/config/riscv/riscv.cc ++++ b/gcc/config/riscv/riscv.cc +@@ -5605,6 +5605,55 @@ riscv_asan_shadow_offset (void) + return TARGET_64BIT ? (HOST_WIDE_INT_1 << 29) : 0; + } + ++/* Given memory reference MEM, expand code to compute the aligned ++ memory address, shift and mask values and store them into ++ *ALIGNED_MEM, *SHIFT, *MASK and *NOT_MASK. 
*/ ++ ++void ++riscv_subword_address (rtx mem, rtx *aligned_mem, rtx *shift, rtx *mask, ++ rtx *not_mask) ++{ ++ /* Align the memory address to a word. */ ++ rtx addr = force_reg (Pmode, XEXP (mem, 0)); ++ ++ rtx addr_mask = gen_int_mode (-4, Pmode); ++ ++ rtx aligned_addr = gen_reg_rtx (Pmode); ++ emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, addr_mask)); ++ ++ *aligned_mem = change_address (mem, SImode, aligned_addr); ++ ++ /* Calculate the shift amount. */ ++ emit_move_insn (*shift, gen_rtx_AND (SImode, gen_lowpart (SImode, addr), ++ gen_int_mode (3, SImode))); ++ emit_move_insn (*shift, gen_rtx_ASHIFT (SImode, *shift, ++ gen_int_mode (3, SImode))); ++ ++ /* Calculate the mask. */ ++ int unshifted_mask = GET_MODE_MASK (GET_MODE (mem)); ++ ++ emit_move_insn (*mask, gen_int_mode (unshifted_mask, SImode)); ++ ++ emit_move_insn (*mask, gen_rtx_ASHIFT (SImode, *mask, ++ gen_lowpart (QImode, *shift))); ++ ++ emit_move_insn (*not_mask, gen_rtx_NOT(SImode, *mask)); ++} ++ ++/* Leftshift a subword within an SImode register. */ ++ ++void ++riscv_lshift_subword (machine_mode mode, rtx value, rtx shift, ++ rtx *shifted_value) ++{ ++ rtx value_reg = gen_reg_rtx (SImode); ++ emit_move_insn (value_reg, simplify_gen_subreg (SImode, value, ++ mode, 0)); ++ ++ emit_move_insn(*shifted_value, gen_rtx_ASHIFT (SImode, value_reg, ++ gen_lowpart (QImode, shift))); ++} ++ + /* Initialize the GCC target structure. */ + #undef TARGET_ASM_ALIGNED_HI_OP + #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" +diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt +index 492aad123..328d848d6 100644 +--- a/gcc/config/riscv/riscv.opt ++++ b/gcc/config/riscv/riscv.opt +@@ -225,3 +225,7 @@ Enum(isa_spec_class) String(20191213) Value(ISA_SPEC_CLASS_20191213) + misa-spec= + Target RejectNegative Joined Enum(isa_spec_class) Var(riscv_isa_spec) Init(TARGET_DEFAULT_ISA_SPEC) + Set the version of RISC-V ISA spec. 
++ ++minline-atomics ++Target Var(TARGET_INLINE_SUBWORD_ATOMIC) Init(1) ++Always inline subword atomic operations. +diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md +index 86b41e6b0..9c4fbabc6 100644 +--- a/gcc/config/riscv/sync.md ++++ b/gcc/config/riscv/sync.md +@@ -21,8 +21,11 @@ + + (define_c_enum "unspec" [ + UNSPEC_COMPARE_AND_SWAP ++ UNSPEC_COMPARE_AND_SWAP_SUBWORD + UNSPEC_SYNC_OLD_OP ++ UNSPEC_SYNC_OLD_OP_SUBWORD + UNSPEC_SYNC_EXCHANGE ++ UNSPEC_SYNC_EXCHANGE_SUBWORD + UNSPEC_ATOMIC_STORE + UNSPEC_MEMORY_BARRIER + ]) +@@ -92,6 +95,135 @@ + "%F3amo.%A3 %0,%z2,%1" + [(set (attr "length") (const_int 8))]) + ++(define_insn "subword_atomic_fetch_strong_" ++ [(set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem ++ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location ++ (set (match_dup 1) ++ (unspec_volatile:SI ++ [(any_atomic:SI (match_dup 1) ++ (match_operand:SI 2 "register_operand" "rI")) ;; value for op ++ (match_operand:SI 3 "register_operand" "rI")] ;; mask ++ UNSPEC_SYNC_OLD_OP_SUBWORD)) ++ (match_operand:SI 4 "register_operand" "rI") ;; not_mask ++ (clobber (match_scratch:SI 5 "=&r")) ;; tmp_1 ++ (clobber (match_scratch:SI 6 "=&r"))] ;; tmp_2 ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++ { ++ return "1:\;" ++ "lr.w.aq\t%0, %1\;" ++ "\t%5, %0, %2\;" ++ "and\t%5, %5, %3\;" ++ "and\t%6, %0, %4\;" ++ "or\t%6, %6, %5\;" ++ "sc.w.rl\t%5, %6, %1\;" ++ "bnez\t%5, 1b"; ++ } ++ [(set (attr "length") (const_int 28))]) ++ ++(define_expand "atomic_fetch_nand" ++ [(match_operand:SHORT 0 "register_operand") ;; old value at mem ++ (not:SHORT (and:SHORT (match_operand:SHORT 1 "memory_operand") ;; mem location ++ (match_operand:SHORT 2 "reg_or_0_operand"))) ;; value for op ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++{ ++ /* We have no QImode/HImode atomics, so form a mask, then use ++ subword_atomic_fetch_strong_nand to implement a LR/SC version of the ++ 
operation. */ ++ ++ /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining ++ is disabled */ ++ ++ rtx old = gen_reg_rtx (SImode); ++ rtx mem = operands[1]; ++ rtx value = operands[2]; ++ rtx aligned_mem = gen_reg_rtx (SImode); ++ rtx shift = gen_reg_rtx (SImode); ++ rtx mask = gen_reg_rtx (SImode); ++ rtx not_mask = gen_reg_rtx (SImode); ++ ++ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); ++ ++ rtx shifted_value = gen_reg_rtx (SImode); ++ riscv_lshift_subword (mode, value, shift, &shifted_value); ++ ++ emit_insn (gen_subword_atomic_fetch_strong_nand (old, aligned_mem, ++ shifted_value, ++ mask, not_mask)); ++ ++ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, ++ gen_lowpart (QImode, shift))); ++ ++ emit_move_insn (operands[0], gen_lowpart (mode, old)); ++ ++ DONE; ++}) ++ ++(define_insn "subword_atomic_fetch_strong_nand" ++ [(set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem ++ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location ++ (set (match_dup 1) ++ (unspec_volatile:SI ++ [(not:SI (and:SI (match_dup 1) ++ (match_operand:SI 2 "register_operand" "rI"))) ;; value for op ++ (match_operand:SI 3 "register_operand" "rI")] ;; mask ++ UNSPEC_SYNC_OLD_OP_SUBWORD)) ++ (match_operand:SI 4 "register_operand" "rI") ;; not_mask ++ (clobber (match_scratch:SI 5 "=&r")) ;; tmp_1 ++ (clobber (match_scratch:SI 6 "=&r"))] ;; tmp_2 ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++ { ++ return "1:\;" ++ "lr.w.aq\t%0, %1\;" ++ "and\t%5, %0, %2\;" ++ "not\t%5, %5\;" ++ "and\t%5, %5, %3\;" ++ "and\t%6, %0, %4\;" ++ "or\t%6, %6, %5\;" ++ "sc.w.rl\t%5, %6, %1\;" ++ "bnez\t%5, 1b"; ++ } ++ [(set (attr "length") (const_int 32))]) ++ ++(define_expand "atomic_fetch_" ++ [(match_operand:SHORT 0 "register_operand") ;; old value at mem ++ (any_atomic:SHORT (match_operand:SHORT 1 "memory_operand") ;; mem location ++ (match_operand:SHORT 2 "reg_or_0_operand")) ;; value for op ++ (match_operand:SI 3 
"const_int_operand")] ;; model ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++{ ++ /* We have no QImode/HImode atomics, so form a mask, then use ++ subword_atomic_fetch_strong_ to implement a LR/SC version of the ++ operation. */ ++ ++ /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining ++ is disabled */ ++ ++ rtx old = gen_reg_rtx (SImode); ++ rtx mem = operands[1]; ++ rtx value = operands[2]; ++ rtx aligned_mem = gen_reg_rtx (SImode); ++ rtx shift = gen_reg_rtx (SImode); ++ rtx mask = gen_reg_rtx (SImode); ++ rtx not_mask = gen_reg_rtx (SImode); ++ ++ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); ++ ++ rtx shifted_value = gen_reg_rtx (SImode); ++ riscv_lshift_subword (mode, value, shift, &shifted_value); ++ ++ emit_insn (gen_subword_atomic_fetch_strong_ (old, aligned_mem, ++ shifted_value, ++ mask, not_mask)); ++ ++ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, ++ gen_lowpart (QImode, shift))); ++ ++ emit_move_insn (operands[0], gen_lowpart (mode, old)); ++ ++ DONE; ++}) ++ + (define_insn "atomic_exchange" + [(set (match_operand:GPR 0 "register_operand" "=&r") + (unspec_volatile:GPR +@@ -104,6 +236,56 @@ + "%F3amoswap.%A3 %0,%z2,%1" + [(set (attr "length") (const_int 8))]) + ++(define_expand "atomic_exchange" ++ [(match_operand:SHORT 0 "register_operand") ;; old value at mem ++ (match_operand:SHORT 1 "memory_operand") ;; mem location ++ (match_operand:SHORT 2 "register_operand") ;; value ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++{ ++ rtx old = gen_reg_rtx (SImode); ++ rtx mem = operands[1]; ++ rtx value = operands[2]; ++ rtx aligned_mem = gen_reg_rtx (SImode); ++ rtx shift = gen_reg_rtx (SImode); ++ rtx mask = gen_reg_rtx (SImode); ++ rtx not_mask = gen_reg_rtx (SImode); ++ ++ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); ++ ++ rtx shifted_value = gen_reg_rtx (SImode); ++ riscv_lshift_subword (mode, value, shift, 
&shifted_value); ++ ++ emit_insn (gen_subword_atomic_exchange_strong (old, aligned_mem, ++ shifted_value, not_mask)); ++ ++ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, ++ gen_lowpart (QImode, shift))); ++ ++ emit_move_insn (operands[0], gen_lowpart (mode, old)); ++ DONE; ++}) ++ ++(define_insn "subword_atomic_exchange_strong" ++ [(set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem ++ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location ++ (set (match_dup 1) ++ (unspec_volatile:SI ++ [(match_operand:SI 2 "reg_or_0_operand" "rI") ;; value ++ (match_operand:SI 3 "reg_or_0_operand" "rI")] ;; not_mask ++ UNSPEC_SYNC_EXCHANGE_SUBWORD)) ++ (clobber (match_scratch:SI 4 "=&r"))] ;; tmp_1 ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++ { ++ return "1:\;" ++ "lr.w.aq\t%0, %1\;" ++ "and\t%4, %0, %3\;" ++ "or\t%4, %4, %2\;" ++ "sc.w.rl\t%4, %4, %1\;" ++ "bnez\t%4, 1b"; ++ } ++ [(set (attr "length") (const_int 20))]) ++ + (define_insn "atomic_cas_value_strong" + [(set (match_operand:GPR 0 "register_operand" "=&r") + (match_operand:GPR 1 "memory_operand" "+A")) +@@ -152,6 +334,125 @@ + DONE; + }) + ++(define_expand "atomic_compare_and_swap" ++ [(match_operand:SI 0 "register_operand") ;; bool output ++ (match_operand:SHORT 1 "register_operand") ;; val output ++ (match_operand:SHORT 2 "memory_operand") ;; memory ++ (match_operand:SHORT 3 "reg_or_0_operand") ;; expected value ++ (match_operand:SHORT 4 "reg_or_0_operand") ;; desired value ++ (match_operand:SI 5 "const_int_operand") ;; is_weak ++ (match_operand:SI 6 "const_int_operand") ;; mod_s ++ (match_operand:SI 7 "const_int_operand")] ;; mod_f ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++{ ++ emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], ++ operands[3], operands[4], ++ operands[6], operands[7])); ++ ++ rtx val = gen_reg_rtx (SImode); ++ if (operands[1] != const0_rtx) ++ emit_move_insn (val, gen_rtx_SIGN_EXTEND (SImode, operands[1])); ++ else ++ emit_move_insn 
(val, const0_rtx); ++ ++ rtx exp = gen_reg_rtx (SImode); ++ if (operands[3] != const0_rtx) ++ emit_move_insn (exp, gen_rtx_SIGN_EXTEND (SImode, operands[3])); ++ else ++ emit_move_insn (exp, const0_rtx); ++ ++ rtx compare = val; ++ if (exp != const0_rtx) ++ { ++ rtx difference = gen_rtx_MINUS (SImode, val, exp); ++ compare = gen_reg_rtx (SImode); ++ emit_move_insn (compare, difference); ++ } ++ ++ if (word_mode != SImode) ++ { ++ rtx reg = gen_reg_rtx (word_mode); ++ emit_move_insn (reg, gen_rtx_SIGN_EXTEND (word_mode, compare)); ++ compare = reg; ++ } ++ ++ emit_move_insn (operands[0], gen_rtx_EQ (SImode, compare, const0_rtx)); ++ DONE; ++}) ++ ++(define_expand "atomic_cas_value_strong" ++ [(match_operand:SHORT 0 "register_operand") ;; val output ++ (match_operand:SHORT 1 "memory_operand") ;; memory ++ (match_operand:SHORT 2 "reg_or_0_operand") ;; expected value ++ (match_operand:SHORT 3 "reg_or_0_operand") ;; desired value ++ (match_operand:SI 4 "const_int_operand") ;; mod_s ++ (match_operand:SI 5 "const_int_operand") ;; mod_f ++ (match_scratch:SHORT 6)] ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++{ ++ /* We have no QImode/HImode atomics, so form a mask, then use ++ subword_atomic_cas_strong to implement a LR/SC version of the ++ operation. 
*/ ++ ++ /* Logic duplicated in gcc/libgcc/config/riscv/atomic.c for use when inlining ++ is disabled */ ++ ++ rtx old = gen_reg_rtx (SImode); ++ rtx mem = operands[1]; ++ rtx aligned_mem = gen_reg_rtx (SImode); ++ rtx shift = gen_reg_rtx (SImode); ++ rtx mask = gen_reg_rtx (SImode); ++ rtx not_mask = gen_reg_rtx (SImode); ++ ++ riscv_subword_address (mem, &aligned_mem, &shift, &mask, ¬_mask); ++ ++ rtx o = operands[2]; ++ rtx n = operands[3]; ++ rtx shifted_o = gen_reg_rtx (SImode); ++ rtx shifted_n = gen_reg_rtx (SImode); ++ ++ riscv_lshift_subword (mode, o, shift, &shifted_o); ++ riscv_lshift_subword (mode, n, shift, &shifted_n); ++ ++ emit_move_insn (shifted_o, gen_rtx_AND (SImode, shifted_o, mask)); ++ emit_move_insn (shifted_n, gen_rtx_AND (SImode, shifted_n, mask)); ++ ++ emit_insn (gen_subword_atomic_cas_strong (old, aligned_mem, ++ shifted_o, shifted_n, ++ mask, not_mask)); ++ ++ emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old, ++ gen_lowpart (QImode, shift))); ++ ++ emit_move_insn (operands[0], gen_lowpart (mode, old)); ++ ++ DONE; ++}) ++ ++(define_insn "subword_atomic_cas_strong" ++ [(set (match_operand:SI 0 "register_operand" "=&r") ;; old value at mem ++ (match_operand:SI 1 "memory_operand" "+A")) ;; mem location ++ (set (match_dup 1) ++ (unspec_volatile:SI [(match_operand:SI 2 "reg_or_0_operand" "rJ") ;; expected value ++ (match_operand:SI 3 "reg_or_0_operand" "rJ")] ;; desired value ++ UNSPEC_COMPARE_AND_SWAP_SUBWORD)) ++ (match_operand:SI 4 "register_operand" "rI") ;; mask ++ (match_operand:SI 5 "register_operand" "rI") ;; not_mask ++ (clobber (match_scratch:SI 6 "=&r"))] ;; tmp_1 ++ "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC" ++ { ++ return "1:\;" ++ "lr.w.aq\t%0, %1\;" ++ "and\t%6, %0, %4\;" ++ "bne\t%6, %z2, 1f\;" ++ "and\t%6, %0, %5\;" ++ "or\t%6, %6, %3\;" ++ "sc.w.rl\t%6, %6, %1\;" ++ "bnez\t%6, 1b\;" ++ "1:"; ++ } ++ [(set (attr "length") (const_int 28))]) ++ + (define_expand "atomic_test_and_set" + [(match_operand:QI 0 
"register_operand" "") ;; bool output + (match_operand:QI 1 "memory_operand" "+A") ;; memory +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index cb83dd8a1..ff8cd032f 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -1210,7 +1210,8 @@ See RS/6000 and PowerPC Options. + -malign-data=@var{type} @gol + -mbig-endian -mlittle-endian @gol + -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol +--mstack-protector-guard-offset=@var{offset}} ++-mstack-protector-guard-offset=@var{offset} ++-minline-atomics -mno-inline-atomics} + + @emph{RL78 Options} + @gccoptlist{-msim -mmul=none -mmul=g13 -mmul=g14 -mallregs @gol +@@ -28035,6 +28036,13 @@ Do or don't use smaller but slower prologue and epilogue code that uses + library function calls. The default is to use fast inline prologues and + epilogues. + ++@opindex minline-atomics ++@item -minline-atomics ++@itemx -mno-inline-atomics ++Do or don't use smaller but slower subword atomic emulation code that uses ++libatomic function calls. The default is to use fast inline subword atomics ++that do not require libatomic. 
++ + @item -mshorten-memrefs + @itemx -mno-shorten-memrefs + @opindex mshorten-memrefs +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c +new file mode 100644 +index 000000000..5c5623d9b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-1.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mno-inline-atomics" } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */ ++/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_add_1" } } */ ++/* { dg-final { scan-assembler "\tcall\t__sync_fetch_and_nand_1" } } */ ++/* { dg-final { scan-assembler "\tcall\t__sync_bool_compare_and_swap_1" } } */ ++ ++char foo; ++char bar; ++char baz; ++ ++int ++main () ++{ ++ __sync_fetch_and_add(&foo, 1); ++ __sync_fetch_and_nand(&bar, 1); ++ __sync_bool_compare_and_swap (&baz, 1, 2); ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c +new file mode 100644 +index 000000000..01b439086 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-2.c +@@ -0,0 +1,9 @@ ++/* { dg-do compile } */ ++/* Verify that subword atomics do not generate calls. 
*/ ++/* { dg-options "-minline-atomics" } */ ++/* { dg-message "note: '__sync_fetch_and_nand' changed semantics in GCC 4.4" "fetch_and_nand" { target *-*-* } 0 } */ ++/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_add_1" } } */ ++/* { dg-final { scan-assembler-not "\tcall\t__sync_fetch_and_nand_1" } } */ ++/* { dg-final { scan-assembler-not "\tcall\t__sync_bool_compare_and_swap_1" } } */ ++ ++#include "inline-atomics-1.c" +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c +new file mode 100644 +index 000000000..709f37343 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-3.c +@@ -0,0 +1,569 @@ ++/* Check all char alignments. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-op-1.c */ ++/* Test __atomic routines for existence and proper execution on 1 byte ++ values with each valid memory model. */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */ ++ ++/* Test the execution of the __atomic_*OP builtin routines for a char. */ ++ ++extern void abort(void); ++ ++char count, res; ++const char init = ~0; ++ ++struct A ++{ ++ char a; ++ char b; ++ char c; ++ char d; ++} __attribute__ ((packed)) A; ++ ++/* The fetch_op routines return the original value before the operation. 
*/ ++ ++void ++test_fetch_add (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1) ++ abort (); ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3) ++ abort (); ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5) ++ abort (); ++} ++ ++ ++void ++test_fetch_sub (char* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) != res--) ++ abort (); ++} ++ ++void ++test_fetch_and (char* v) ++{ ++ *v = init; ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void ++test_fetch_nand (char* v) ++{ ++ *v = init; ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) != 0 ) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if 
(__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) != 0) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++void ++test_fetch_xor (char* v) ++{ ++ *v = init; ++ count = 0; ++ ++ if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) != 0) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++void ++test_fetch_or (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) != 1) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) != 3) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) != 7) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) != 15) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) != 31) ++ abort (); ++} ++ ++/* The OP_fetch routines return the new value after the operation. 
*/ ++ ++void ++test_add_fetch (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1) ++ abort (); ++ ++ if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3) ++ abort (); ++ ++ if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6) ++ abort (); ++} ++ ++ ++void ++test_sub_fetch (char* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) != --res) ++ abort (); ++} ++ ++void ++test_and_fetch (char* v) ++{ ++ *v = init; ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ *v = init; ++ if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void ++test_nand_fetch (char* v) ++{ ++ *v = init; ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) != 
0) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++ ++ ++void ++test_xor_fetch (char* v) ++{ ++ *v = init; ++ count = 0; ++ ++ if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void ++test_or_fetch (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) != 1) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) != 3) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) != 7) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) != 15) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) != 31) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) != 63) ++ abort (); ++} ++ ++ ++/* Test the OP routines with a result which isn't used. Use both variations ++ within each function. 
*/ ++ ++void ++test_add (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ __atomic_add_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != 1) ++ abort (); ++ ++ __atomic_fetch_add (v, count, __ATOMIC_CONSUME); ++ if (*v != 2) ++ abort (); ++ ++ __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE); ++ if (*v != 3) ++ abort (); ++ ++ __atomic_fetch_add (v, 1, __ATOMIC_RELEASE); ++ if (*v != 4) ++ abort (); ++ ++ __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL); ++ if (*v != 5) ++ abort (); ++ ++ __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST); ++ if (*v != 6) ++ abort (); ++} ++ ++ ++void ++test_sub (char* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST); ++ if (*v != --res) ++ abort (); ++} ++ ++void ++test_and (char* v) ++{ ++ *v = init; ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_RELAXED); ++ if (*v != 0) ++ abort (); ++ ++ *v = init; ++ __atomic_fetch_and (v, init, __ATOMIC_CONSUME); ++ if (*v != init) ++ abort (); ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != 0) ++ abort (); ++ ++ *v = ~*v; ++ __atomic_fetch_and (v, init, __ATOMIC_RELEASE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL); ++ if (*v != 0) ++ abort (); ++ ++ *v = ~*v; ++ __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST); ++ if (*v != 0) ++ abort (); ++} ++ ++void ++test_nand (char* v) ++{ ++ *v = init; ++ ++ __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_nand (v, init, __ATOMIC_CONSUME); ++ if (*v != 0) ++ abort (); ++ ++ 
__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_nand_fetch (v, init, __ATOMIC_RELEASE); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL); ++ if (*v != init) ++ abort (); ++ ++ __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST); ++ if (*v != init) ++ abort (); ++} ++ ++ ++ ++void ++test_xor (char* v) ++{ ++ *v = init; ++ count = 0; ++ ++ __atomic_xor_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL); ++ if (*v != init) ++ abort (); ++ ++ __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST); ++ if (*v != 0) ++ abort (); ++} ++ ++void ++test_or (char* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ __atomic_or_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != 1) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, count, __ATOMIC_CONSUME); ++ if (*v != 3) ++ abort (); ++ ++ count *= 2; ++ __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE); ++ if (*v != 7) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, 8, __ATOMIC_RELEASE); ++ if (*v != 15) ++ abort (); ++ ++ count *= 2; ++ __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL); ++ if (*v != 31) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST); ++ if (*v != 63) ++ abort (); ++} ++ ++int ++main () ++{ ++ char* V[] = {&A.a, &A.b, &A.c, &A.d}; ++ ++ for (int i = 0; i < 4; i++) { ++ test_fetch_add (V[i]); ++ test_fetch_sub (V[i]); ++ test_fetch_and (V[i]); ++ test_fetch_nand (V[i]); ++ test_fetch_xor (V[i]); ++ test_fetch_or (V[i]); ++ ++ test_add_fetch (V[i]); ++ test_sub_fetch (V[i]); ++ test_and_fetch (V[i]); ++ test_nand_fetch (V[i]); ++ test_xor_fetch (V[i]); ++ test_or_fetch (V[i]); ++ ++ test_add (V[i]); ++ test_sub 
(V[i]); ++ test_and (V[i]); ++ test_nand (V[i]); ++ test_xor (V[i]); ++ test_or (V[i]); ++ } ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c +new file mode 100644 +index 000000000..eecfaae5c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-4.c +@@ -0,0 +1,566 @@ ++/* Check all short alignments. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-op-2.c */ ++/* Test __atomic routines for existence and proper execution on 2 byte ++ values with each valid memory model. */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics -Wno-address-of-packed-member" } */ ++ ++/* Test the execution of the __atomic_*OP builtin routines for a short. */ ++ ++extern void abort(void); ++ ++short count, res; ++const short init = ~0; ++ ++struct A ++{ ++ short a; ++ short b; ++} __attribute__ ((packed)) A; ++ ++/* The fetch_op routines return the original value before the operation. */ ++ ++void ++test_fetch_add (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_CONSUME) != 1) ++ abort (); ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_ACQUIRE) != 2) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_RELEASE) != 3) ++ abort (); ++ ++ if (__atomic_fetch_add (v, count, __ATOMIC_ACQ_REL) != 4) ++ abort (); ++ ++ if (__atomic_fetch_add (v, 1, __ATOMIC_SEQ_CST) != 5) ++ abort (); ++} ++ ++ ++void ++test_fetch_sub (short* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_RELAXED) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_CONSUME) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQUIRE) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_RELEASE) != res--) ++ abort (); ++ ++ if (__atomic_fetch_sub (v, count + 1, __ATOMIC_ACQ_REL) != res--) ++ 
abort (); ++ ++ if (__atomic_fetch_sub (v, 1, __ATOMIC_SEQ_CST) != res--) ++ abort (); ++} ++ ++void ++test_fetch_and (short* v) ++{ ++ *v = init; ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, init, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_fetch_and (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void ++test_fetch_nand (short* v) ++{ ++ *v = init; ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, init, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_ACQUIRE) != 0 ) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL) != 0) ++ abort (); ++ ++ if (__atomic_fetch_nand (v, 0, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++void ++test_fetch_xor (short* v) ++{ ++ *v = init; ++ count = 0; ++ ++ if (__atomic_fetch_xor (v, count, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE) != 0) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_fetch_xor (v, ~count, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++void ++test_fetch_or (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_fetch_or (v, count, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, 2, __ATOMIC_CONSUME) != 1) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_ACQUIRE) != 3) ++ 
abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, 8, __ATOMIC_RELEASE) != 7) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_ACQ_REL) != 15) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_fetch_or (v, count, __ATOMIC_SEQ_CST) != 31) ++ abort (); ++} ++ ++/* The OP_fetch routines return the new value after the operation. */ ++ ++void ++test_add_fetch (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_RELAXED) != 1) ++ abort (); ++ ++ if (__atomic_add_fetch (v, 1, __ATOMIC_CONSUME) != 2) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_ACQUIRE) != 3) ++ abort (); ++ ++ if (__atomic_add_fetch (v, 1, __ATOMIC_RELEASE) != 4) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_ACQ_REL) != 5) ++ abort (); ++ ++ if (__atomic_add_fetch (v, count, __ATOMIC_SEQ_CST) != 6) ++ abort (); ++} ++ ++ ++void ++test_sub_fetch (short* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, 1, __ATOMIC_CONSUME) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQUIRE) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, 1, __ATOMIC_RELEASE) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL) != --res) ++ abort (); ++ ++ if (__atomic_sub_fetch (v, count + 1, __ATOMIC_SEQ_CST) != --res) ++ abort (); ++} ++ ++void ++test_and_fetch (short* v) ++{ ++ *v = init; ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_RELAXED) != 0) ++ abort (); ++ ++ *v = init; ++ if (__atomic_and_fetch (v, init, __ATOMIC_CONSUME) != init) ++ abort (); ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_and_fetch (v, init, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL) != 0) ++ abort (); ++ ++ *v = ~*v; ++ if (__atomic_and_fetch (v, 0, __ATOMIC_SEQ_CST) != 0) ++ 
abort (); ++} ++ ++void ++test_nand_fetch (short* v) ++{ ++ *v = init; ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_RELEASE) != 0) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, init, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST) != init) ++ abort (); ++} ++ ++ ++ ++void ++test_xor_fetch (short* v) ++{ ++ *v = init; ++ count = 0; ++ ++ if (__atomic_xor_fetch (v, count, __ATOMIC_RELAXED) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_CONSUME) != 0) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE) != 0) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_RELEASE) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, 0, __ATOMIC_ACQ_REL) != init) ++ abort (); ++ ++ if (__atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST) != 0) ++ abort (); ++} ++ ++void ++test_or_fetch (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ if (__atomic_or_fetch (v, count, __ATOMIC_RELAXED) != 1) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, 2, __ATOMIC_CONSUME) != 3) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_ACQUIRE) != 7) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, 8, __ATOMIC_RELEASE) != 15) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_ACQ_REL) != 31) ++ abort (); ++ ++ count *= 2; ++ if (__atomic_or_fetch (v, count, __ATOMIC_SEQ_CST) != 63) ++ abort (); ++} ++ ++ ++/* Test the OP routines with a result which isn't used. Use both variations ++ within each function. 
*/ ++ ++void ++test_add (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ __atomic_add_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != 1) ++ abort (); ++ ++ __atomic_fetch_add (v, count, __ATOMIC_CONSUME); ++ if (*v != 2) ++ abort (); ++ ++ __atomic_add_fetch (v, 1 , __ATOMIC_ACQUIRE); ++ if (*v != 3) ++ abort (); ++ ++ __atomic_fetch_add (v, 1, __ATOMIC_RELEASE); ++ if (*v != 4) ++ abort (); ++ ++ __atomic_add_fetch (v, count, __ATOMIC_ACQ_REL); ++ if (*v != 5) ++ abort (); ++ ++ __atomic_fetch_add (v, count, __ATOMIC_SEQ_CST); ++ if (*v != 6) ++ abort (); ++} ++ ++ ++void ++test_sub (short* v) ++{ ++ *v = res = 20; ++ count = 0; ++ ++ __atomic_sub_fetch (v, count + 1, __ATOMIC_RELAXED); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, count + 1, __ATOMIC_CONSUME); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_sub_fetch (v, 1, __ATOMIC_ACQUIRE); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, 1, __ATOMIC_RELEASE); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_sub_fetch (v, count + 1, __ATOMIC_ACQ_REL); ++ if (*v != --res) ++ abort (); ++ ++ __atomic_fetch_sub (v, count + 1, __ATOMIC_SEQ_CST); ++ if (*v != --res) ++ abort (); ++} ++ ++void ++test_and (short* v) ++{ ++ *v = init; ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_RELAXED); ++ if (*v != 0) ++ abort (); ++ ++ *v = init; ++ __atomic_fetch_and (v, init, __ATOMIC_CONSUME); ++ if (*v != init) ++ abort (); ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != 0) ++ abort (); ++ ++ *v = ~*v; ++ __atomic_fetch_and (v, init, __ATOMIC_RELEASE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_and_fetch (v, 0, __ATOMIC_ACQ_REL); ++ if (*v != 0) ++ abort (); ++ ++ *v = ~*v; ++ __atomic_fetch_and (v, 0, __ATOMIC_SEQ_CST); ++ if (*v != 0) ++ abort (); ++} ++ ++void ++test_nand (short* v) ++{ ++ *v = init; ++ ++ __atomic_fetch_nand (v, 0, __ATOMIC_RELAXED); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_nand (v, init, __ATOMIC_CONSUME); ++ if (*v != 0) ++ abort (); ++ ++ 
__atomic_nand_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_nand_fetch (v, init, __ATOMIC_RELEASE); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_fetch_nand (v, init, __ATOMIC_ACQ_REL); ++ if (*v != init) ++ abort (); ++ ++ __atomic_nand_fetch (v, 0, __ATOMIC_SEQ_CST); ++ if (*v != init) ++ abort (); ++} ++ ++ ++ ++void ++test_xor (short* v) ++{ ++ *v = init; ++ count = 0; ++ ++ __atomic_xor_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_xor (v, ~count, __ATOMIC_CONSUME); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_xor_fetch (v, 0, __ATOMIC_ACQUIRE); ++ if (*v != 0) ++ abort (); ++ ++ __atomic_fetch_xor (v, ~count, __ATOMIC_RELEASE); ++ if (*v != init) ++ abort (); ++ ++ __atomic_fetch_xor (v, 0, __ATOMIC_ACQ_REL); ++ if (*v != init) ++ abort (); ++ ++ __atomic_xor_fetch (v, ~count, __ATOMIC_SEQ_CST); ++ if (*v != 0) ++ abort (); ++} ++ ++void ++test_or (short* v) ++{ ++ *v = 0; ++ count = 1; ++ ++ __atomic_or_fetch (v, count, __ATOMIC_RELAXED); ++ if (*v != 1) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, count, __ATOMIC_CONSUME); ++ if (*v != 3) ++ abort (); ++ ++ count *= 2; ++ __atomic_or_fetch (v, 4, __ATOMIC_ACQUIRE); ++ if (*v != 7) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, 8, __ATOMIC_RELEASE); ++ if (*v != 15) ++ abort (); ++ ++ count *= 2; ++ __atomic_or_fetch (v, count, __ATOMIC_ACQ_REL); ++ if (*v != 31) ++ abort (); ++ ++ count *= 2; ++ __atomic_fetch_or (v, count, __ATOMIC_SEQ_CST); ++ if (*v != 63) ++ abort (); ++} ++ ++int ++main () { ++ short* V[] = {&A.a, &A.b}; ++ ++ for (int i = 0; i < 2; i++) { ++ test_fetch_add (V[i]); ++ test_fetch_sub (V[i]); ++ test_fetch_and (V[i]); ++ test_fetch_nand (V[i]); ++ test_fetch_xor (V[i]); ++ test_fetch_or (V[i]); ++ ++ test_add_fetch (V[i]); ++ test_sub_fetch (V[i]); ++ test_and_fetch (V[i]); ++ test_nand_fetch (V[i]); ++ test_xor_fetch (V[i]); ++ test_or_fetch (V[i]); ++ ++ test_add (V[i]); ++ test_sub (V[i]); ++ 
test_and (V[i]); ++ test_nand (V[i]); ++ test_xor (V[i]); ++ test_or (V[i]); ++ } ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c +new file mode 100644 +index 000000000..52093894a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-5.c +@@ -0,0 +1,87 @@ ++/* Test __atomic routines for existence and proper execution on 1 byte ++ values with each valid memory model. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-1.c */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics" } */ ++ ++/* Test the execution of the __atomic_compare_exchange_n builtin for a char. */ ++ ++extern void abort(void); ++ ++char v = 0; ++char expected = 0; ++char max = ~0; ++char desired = ~0; ++char zero = 0; ++ ++#define STRONG 0 ++#define WEAK 1 ++ ++int ++main () ++{ ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != max) ++ abort (); ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != max) ++ abort (); ++ if (v != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ if (v != max) ++ abort (); ++ ++ /* Now test the generic version. 
*/ ++ ++ v = 0; ++ ++ if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != max) ++ abort (); ++ ++ if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != max) ++ abort (); ++ if (v != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ if (v != max) ++ abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c +new file mode 100644 +index 000000000..8fee8c448 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-6.c +@@ -0,0 +1,87 @@ ++/* Test __atomic routines for existence and proper execution on 2 byte ++ values with each valid memory model. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-compare-exchange-2.c */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics" } */ ++ ++/* Test the execution of the __atomic_compare_exchange_n builtin for a short. 
*/ ++ ++extern void abort(void); ++ ++short v = 0; ++short expected = 0; ++short max = ~0; ++short desired = ~0; ++short zero = 0; ++ ++#define STRONG 0 ++#define WEAK 1 ++ ++int ++main () ++{ ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != max) ++ abort (); ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != max) ++ abort (); ++ if (v != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ if (v != max) ++ abort (); ++ ++ /* Now test the generic version. 
*/ ++ ++ v = 0; ++ ++ if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) ++ abort (); ++ if (expected != max) ++ abort (); ++ ++ if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != max) ++ abort (); ++ if (v != 0) ++ abort (); ++ ++ if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ ++ if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) ++ abort (); ++ if (expected != 0) ++ abort (); ++ if (v != max) ++ abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c +new file mode 100644 +index 000000000..24c344c0c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-7.c +@@ -0,0 +1,69 @@ ++/* Test __atomic routines for existence and proper execution on 1 byte ++ values with each valid memory model. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-exchange-1.c */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics" } */ ++ ++/* Test the execution of the __atomic_exchange_n builtin for a char. 
*/ ++ ++extern void abort(void); ++ ++char v, count, ret; ++ ++int ++main () ++{ ++ v = 0; ++ count = 0; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count) ++ abort (); ++ count++; ++ ++ /* Now test the generic version. */ ++ ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c b/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c +new file mode 100644 +index 000000000..edc212df0 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/riscv/inline-atomics-8.c +@@ -0,0 +1,69 @@ ++/* Test __atomic routines for existence and proper execution on 2 byte ++ values with each valid memory model. */ ++/* Duplicate logic as libatomic/testsuite/libatomic.c/atomic-exchange-2.c */ ++/* { dg-do run } */ ++/* { dg-options "-minline-atomics" } */ ++ ++/* Test the execution of the __atomic_X builtin for a short. 
*/ ++ ++extern void abort(void); ++ ++short v, count, ret; ++ ++int ++main () ++{ ++ v = 0; ++ count = 0; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count) ++ abort (); ++ count++; ++ ++ if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count) ++ abort (); ++ count++; ++ ++ /* Now test the generic version. */ ++ ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ __atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST); ++ if (ret != count - 1 || v != count) ++ abort (); ++ count++; ++ ++ return 0; ++} +diff --git a/libgcc/config/riscv/atomic.c b/libgcc/config/riscv/atomic.c +index 7007e7a20..a29909b97 100644 +--- a/libgcc/config/riscv/atomic.c ++++ b/libgcc/config/riscv/atomic.c +@@ -30,6 +30,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see + #define INVERT "not %[tmp1], %[tmp1]\n\t" + #define DONT_INVERT "" + ++/* Logic duplicated in gcc/gcc/config/riscv/sync.md for use when inlining is enabled */ ++ + #define GENERATE_FETCH_AND_OP(type, size, opname, insn, invert, cop) \ + type __sync_fetch_and_ ## opname ## _ ## size (type *p, type v) \ + { \ +-- +2.33.0 + diff --git a/0001-CONFIG-Regenerate-configure-file.patch b/0003-CONFIG-Regenerate-configure-file.patch similarity index 95% rename from 0001-CONFIG-Regenerate-configure-file.patch rename to 0003-CONFIG-Regenerate-configure-file.patch index 9c18db4..c31c887 100644 --- a/0001-CONFIG-Regenerate-configure-file.patch +++ b/0003-CONFIG-Regenerate-configure-file.patch @@ -1,7 +1,7 @@ From 37ef787e743d98f9f6e53005d99709fb8e284964 Mon Sep 17 00:00:00 2001 From: eastb233 Date: Fri, 14 Jul 2023 11:07:05 +0800 -Subject: [PATCH 1/2] [CONFIG] Regenerate configure file +Subject: [PATCH 03/22] [CONFIG] Regenerate configure file Regenerate configure file under libquadmath directory since it is out of date. diff --git a/0002-libquadmath-Enable-libquadmath-on-kunpeng.patch b/0004-libquadmath-Enable-libquadmath-on-kunpeng.patch similarity index 98% rename from 0002-libquadmath-Enable-libquadmath-on-kunpeng.patch rename to 0004-libquadmath-Enable-libquadmath-on-kunpeng.patch index 3d68e78..0bd784e 100644 --- a/0002-libquadmath-Enable-libquadmath-on-kunpeng.patch +++ b/0004-libquadmath-Enable-libquadmath-on-kunpeng.patch @@ -1,7 +1,7 @@ From 52a810b4d8a725a7edb2988f6c3813a9938362a5 Mon Sep 17 00:00:00 2001 From: eastb233 Date: Fri, 14 Jul 2023 11:10:24 +0800 -Subject: [PATCH 2/2] [libquadmath] Enable libquadmath on kunpeng +Subject: [PATCH 04/22] [libquadmath] Enable libquadmath on kunpeng This enable libquadmath on kunpeng platform to convenient users that migrating from x86 platform. 
libquadmath uses "__float128" diff --git a/0006-MULL64-1-3-Add-A-B-op-CST-B-match-and-simplify-optim.patch b/0006-MULL64-1-3-Add-A-B-op-CST-B-match-and-simplify-optim.patch new file mode 100644 index 0000000..35dc803 --- /dev/null +++ b/0006-MULL64-1-3-Add-A-B-op-CST-B-match-and-simplify-optim.patch @@ -0,0 +1,89 @@ +From e7013d2640d82e928ebdaf830b6833051ac65296 Mon Sep 17 00:00:00 2001 +From: zhongyunde +Date: Sat, 5 Nov 2022 13:22:33 +0800 +Subject: [PATCH 06/22] [MULL64 1/3] Add A ? B op CST : B match and simplify + optimizations + + Refer to commit b6bdd7a4, use pattern match to simple + A ? B op CST : B (where CST is power of 2) simplifications. + Fixes the 1st issue of https://gitee.com/openeuler/gcc/issues/I5TSG0?from=project-issue. + + gcc/ + * match.pd (A ? B op CST : B): Add simplifcations for A ? B op POW2 : B + + gcc/testsuite/ + * gcc.dg/pr107190.c: New test. +--- + gcc/match.pd | 21 +++++++++++++++++++++ + gcc/testsuite/gcc.dg/pr107190.c | 27 +++++++++++++++++++++++++++ + 2 files changed, 48 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/pr107190.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index fc2833bbd..fd0857fc9 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4280,6 +4280,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++(if (canonicalize_math_p ()) ++/* These patterns are mostly used by PHIOPT to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. */ ++/* a ? x op C : x -> x op (a << log2(C)) when C is power of 2. 
*/ ++ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate) ++ (simplify ++ (cond @0 (op:s @1 integer_pow2p@2) @1) ++ /* powerof2cst */ ++ (if (INTEGRAL_TYPE_P (type)) ++ (with { ++ tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); ++ } ++ (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; }))) ++ ) ++ ) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. */ + /* This pattern implements two kinds simplification: +diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c +new file mode 100644 +index 000000000..235b2761a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr107190.c +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */ ++ ++# define BN_BITS4 32 ++# define BN_MASK2 (0xffffffffffffffffL) ++# define BN_MASK2l (0xffffffffL) ++# define BN_MASK2h (0xffffffff00000000L) ++# define BN_MASK2h1 (0xffffffff80000000L) ++# define LBITS(a) ((a)&BN_MASK2l) ++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) ++# define L2HBITS(a) (((a)< +Date: Wed, 9 Nov 2022 17:04:13 +0800 +Subject: [PATCH 07/22] [MULL64 2/3] Fold series of instructions into mul + + Merge the low part of series instructions into mul + + gcc/ + * match.pd: Add simplifications for low part of mul + * common.opt: Add new option fmerge-mull enable with -O2 + * opts.c: default_options_table + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: New test.
+--- + gcc/common.opt | 4 +++ + gcc/match.pd | 27 ++++++++++++++++++++ + gcc/opts.cc | 1 + + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++ + 4 files changed, 66 insertions(+) + create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C + +diff --git a/gcc/common.opt b/gcc/common.opt +index 8a0dafc52..e365a48bc 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2126,6 +2126,10 @@ fmerge-debug-strings + Common Var(flag_merge_debug_strings) Init(1) + Attempt to merge identical debug strings across compilation units. + ++fmerge-mull ++Common Var(flag_merge_mull) Init(0) Optimization ++Attempt to merge series instructions into mul. ++ + fmessage-length= + Common RejectNegative Joined UInteger + -fmessage-length= Limit diagnostics to characters per line. 0 suppresses line-wrapping. +diff --git a/gcc/match.pd b/gcc/match.pd +index fd0857fc9..2092e6959 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4301,6 +4301,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP1 to fold some operations into more ++ simple IR. The following scenario should be matched: ++ In0Lo = In0(D) & 4294967295; ++ In0Hi = In0(D) >> 32; ++ In1Lo = In1(D) & 4294967295; ++ In1Hi = In1(D) >> 32; ++ Addc = In0Lo * In1Hi + In0Hi * In1Lo; ++ addc32 = Addc << 32; ++ ResLo = In0Lo * In1Lo + addc32 */ ++(simplify ++ (plus:c (mult @4 @5) ++ (lshift ++ (plus:c ++ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) ++ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) ++ INTEGER_CST@3 ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) ++ && TYPE_PRECISION (type) == 64) ++ (mult (convert:type @0) (convert:type @1)) ++ ) ++) ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. 
*/ + /* This pattern implements two kinds simplification: +diff --git a/gcc/opts.cc b/gcc/opts.cc +index a97630d1c..eae71ed20 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -647,6 +647,7 @@ static const struct default_options default_options_table[] = + VECT_COST_MODEL_VERY_CHEAP }, + { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, ++ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 }, + + /* -O2 and above optimizations, but not -Os or -Og. */ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +new file mode 100644 +index 000000000..2a3b74604 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++ ++# define BN_BITS4 32 ++# define BN_MASK2 (0xffffffffffffffffL) ++# define BN_MASK2l (0xffffffffL) ++# define BN_MASK2h (0xffffffff00000000L) ++# define BN_MASK2h1 (0xffffffff80000000L) ++# define LBITS(a) ((a)&BN_MASK2l) ++# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l) ++# define L2HBITS(a) (((a)< +Date: Fri, 11 Nov 2022 11:30:37 +0800 +Subject: [PATCH 08/22] [MULL64 3/3] Fold series of instructions into umulh + + Merge the high part of series instructions into umulh + + gcc/ + * match.pd: Add simplifications for high part of umulh + + gcc/testsuite/ + * g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4 +--- + gcc/match.pd | 56 ++++++++++++++++++++++++++ + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++- + 2 files changed, 59 insertions(+), 2 deletions(-) + +diff --git a/gcc/match.pd b/gcc/match.pd +index 2092e6959..b7e3588e8 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4301,6 +4301,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP4 to move some operations outside of ++
the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. */ ++/* Mul64 is defined as a multiplication algorithm which compute two 64-bit ++ integers to one 128-bit integer. Try to match the high part of mul pattern ++ after the low part of mul pattern is simplified. The following scenario ++ should be matched: ++ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { ++ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2 ++ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3 ++ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2 ++ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3 ++ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6 ++ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult (@4 @7) @8 ++ AddH = (Addc >> 32) + In0Hi * In1Hi -- (plus@11 (rshift @9 @3) (mult @5 @7)) ++ addc32 = Addc << 32; -- lshift@10 @9 @3 ++ ResLo = In0(D) * In1(D); -- mult @0 @1 ++ ResHi = ((long unsigned int) (addc32 > ResLo)) + ++ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH; ++ } */ ++(simplify ++ (plus:c ++ (plus:c ++ (convert ++ (gt (lshift@10 @9 @3) ++ (mult:c @0 @1))) ++ (lshift ++ (convert ++ (gt @8 @9)) ++ @3)) ++ (plus:c@11 ++ (rshift ++ (plus:c@9 ++ (mult:c (bit_and@4 SSA_NAME@0 @2) @7) ++ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) ++ @3) ++ (mult:c (rshift@5 SSA_NAME@0 @3) ++ (rshift@7 SSA_NAME@1 INTEGER_CST@3)) ++ ) ++ ) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type) ++ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) ++ && TYPE_PRECISION (type) == 64) ++ (with { ++ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); ++ tree shift = build_int_cst (integer_type_node, 64); ++ } ++ (convert:type (rshift ++ (mult (convert:i128_type @0) ++ (convert:i128_type @1)) ++ { shift; }))) ++ ) ++) ++#endif ++ + #if GIMPLE + /* These patterns are mostly used by FORWPROP1 to fold some operations into more + simple IR. 
The following scenario should be matched: +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +index 2a3b74604..f61cf5e6f 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */ ++/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */ + + # define BN_BITS4 32 + # define BN_MASK2 (0xffffffffffffffffL) +@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1, + retHi = m11; + } + +-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */ ++/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */ ++/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */ +-- +2.33.0 + diff --git a/0009-MULL64-Disable-mull64-transformation-by-default.patch b/0009-MULL64-Disable-mull64-transformation-by-default.patch new file mode 100644 index 0000000..347ba0a --- /dev/null +++ b/0009-MULL64-Disable-mull64-transformation-by-default.patch @@ -0,0 +1,66 @@ +From 7c1f4425c680ea144d29bc55a1283d46444a2691 Mon Sep 17 00:00:00 2001 +From: eastb233 +Date: Wed, 7 Dec 2022 09:43:15 +0800 +Subject: [PATCH 09/22] [MULL64] Disable mull64 transformation by default + +This commit disables mull64 transformation by default since +it shows some runtime failure in workloads. 
+ +This is a workaround fix for https://gitee.com/src-openeuler/gcc/issues/I64UQH +--- + gcc/match.pd | 2 +- + gcc/opts.cc | 1 - + gcc/testsuite/g++.dg/tree-ssa/mull64.C | 2 +- + gcc/testsuite/gcc.dg/pr107190.c | 2 +- + 4 files changed, 3 insertions(+), 4 deletions(-) + +diff --git a/gcc/match.pd b/gcc/match.pd +index b7e3588e8..6f24d5079 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4290,7 +4290,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (simplify + (cond @0 (op:s @1 integer_pow2p@2) @1) + /* powerof2cst */ +- (if (INTEGRAL_TYPE_P (type)) ++ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)) + (with { + tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); + } +diff --git a/gcc/opts.cc b/gcc/opts.cc +index eae71ed20..a97630d1c 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -647,7 +647,6 @@ static const struct default_options default_options_table[] = + VECT_COST_MODEL_VERY_CHEAP }, + { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, +- { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 }, + + /* -O2 and above optimizations, but not -Os or -Og. 
*/ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, +diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +index f61cf5e6f..cad891e62 100644 +--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C ++++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */ ++/* { dg-options "-O2 -fmerge-mull -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */ + + # define BN_BITS4 32 + # define BN_MASK2 (0xffffffffffffffffL) +diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c +index 235b2761a..d1e72e5df 100644 +--- a/gcc/testsuite/gcc.dg/pr107190.c ++++ b/gcc/testsuite/gcc.dg/pr107190.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */ ++/* { dg-options "-O2 -fmerge-mull -fexpensive-optimizations -fdump-tree-phiopt2-details" } */ + + # define BN_BITS4 32 + # define BN_MASK2 (0xffffffffffffffffL) +-- +2.33.0 + diff --git a/0010-Version-Clear-DATESTAMP_s.patch b/0010-Version-Clear-DATESTAMP_s.patch new file mode 100644 index 0000000..e4c480f --- /dev/null +++ b/0010-Version-Clear-DATESTAMP_s.patch @@ -0,0 +1,26 @@ +From 8e8f783b02df155e3aafa94af6cc1f66604e08eb Mon Sep 17 00:00:00 2001 +From: eastb233 +Date: Fri, 21 Jul 2023 14:45:27 +0800 +Subject: [PATCH 10/22] [Version] Clear DATESTAMP_s + +--- + gcc/Makefile.in | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 31ff95500..db2a0e1bd 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -897,8 +897,7 @@ PATCHLEVEL_c := \ + # significant - do not remove it. 
+ BASEVER_s := "\"$(BASEVER_c)\"" + DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\"" +-DATESTAMP_s := \ +- "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\"" ++DATESTAMP_s := "\"\"" + PKGVERSION_s:= "\"@PKGVERSION@\"" + BUGURL_s := "\"@REPORT_BUGS_TO@\"" + +-- +2.33.0 + diff --git a/0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch b/0011-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch similarity index 97% rename from 0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch rename to 0011-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch index 34d0165..f3add4c 100644 --- a/0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch +++ b/0011-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch @@ -1,7 +1,7 @@ From 355eb8e20327242442d139fb052d3a3befde3dd7 Mon Sep 17 00:00:00 2001 From: "Cui,Lili" Date: Tue, 1 Nov 2022 09:16:49 +0800 -Subject: [PATCH] Add attribute hot judgement for INLINE_HINT_known_hot +Subject: [PATCH 11/22] Add attribute hot judgement for INLINE_HINT_known_hot hint. 
We set up INLINE_HINT_known_hot hint only when we have profile feedback, @@ -120,5 +120,5 @@ index 000000000..1f3be641c +/* { dg-final { scan-ipa-dump "known_hot" "inline" } } */ + -- -2.31.1 +2.33.0 diff --git a/0004-Enable-small-loop-unrolling-for-O2.patch b/0012-Enable-small-loop-unrolling-for-O2.patch similarity index 99% rename from 0004-Enable-small-loop-unrolling-for-O2.patch rename to 0012-Enable-small-loop-unrolling-for-O2.patch index 3913fcf..97ca890 100644 --- a/0004-Enable-small-loop-unrolling-for-O2.patch +++ b/0012-Enable-small-loop-unrolling-for-O2.patch @@ -1,7 +1,7 @@ From 1070bc24f53e851cae55320e26715cc594efcd2f Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Thu, 8 Sep 2022 16:52:02 +0800 -Subject: [PATCH] Enable small loop unrolling for O2 +Subject: [PATCH 12/22] Enable small loop unrolling for O2 Modern processors has multiple way instruction decoders For x86, icelake/zen3 has 5 uops, so for small loop with <= 4 @@ -486,5 +486,5 @@ index 0248fcc00..f75a847f7 100644 volatile int sink; -- -2.31.1 +2.33.0 diff --git a/0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch b/0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch similarity index 99% rename from 0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch rename to 0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch index 9e89306..6f89af0 100644 --- a/0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch +++ b/0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch @@ -1,7 +1,7 @@ From 96898a9cd8c159625848247bd2f3a09e5c12fcfa Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Sat, 19 Nov 2022 09:38:00 +0800 -Subject: [PATCH] i386: Only enable small loop unrolling in backend [PR +Subject: [PATCH 13/22] i386: Only enable small loop unrolling in backend [PR 107692] Followed by the discussion in pr107692, -munroll-only-small-loops @@ -226,5 +226,5 @@ index f75a847f7..7e2d869e1 100644 volatile int sink; -- -2.31.1 +2.33.0 diff --git 
a/0014-Array-widen-compare-Add-a-new-optimization-for-array.patch b/0014-Array-widen-compare-Add-a-new-optimization-for-array.patch new file mode 100644 index 0000000..182560e --- /dev/null +++ b/0014-Array-widen-compare-Add-a-new-optimization-for-array.patch @@ -0,0 +1,1981 @@ +From 5ef5f6c4ae806f56ff81450c759f36d59b5b23db Mon Sep 17 00:00:00 2001 +From: dingguangya +Date: Sat, 29 Jul 2023 17:45:01 +0800 +Subject: [PATCH 14/22] [Array-widen-compare] Add a new optimization for array + comparison scenarios + +Add option farray-widen-compare. +For an array pointer whose element is a single-byte type, +by changing the pointer type to a long-byte type, the elements +can be combined and compared after loading. +--- + gcc/Makefile.in | 1 + + gcc/common.opt | 5 + + gcc/doc/invoke.texi | 13 +- + gcc/passes.def | 1 + + .../gcc.dg/tree-ssa/awiden-compare-1.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-2.c | 90 + + .../gcc.dg/tree-ssa/awiden-compare-3.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-4.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-5.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-6.c | 19 + + .../gcc.dg/tree-ssa/awiden-compare-7.c | 22 + + .../gcc.dg/tree-ssa/awiden-compare-8.c | 24 + + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + gcc/tree-ssa-loop-array-widen-compare.cc | 1555 +++++++++++++++++ + 15 files changed, 1813 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c + create mode 100644 gcc/tree-ssa-loop-array-widen-compare.cc + 
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 31ff95500..0aabc6ea3 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1653,6 +1653,7 @@ OBJS = \ + tree-ssa-loop-ivopts.o \ + tree-ssa-loop-manip.o \ + tree-ssa-loop-niter.o \ ++ tree-ssa-loop-array-widen-compare.o \ + tree-ssa-loop-prefetch.o \ + tree-ssa-loop-split.o \ + tree-ssa-loop-unswitch.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index e365a48bc..4d91ce8cf 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1116,6 +1116,11 @@ fasynchronous-unwind-tables + Common Var(flag_asynchronous_unwind_tables) Optimization + Generate unwind tables that are exact at each instruction boundary. + ++farray-widen-compare ++Common Var(flag_array_widen_compare) Optimization ++Extends types for pointers to arrays to improve array comparsion performance. ++In some extreme situations this may result in unsafe behavior. ++ + fauto-inc-dec + Common Var(flag_auto_inc_dec) Init(1) Optimization + Generate auto-inc/dec instructions. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index ff8cd032f..a11e2c24b 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -507,7 +507,7 @@ Objective-C and Objective-C++ Dialects}. + -falign-loops[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol + -fno-allocation-dce -fallow-store-data-races @gol + -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol +--fauto-inc-dec -fbranch-probabilities @gol ++-farray-widen-compare -fauto-inc-dec -fbranch-probabilities @gol + -fcaller-saves @gol + -fcombine-stack-adjustments -fconserve-stack @gol + -fcompare-elim -fcprop-registers -fcrossjumping @gol +@@ -11387,6 +11387,17 @@ This pass is always skipped on architectures that do not have + instructions to support this. Enabled by default at @option{-O1} and + higher on architectures that support this. 
+ ++@item -farray-widen-compare ++@opindex farray-widen-compare ++In the narrow-byte array comparison scenario, the types of pointers ++pointing to array are extended so that elements of multiple bytes can ++be loaded at a time when a wide type is used to dereference an array, ++thereby improving the performance of this comparison scenario. In some ++extreme situations this may result in unsafe behavior. ++ ++This option may generate better or worse code; results are highly dependent ++on the structure of loops within the source code. ++ + @item -fdce + @opindex fdce + Perform dead code elimination (DCE) on RTL@. +diff --git a/gcc/passes.def b/gcc/passes.def +index 375d3d62d..8dbb7983e 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_dse); + NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */); + NEXT_PASS (pass_phiopt, true /* early_p */); ++ NEXT_PASS (pass_array_widen_compare); + NEXT_PASS (pass_tail_recursion); + NEXT_PASS (pass_if_to_switch); + NEXT_PASS (pass_convert_switch); +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c +new file mode 100644 +index 000000000..e18ef5ec1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c +new file mode 100644 +index 000000000..f4b20b43c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c +@@ -0,0 +1,90 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define EMPTY_HASH_VALUE 0 ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++#define true 1 ++ ++typedef struct { ++ uint32_t len; ++ uint32_t dist; ++} lzma_match; ++ ++ ++lzma_match * ++func ( ++ const uint32_t len_limit, ++ const uint32_t pos, ++ const uint8_t *const cur, ++ uint32_t cur_match, ++ uint32_t depth, ++ uint32_t *const son, ++ const uint32_t cyclic_pos, ++ const uint32_t cyclic_size, ++ lzma_match *matches, ++ uint32_t len_best) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (true) ++ { ++ const uint32_t delta = pos - cur_match; ++ if (depth-- == 0 || delta >= cyclic_size) ++ { ++ *ptr0 = EMPTY_HASH_VALUE; ++ *ptr1 = EMPTY_HASH_VALUE; ++ return matches; ++ } ++ ++ uint32_t *const pair = son + ((cyclic_pos - delta + (delta > cyclic_pos ? 
cyclic_size : 0)) << 1); ++ ++ const uint8_t *const pb = cur -delta; ++ uint32_t len = my_min(len0, len1); ++ ++ if (pb[len] == cur[len]) ++ { ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ ++ if (len_best < len) ++ { ++ len_best = len; ++ matches->len = len; ++ matches->dist = delta - 1; ++ ++matches; ++ ++ if (len == len_limit) ++ { ++ *ptr1 = pair[0]; ++ *ptr0 = pair[1]; ++ return matches; ++ } ++ } ++ } ++ ++ if (pb[len] < cur[len]) ++ { ++ *ptr1 = cur_match; ++ ptr1 = pair + 1; ++ cur_match = *ptr1; ++ len1 = len; ++ } ++ else ++ { ++ *ptr0 = cur_match; ++ ptr0 = pair; ++ cur_match = *ptr0; ++ len0 = len; ++ } ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c +new file mode 100644 +index 000000000..86f5e7a1e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ break; ++ len = len + 1; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c +new file mode 100644 +index 000000000..d66558699 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ break; ++ len = len + 2; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c +new file mode 100644 +index 000000000..e3e12bca4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len-1]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c +new file mode 100644 +index 000000000..b8500735e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len++ != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c +new file mode 100644 +index 000000000..977bf5685 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ len = len + 1; ++ if (pb[len] != cur[len]) ++ break; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c +new file mode 100644 +index 000000000..386784c92 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ { ++ len = len - 1; ++ break; ++ } ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 2dae5e1c7..794b8017d 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -216,6 +216,7 @@ DEFTIMEVAR (TV_TREE_NRV , "tree NRV optimization") + DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies") + DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier") + DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier") ++DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare") + DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") + DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") + DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 606d1d60b..55ee2fe7f 100644 +--- 
a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -453,6 +453,7 @@ extern gimple_opt_pass *make_pass_cselim (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt); ++extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt); +diff --git a/gcc/tree-ssa-loop-array-widen-compare.cc b/gcc/tree-ssa-loop-array-widen-compare.cc +new file mode 100644 +index 000000000..ba6170fa0 +--- /dev/null ++++ b/gcc/tree-ssa-loop-array-widen-compare.cc +@@ -0,0 +1,1555 @@ ++/* Array widen compare. ++ Copyright (C) 2022-2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "gimple-ssa.h" ++#include "tree-pretty-print.h" ++#include "fold-const.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "tree-ssa-loop-manip.h" ++#include "tree-ssa-loop.h" ++#include "ssa.h" ++#include "tree-into-ssa.h" ++#include "cfganal.h" ++#include "cfgloop.h" ++#include "gimple-pretty-print.h" ++#include "tree-cfg.h" ++#include "cgraph.h" ++#include "print-tree.h" ++#include "cfghooks.h" ++#include "gimple-fold.h" ++ ++/* This pass handles scenarios similar to the following: ++ ++ uint32_t ++ func (uint32_t len0, uint32_t len1, const uint32_t len_limit, ++ const uint8_t *const pb, const uint8_t *const cur) ++ { ++ uint32_t len = my_min (len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++ } ++ ++ Features of this type of loop: ++ 1) the loop has two exits; ++ 2) One of the exits comes from the comparison result of the array; ++ ++ From the source code point of view, the pass completes the conversion of the ++ above scenario into: ++ ++ uint32_t ++ func (uint32_t len0, uint32_t len1, const uint32_t len_limit, ++ const uint8_t *const pb, const uint8_t *const cur) ++ { ++ uint32_t len = my_min (len0, len1); ++ // align_loop ++ for(++len; len + sizeof(uint64_t) <= len_limit; len += sizeof (uint64_t)) ++ { ++ uint64_t a = *((uint64_t*)(cur+len)); ++ uint64_t b = *((uint64_t*)(pb+len)); ++ if (a != b) ++ { ++ int lz = __builtin_ctzll (a ^ b); ++ len += lz / 8; ++ return len; ++ } ++ } ++ // epilogue_loop ++ for (;len != len_limit; ++len) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++ } ++ ++ This pass is to complete the conversion of such scenarios from the internal ++ perspective of the compiler: ++ 1) determine_loop_form: The function completes the screening of such ++ scenarios; ++ 2) 
convert_to_new_loop: The function completes the conversion of
++                          origin_loop to new loops, and removes origin_loop;
++     3) origin_loop_info: The structure is used to record important information
++                          of origin_loop: such as loop exit, growth step size
++                          of loop induction variable, initial value
++                          of induction variable, etc;
++     4) create_new_loops: The function is used as the key content of the pass
++                          to complete the creation of new loops.  */
++
++/* The useful information of origin loop.  The single instance origin_loop
++   is filled in while a candidate loop is analysed and is read again when
++   the replacement loops are built.  */
++
++struct origin_loop_info
++{
++  tree base;    /* The initial index of the array in the old loop.  */
++  tree limit;   /* The limit index of the array in the old loop.  */
++  tree arr1;    /* Array 1 in the old loop.  */
++  tree arr2;    /* Array 2 in the old loop.  */
++  edge entry_edge;      /* The edge into the old loop.  */
++  basic_block exit_bb1; /* Destination of exit_e1.  */
++  basic_block exit_bb2; /* Destination of exit_e2.  */
++  edge exit_e1;         /* The exit edge leaving from the loop header.  */
++  edge exit_e2;         /* The other exit edge.  */
++  gimple *cond_stmt1;   /* Condition ending the source block of exit_e1.  */
++  gimple *cond_stmt2;   /* Condition ending the source block of exit_e2.  */
++  gimple *update_stmt;  /* The statement incrementing the induction
++                           variable.  */
++  bool exist_prolog_assgin;
++                /* Whether the marker has an initial value assigned
++                   to the array index.  */
++  unsigned HOST_WIDE_INT step;
++                /* The growth step of the loop induction variable.  */
++};
++
++typedef struct origin_loop_info origin_loop_info;
++
++static origin_loop_info origin_loop;
++
++/* For every basic block created by this pass, the SSA name holding the
++   induction variable on exit from that block.  rewrite_add_phi_arg looks
++   predecessors up here when it fills in PHI arguments.  */
++hash_map <basic_block, tree> defs_map;
++
++/* Dump the bb information in a loop.  Only the blocks whose loop_father
++   is LOOP itself are printed, and only when a detailed dump was
++   requested.  */
++
++static void
++dump_loop_bb (struct loop *loop)
++{
++  /* Nothing would be printed without a detailed dump, so do not even
++     collect the loop body in that case.  */
++  if (!dump_file || !(dump_flags & TDF_DETAILS))
++    return;
++
++  basic_block *body = get_loop_body_in_dom_order (loop);
++
++  for (unsigned i = 0; i < loop->num_nodes; i++)
++    {
++      basic_block bb = body[i];
++      if (bb->loop_father != loop)
++        continue;
++
++      /* I is unsigned, hence %u.  */
++      fprintf (dump_file, "===== the %uth bb of loop ==========:\n", i);
++      gimple_dump_bb (dump_file, bb, 0, dump_flags);
++      fprintf (dump_file, "\n");
++    }
++  free (body);
++}
++
++/* Return true if the loop has precisely one backedge.
*/
++
++static bool
++loop_single_backedge_p (class loop *loop)
++{
++  basic_block latch_bb = loop->latch;
++
++  /* The latch may leave through one edge only ...  */
++  if (!single_succ_p (latch_bb))
++    return false;
++
++  /* ... and that edge has to be the one re-entering the header.  */
++  return single_succ_edge (latch_bb) == find_edge (latch_bb, loop->header);
++}
++
++/* Return true if the header of LOOP has exactly two incoming edges, one
++   of them from the latch and the other from a block whose loop_father is
++   not LOOP.  */
++
++static bool
++loop_single_preheader_bb (class loop *loop)
++{
++  basic_block head = loop->header;
++  if (EDGE_COUNT (head->preds) != 2)
++    return false;
++
++  basic_block src0 = EDGE_PRED (head, 0)->src;
++  basic_block src1 = EDGE_PRED (head, 1)->src;
++
++  /* The latch edge may be either of the two predecessors.  */
++  bool latch_first = src0 == loop->latch && src1->loop_father != loop;
++  bool latch_second = src1 == loop->latch && src0->loop_father != loop;
++  return latch_first || latch_second;
++}
++
++/* Reset every field of origin_loop so that a new candidate loop can be
++   analysed.  */
++static void
++init_origin_loop_structure ()
++{
++  /* Induction variable and the two arrays.  */
++  origin_loop.base = NULL;
++  origin_loop.limit = NULL;
++  origin_loop.arr1 = NULL;
++  origin_loop.arr2 = NULL;
++
++  /* Entry and exits.  */
++  origin_loop.entry_edge = NULL;
++  origin_loop.exit_bb1 = NULL;
++  origin_loop.exit_bb2 = NULL;
++  origin_loop.exit_e1 = NULL;
++  origin_loop.exit_e2 = NULL;
++
++  /* Recorded statements.  */
++  origin_loop.cond_stmt1 = NULL;
++  origin_loop.cond_stmt2 = NULL;
++  origin_loop.update_stmt = NULL;
++
++  origin_loop.exist_prolog_assgin = false;
++  origin_loop.step = 0;
++}
++
++/* Return the first edge into the header of LOOP whose source is not the
++   latch.  When there is no such edge LOOP must not have an outer loop
++   (this is asserted) and the single successor edge of the function entry
++   block is returned instead.  */
++
++static edge
++get_loop_preheader_edge (class loop *loop)
++{
++  edge pred;
++  edge_iterator it;
++
++  FOR_EACH_EDGE (pred, it, loop->header->preds)
++    if (pred->src != loop->latch)
++      return pred;
++
++  gcc_assert (!loop_outer (loop));
++  return single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
++}
++
++/* Make sure the exit condition stmt satisfies a specific form.
*/
++
++static bool
++check_cond_stmt (gimple *stmt)
++{
++  if (!stmt || gimple_code (stmt) != GIMPLE_COND)
++    return false;
++
++  enum tree_code code = gimple_cond_code (stmt);
++  if (code != NE_EXPR && code != EQ_EXPR)
++    return false;
++
++  /* Both operands have to be SSA names; a condition with a constant
++     operand is not supported.
++     eg: if (len_1 != 100).  */
++  return (TREE_CODE (gimple_cond_lhs (stmt)) == SSA_NAME
++          && TREE_CODE (gimple_cond_rhs (stmt)) == SSA_NAME);
++}
++
++/* Record the exit information in the original loop including exit edge,
++   exit bb block, exit condition stmt,
++   eg: exit_eX origin_exit_bbX cond_stmtX.
++   The exit leaving from the loop header is recorded as number 1, any
++   other exit as number 2.  Return true only if LOOP has exactly two exit
++   edges and both slots, including their condition statements, were
++   filled.  */
++
++static bool
++record_origin_loop_exit_info (class loop *loop)
++{
++  /* Refuse to overwrite information that is already recorded.  */
++  if (origin_loop.exit_e1 != NULL || origin_loop.exit_bb1 != NULL
++      || origin_loop.exit_e2 != NULL || origin_loop.exit_bb2 != NULL
++      || origin_loop.cond_stmt1 != NULL || origin_loop.cond_stmt2 != NULL)
++    return false;
++
++  /* NOTE(review): this assumes the GCC 12 interface in which
++     get_loop_exit_edges returns an auto_vec; the vector then owns its
++     storage and releases it when it goes out of scope, so no explicit
++     release call is needed on any path.  */
++  auto_vec<edge> exit_edges = get_loop_exit_edges (loop);
++  if (exit_edges.length () != 2)
++    return false;
++
++  edge e = NULL;
++  unsigned i = 0;
++  FOR_EACH_VEC_ELT (exit_edges, i, e)
++    {
++      /* The last statement of the source block is only recorded when it
++         has the supported form.  */
++      gimple *stmt = gsi_stmt (gsi_last_bb (e->src));
++      bool cond_ok = check_cond_stmt (stmt);
++
++      if (e->src == loop->header)
++        {
++          origin_loop.exit_e1 = e;
++          origin_loop.exit_bb1 = e->dest;
++          if (cond_ok)
++            origin_loop.cond_stmt1 = stmt;
++        }
++      else
++        {
++          origin_loop.exit_e2 = e;
++          origin_loop.exit_bb2 = e->dest;
++          if (cond_ok)
++            origin_loop.cond_stmt2 = stmt;
++        }
++    }
++
++  return (origin_loop.exit_e1 != NULL && origin_loop.exit_bb1 != NULL
++          && origin_loop.exit_e2 != NULL && origin_loop.exit_bb2 != NULL
++          && origin_loop.cond_stmt1 != NULL
++          && origin_loop.cond_stmt2 != NULL);
++}
++
++/* Returns true if t is
SSA_NAME and user variable exists.  */
++
++static bool
++ssa_name_var_p (tree t)
++{
++  return t && TREE_CODE (t) == SSA_NAME && SSA_NAME_VAR (t) != NULL_TREE;
++}
++
++/* Return true if T1 and T2 are both SSA names with an underlying variable
++   and that variable is the same for both.  */
++
++static bool
++same_ssa_name_var_p (tree t1, tree t2)
++{
++  return (ssa_name_var_p (t1)
++          && ssa_name_var_p (t2)
++          && SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2));
++}
++
++/* Record the right-hand operand of the condition STMT as the upper bound
++   of the induction variable in origin_loop.limit.  Return false, leaving
++   the limit unset, if a limit is already recorded or if the operands do
++   not have the supported form.  */
++
++static bool
++get_iv_upper_bound (gimple *stmt)
++{
++  if (origin_loop.limit != NULL)
++    return false;
++
++  tree iv = gimple_cond_lhs (stmt);
++  tree bound = gimple_cond_rhs (stmt);
++
++  if (TREE_CODE (TREE_TYPE (iv)) != INTEGER_TYPE
++      || TREE_CODE (TREE_TYPE (bound)) != INTEGER_TYPE)
++    return false;
++
++  gimple *bound_def = SSA_NAME_DEF_STMT (bound);
++
++  /* TODO: Currently, the input restrictions on lhs and rhs are implemented
++     through PARM_DECL.  We may consider releasing the restrictions later,
++     and we need to consider the overall adaptation scenario and adding
++     test cases.  */
++
++  /* The bound has to be a parameter whose defining statement is a
++     GIMPLE_NOP.  */
++  bool bound_ok = (ssa_name_var_p (bound)
++                   && TREE_CODE (SSA_NAME_VAR (bound)) == PARM_DECL
++                   && bound_def
++                   && gimple_code (bound_def) == GIMPLE_NOP);
++
++  /* The compared value has to be based on a variable that is not a
++     parameter.  */
++  bool iv_ok = (ssa_name_var_p (iv)
++                && TREE_CODE (SSA_NAME_VAR (iv)) != PARM_DECL);
++
++  if (!bound_ok || !iv_ok)
++    return false;
++
++  origin_loop.limit = bound;
++  return origin_loop.limit != NULL;
++}
++
++/* Returns true only when the expression on the rhs code of stmt is PLUS_EXPR,
++   rhs1 is SSA_NAME with the same var as origin_loop base, and rhs2 is
++   INTEGER_CST.
*/
++
++static bool
++check_update_stmt (gimple *stmt)
++{
++  /* STMT is the defining statement of a PHI argument, so it may just as
++     well be a PHI or the GIMPLE_NOP of a default definition; the
++     gimple_assign_* accessors are only valid on assignments.  */
++  if (!stmt || !is_gimple_assign (stmt))
++    return false;
++
++  if (gimple_assign_rhs_code (stmt) != PLUS_EXPR)
++    return false;
++
++  tree rhs1 = gimple_assign_rhs1 (stmt);
++  tree rhs2 = gimple_assign_rhs2 (stmt);
++
++  /* tree_fits_uhwi_p implies INTEGER_CST and additionally rejects values,
++     such as a negative step, that tree_to_uhwi cannot convert.  */
++  if (TREE_CODE (rhs1) != SSA_NAME
++      || !tree_fits_uhwi_p (rhs2)
++      || !same_ssa_name_var_p (rhs1, origin_loop.base))
++    return false;
++
++  /* The step is recorded even when it is rejected; only a step of one is
++     supported.  */
++  origin_loop.step = tree_to_uhwi (rhs2);
++  return origin_loop.step == 1;
++}
++
++/* Get origin loop induction variable initial value.  STMT is the exit
++   condition in the loop header.  The header PHI whose result shares its
++   variable with the left-hand operand of STMT supplies origin_loop.base
++   (its argument on the entry edge) and origin_loop.update_stmt (the
++   definition of its argument on every other incoming edge).  Return true
++   if both were found.  */
++
++static bool
++get_iv_base (gimple *stmt)
++{
++  tree lhs = gimple_cond_lhs (stmt);
++  if (origin_loop.base != NULL || origin_loop.update_stmt != NULL)
++    return false;
++
++  basic_block header = gimple_bb (stmt);
++
++  gphi_iterator gsi;
++  edge e;
++  edge_iterator ei;
++
++  for (gsi = gsi_start_phis (header); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      gphi *phi = gsi.phi ();
++      tree res = gimple_phi_result (phi);
++      if (!same_ssa_name_var_p (res, lhs))
++        continue;
++
++      tree base = PHI_ARG_DEF_FROM_EDGE (phi, origin_loop.entry_edge);
++      if (!same_ssa_name_var_p (base, lhs))
++        return false;
++      origin_loop.base = base;
++
++      FOR_EACH_EDGE (e, ei, header->preds)
++        {
++          if (e == origin_loop.entry_edge)
++            continue;
++
++          /* A PHI argument may be a constant, which has no defining
++             statement to look at.  */
++          tree iv_after = PHI_ARG_DEF_FROM_EDGE (phi, e);
++          if (TREE_CODE (iv_after) != SSA_NAME)
++            return false;
++
++          gimple *update = SSA_NAME_DEF_STMT (iv_after);
++          if (!check_update_stmt (update))
++            return false;
++          origin_loop.update_stmt = update;
++
++          /* The increment sits in the header and feeds the exit
++             condition directly: this is the prolog_assign form.  */
++          if (gimple_bb (update) == header && iv_after == lhs)
++            origin_loop.exist_prolog_assgin = true;
++        }
++    }
++
++  return origin_loop.base != NULL && origin_loop.update_stmt != NULL;
++}
++
++/* Record the upper bound and initial value of the induction variable in the
++   original loop; When prolog_assign is present, make sure loop header is in
++   simple form; And the interpretation of prolog_assign is as follows:
++   eg: while (++len != limit)
++    ......
++   For such a loop, ++len will be processed before entering header_bb, and the
++   assign is regarded as the prolog_assign of the loop.  */
++
++static bool
++record_origin_loop_header (class loop *loop)
++{
++  /* Refuse to overwrite information that is already recorded.  */
++  if (origin_loop.entry_edge != NULL || origin_loop.base != NULL
++      || origin_loop.update_stmt != NULL || origin_loop.limit != NULL)
++    return false;
++
++  origin_loop.entry_edge = get_loop_preheader_edge (loop);
++
++  /* Walk the header backwards so that the exit condition is seen, and the
++     update statement recorded, before any assignment is checked against
++     it.  */
++  for (gimple_stmt_iterator it = gsi_last_bb (loop->header);
++       !gsi_end_p (it); gsi_prev (&it))
++    {
++      gimple *cur = gsi_stmt (it);
++      if (!cur)
++        return false;
++      if (is_gimple_debug (cur))
++        continue;
++
++      switch (gimple_code (cur))
++        {
++        case GIMPLE_COND:
++          if (!get_iv_upper_bound (cur) || !get_iv_base (cur))
++            return false;
++          break;
++
++        case GIMPLE_ASSIGN:
++          /* The only assignment tolerated in the header is the recorded
++             update statement in its prolog_assign form.  */
++          if (cur != origin_loop.update_stmt
++              || !origin_loop.exist_prolog_assgin)
++            return false;
++          break;
++
++        default:
++          return false;
++        }
++    }
++
++  return (origin_loop.entry_edge != NULL && origin_loop.base != NULL
++          && origin_loop.update_stmt != NULL && origin_loop.limit != NULL);
++}
++
++/* When prolog_assign does not exist, make sure that update_stmt exists in the
++   loop latch, and its form is a specific form, eg:
++   len_2 = len_1 + 1.
++   When prolog_assign exists the latch has to be empty instead.  */
++
++static bool
++record_origin_loop_latch (class loop *loop)
++{
++  gimple_stmt_iterator first = gsi_start_bb (loop->latch);
++
++  if (origin_loop.exist_prolog_assgin)
++    return gsi_end_p (first);
++
++  /* Exactly one statement, and it has to be the recorded update.  */
++  return (gsi_one_before_end_p (first)
++          && gsi_stmt (first) == origin_loop.update_stmt);
++}
++
++/* Returns true when the DEF_STMT corresponding to arg0 of the mem_ref tree
++   satisfies the POINTER_PLUS_EXPR type.
*/
++
++static bool
++check_body_mem_ref (tree mem_ref)
++{
++  tree arg0 = TREE_OPERAND (mem_ref, 0);
++  tree arg1 = TREE_OPERAND (mem_ref, 1);
++
++  /* The address has to be an SSA name (it could also be an ADDR_EXPR,
++     which has no defining statement) of pointer type, dereferenced at
++     offset zero.  */
++  if (TREE_CODE (arg0) != SSA_NAME
++      || TREE_CODE (TREE_TYPE (arg0)) != POINTER_TYPE
++      || !integer_zerop (arg1))
++    return false;
++
++  /* The rhs code may only be queried on an assignment; the definition
++     could be a PHI or a default definition.  */
++  gimple *def = SSA_NAME_DEF_STMT (arg0);
++  return (def
++          && is_gimple_assign (def)
++          && gimple_assign_rhs_code (def) == POINTER_PLUS_EXPR);
++}
++
++/* Returns true if the rh2 of the current stmt comes from the base in the
++   original loop.  STMT is a POINTER_PLUS_EXPR assignment.  On success its
++   pointer operand is recorded as origin_loop.arr1 on the first call and
++   as origin_loop.arr2 on the second; TMP_INDEX remembers the offset seen
++   on the first call and the second call has to use the very same offset.
++   A third call fails.  */
++
++static bool
++check_body_pointer_plus (gimple *stmt, tree &tmp_index)
++{
++  tree rhs1 = gimple_assign_rhs1 (stmt);
++  tree rhs2 = gimple_assign_rhs2 (stmt);
++
++  /* A constant offset has no defining statement to inspect.  */
++  if (TREE_CODE (TREE_TYPE (rhs1)) != POINTER_TYPE
++      || TREE_CODE (rhs2) != SSA_NAME)
++    return false;
++
++  /* The offset has to be a conversion of the induction variable.  */
++  gimple *g = SSA_NAME_DEF_STMT (rhs2);
++  if (!g || !is_gimple_assign (g) || gimple_assign_rhs_code (g) != NOP_EXPR)
++    return false;
++
++  tree nop_rhs = gimple_assign_rhs1 (g);
++  if (!same_ssa_name_var_p (nop_rhs, origin_loop.base))
++    return false;
++
++  if (!origin_loop.arr1)
++    {
++      origin_loop.arr1 = rhs1;
++      tmp_index = rhs2;
++    }
++  else if (!origin_loop.arr2)
++    {
++      origin_loop.arr2 = rhs1;
++      if (tmp_index != rhs2)
++        return false;
++    }
++  else
++    return false;
++
++  return true;
++}
++
++/* Record the array comparison information in the original loop, while ensuring
++   that there are only statements related to cont_stmt in the loop body.
*/
++
++static bool
++record_origin_loop_body (class loop *loop)
++{
++  basic_block body = gimple_bb (origin_loop.cond_stmt2);
++
++  if (origin_loop.arr1 != NULL || origin_loop.arr2 != NULL)
++    return false;
++
++  /* Clear the visited flag of every statement in the body; the walk
++     below sets it on each statement the condition depends on.  */
++  gimple_stmt_iterator gsi;
++  for (gsi = gsi_start_bb (body); !gsi_end_p (gsi); gsi_next (&gsi))
++    gimple_set_visited (gsi_stmt (gsi), false);
++
++  tree cond_lhs = gimple_cond_lhs (origin_loop.cond_stmt2);
++  tree cond_rhs = gimple_cond_rhs (origin_loop.cond_stmt2);
++  if (TREE_CODE (TREE_TYPE (cond_lhs)) != INTEGER_TYPE
++      || TREE_CODE (TREE_TYPE (cond_rhs)) != INTEGER_TYPE)
++    return false;
++
++  /* Worklist of operands whose definitions still have to be checked.  */
++  auto_vec<tree> stack;
++  tree tmp_index = NULL;
++  stack.safe_push (cond_lhs);
++  stack.safe_push (cond_rhs);
++  gimple_set_visited (origin_loop.cond_stmt2, true);
++
++  while (!stack.is_empty ())
++    {
++      tree op = stack.pop ();
++
++      /* Only SSA names have a defining statement.  */
++      if (TREE_CODE (op) != SSA_NAME)
++        continue;
++
++      /* Definitions outside the body block end the walk along this
++         chain.  */
++      gimple *g = SSA_NAME_DEF_STMT (op);
++      if (!g || gimple_bb (g) != body || !is_gimple_assign (g))
++        continue;
++      gimple_set_visited (g, true);
++
++      if (gimple_assign_rhs_code (g) == MEM_REF)
++        {
++          tree mem_ref = gimple_assign_rhs1 (g);
++          if (!check_body_mem_ref (mem_ref))
++            return false;
++          stack.safe_push (TREE_OPERAND (mem_ref, 0));
++        }
++      else if (gimple_assign_rhs_code (g) == POINTER_PLUS_EXPR)
++        {
++          tree rhs2 = gimple_assign_rhs2 (g);
++          if (!check_body_pointer_plus (g, tmp_index))
++            return false;
++          stack.safe_push (rhs2);
++        }
++      else if (gimple_assign_rhs_code (g) == NOP_EXPR)
++        {
++          tree rhs = gimple_assign_rhs1 (g);
++          if (!same_ssa_name_var_p (rhs, origin_loop.base))
++            return false;
++          stack.safe_push (rhs);
++        }
++      else
++        return false;
++    }
++
++  /* Any statement other than a debug statement that was not reached from
++     the condition means the body does more than the comparison.  */
++  for (gsi = gsi_start_bb (body); !gsi_end_p (gsi); gsi_next (&gsi))
++    if (!gimple_visited_p (gsi_stmt (gsi))
++        && !is_gimple_debug (gsi_stmt (gsi)))
++      return false;
++
++  return origin_loop.arr1 != NULL && origin_loop.arr2 != NULL;
++}
++
++/* Dump the original loop
information to see if the origin loop
++   form matches.  */
++
++static void
++dump_origin_loop_info ()
++{
++  /* Only a detailed dump prints anything.  */
++  if (!dump_file || !(dump_flags & TDF_DETAILS))
++    return;
++
++  FILE *out = dump_file;
++
++  fprintf (out, "\nThe origin loop info:\n");
++
++  /* The recorded trees.  */
++  fprintf (out, "\n the origin_loop.limit is:\n");
++  print_node (out, "", origin_loop.limit, 0);
++  fprintf (out, "\n");
++
++  fprintf (out, "\n the origin_loop.base is:\n");
++  print_node (out, "", origin_loop.base, 0);
++  fprintf (out, "\n");
++
++  fprintf (out, "\n the origin_loop.arr1 is:\n");
++  print_node (out, "", origin_loop.arr1, 0);
++  fprintf (out, "\n");
++
++  fprintf (out, "\n the origin_loop.arr2 is:\n");
++  print_node (out, "", origin_loop.arr2, 0);
++  fprintf (out, "\n");
++
++  /* The recorded statements.  */
++  fprintf (out, "\n the origin_loop.cond_stmt1 is:\n");
++  print_gimple_stmt (out, origin_loop.cond_stmt1, 0);
++  fprintf (out, "\n");
++
++  fprintf (out, "\n the origin_loop.cond_stmt2 is:\n");
++  print_gimple_stmt (out, origin_loop.cond_stmt2, 0);
++  fprintf (out, "\n");
++
++  fprintf (out, "\n the origin_loop.update_stmt is:\n");
++  print_gimple_stmt (out, origin_loop.update_stmt, 0);
++  fprintf (out, "\n");
++}
++
++/* Returns true only if the exit bb of the original loop is unique and its phi
++   node parameter comes from the same variable.
*/
++
++static bool
++check_exit_bb (class loop *loop)
++{
++  basic_block exit_bb = origin_loop.exit_bb1;
++
++  /* Both exits have to reach the same block, and that block has to lie
++     outside LOOP.  */
++  if (exit_bb != origin_loop.exit_bb2
++      || flow_bb_inside_loop_p (loop, exit_bb))
++    return false;
++
++  /* Look for a two-argument PHI of the induction variable whose two
++     arguments are the same value.  */
++  for (gphi_iterator it = gsi_start_phis (exit_bb); !gsi_end_p (it);
++       gsi_next (&it))
++    {
++      gphi *phi = it.phi ();
++      if (!same_ssa_name_var_p (gimple_phi_result (phi), origin_loop.base))
++        continue;
++      if (gimple_phi_num_args (phi) != 2)
++        continue;
++      if (gimple_phi_arg_def (phi, 0) == gimple_phi_arg_def (phi, 1))
++        return true;
++    }
++  return false;
++}
++
++/* Make sure that the recorded origin_loop information meets the
++   relative requirements: a single exit block, an SSA name as base, a
++   read-only limit type, and two pointers to read-only integer elements
++   whose TYPE_SIZE is 8.  The recorded information is dumped first.  */
++
++static bool
++check_origin_loop_info (class loop *loop)
++{
++  dump_origin_loop_info ();
++
++  if (!check_exit_bb (loop))
++    return false;
++
++  if (TREE_CODE (origin_loop.base) != SSA_NAME)
++    return false;
++
++  if (!TYPE_READONLY (TREE_TYPE (origin_loop.limit)))
++    return false;
++
++  tree ptr1_type = TREE_TYPE (origin_loop.arr1);
++  tree ptr2_type = TREE_TYPE (origin_loop.arr2);
++
++  /* Neither array may be written through its pointer type.  */
++  if (!TYPE_READONLY (TREE_TYPE (ptr1_type)))
++    return false;
++
++  if (!TYPE_READONLY (TREE_TYPE (ptr2_type)))
++    return false;
++
++  if (TREE_CODE (ptr1_type) != POINTER_TYPE
++      || TREE_CODE (ptr2_type) != POINTER_TYPE
++      || TREE_CODE (TREE_TYPE (ptr1_type)) != INTEGER_TYPE
++      || TREE_CODE (TREE_TYPE (ptr2_type)) != INTEGER_TYPE)
++    return false;
++
++  tree elem1_size = TYPE_SIZE (TREE_TYPE (ptr1_type));
++  tree elem2_size = TYPE_SIZE (TREE_TYPE (ptr2_type));
++
++  return tree_to_uhwi (elem1_size) == 8 && tree_to_uhwi (elem2_size) == 8;
++}
++
++/* Record the useful information of the original loop and judge whether the
++   information meets the specified conditions.
*/
++
++static bool
++check_record_loop_form (class loop *loop)
++{
++  /* The steps run in this order because later ones read what earlier
++     ones recorded in origin_loop.  */
++  if (!record_origin_loop_exit_info (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nFailed to record loop exit information.\n");
++        }
++      return false;
++    }
++
++  if (!record_origin_loop_header (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nFailed to record loop header information.\n");
++        }
++      return false;
++    }
++
++  if (!record_origin_loop_latch (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nFailed to record loop latch information.\n");
++        }
++      return false;
++    }
++
++  if (!record_origin_loop_body (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nFailed to record loop body information.\n");
++        }
++      return false;
++    }
++
++  if (!check_origin_loop_info (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nFailed to check origin loop information.\n");
++        }
++      return false;
++    }
++
++  return true;
++}
++
++/* The main entry for judging whether the loop meets some conditions.
++   Return true if LOOP has the supported shape; origin_loop then describes
++   it.  The reason for a rejection is written to the detailed dump.  */
++
++static bool
++determine_loop_form (class loop *loop)
++{
++  /* Currently only standard loops are processed, that is, only loop_header,
++     loop_latch, loop_body 3 bb blocks are included.  */
++  if (loop->inner || loop->num_nodes != 3)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          /* Adjacent literals are concatenated as they are, so the first
++             one has to end in a space.  */
++          fprintf (dump_file, "\nWrong loop form, there is inner loop or "
++                              "redundant bb.\n");
++        }
++      return false;
++    }
++
++  if (single_exit (loop) || !loop->latch)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nWrong loop form, only one exit or loop_latch "
++                              "does not exist.\n");
++        }
++      return false;
++    }
++
++  /* Support loop with only one backedge.  */
++  if (!loop_single_backedge_p (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nWrong loop form, loop back edges are not "
++                              "unique.\n");
++        }
++      return false;
++    }
++
++  /* Support loop with only one preheader BB.  */
++  if (!loop_single_preheader_bb (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nWrong loop form, loop preheader bb are not "
++                              "unique.\n");
++        }
++      return false;
++    }
++
++  init_origin_loop_structure ();
++  if (!check_record_loop_form (loop))
++    return false;
++
++  return true;
++}
++
++/* Create prolog bb for newly constructed loop; When prolog_assign exists in
++   the original loop, the corresponding assign needs to be added to prolog_bb;
++   eg:
++     len_16 = len_10 + 1
++   Create simple copy statement when prolog_assign does not exist;
++   eg:
++     len_16 = len_10
++
++   The IR of bb is as above.
++
++   PROLOG_BB receives the new block, which is created after AFTER_BB,
++   added to OUTER and made the destination of ENTRY_EDGE; DOMINATOR_BB
++   becomes its immediate dominator.  The new name is made the current
++   definition of origin_loop.base and recorded in defs_map.  */
++
++static void
++create_prolog_bb (basic_block &prolog_bb, basic_block after_bb,
++                  basic_block dominator_bb, class loop *outer, edge entry_edge)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple *g;
++  tree lhs1;
++
++  prolog_bb = create_empty_bb (after_bb);
++  add_bb_to_loop (prolog_bb, outer);
++  redirect_edge_and_branch (entry_edge, prolog_bb);
++  set_immediate_dominator (CDI_DOMINATORS, prolog_bb, dominator_bb);
++  gsi = gsi_last_bb (prolog_bb);
++  lhs1 = copy_ssa_name (origin_loop.base);
++
++  if (origin_loop.exist_prolog_assgin)
++    g = gimple_build_assign (lhs1, PLUS_EXPR, origin_loop.base,
++          build_int_cst (TREE_TYPE (origin_loop.base), origin_loop.step));
++  else
++    g = gimple_build_assign (lhs1, NOP_EXPR, origin_loop.base);
++  gimple_seq_add_stmt (&stmts, g);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++  set_current_def (origin_loop.base, lhs1);
++  defs_map.put (prolog_bb, lhs1);
++}
++
++/* Create preheader bb for new loop; In order to ensure the standard form of
++   the loop, add a preheader_bb before loop_header.
*/
++
++static void
++create_loop_pred_bb (basic_block &loop_pred_bb, basic_block after_bb,
++                     basic_block dominator_bb, class loop *outer)
++{
++  basic_block pred_bb = create_empty_bb (after_bb);
++  loop_pred_bb = pred_bb;
++
++  add_bb_to_loop (pred_bb, outer);
++  set_immediate_dominator (CDI_DOMINATORS, pred_bb, dominator_bb);
++
++  /* The block holds no statements, so the name live on exit from it is
++     the current definition of the induction variable.  */
++  tree live_iv = get_current_def (origin_loop.base);
++  defs_map.put (pred_bb, live_iv);
++}
++
++/* For every PHI in BB, fill in the arguments that are still missing.  The
++   value for an incoming edge is the name recorded in defs_map for the
++   source block of that edge; it is only added when it shares its variable
++   with the PHI result.  Every predecessor of BB that still lacks an
++   argument must already have an entry in defs_map, as the lookup result
++   is dereferenced unconditionally.  */
++
++static void
++rewrite_add_phi_arg (basic_block bb)
++{
++  for (gphi_iterator it = gsi_start_phis (bb); !gsi_end_p (it);
++       gsi_next (&it))
++    {
++      gphi *phi = it.phi ();
++      tree result = gimple_phi_result (phi);
++
++      edge pred;
++      edge_iterator ei;
++      FOR_EACH_EDGE (pred, ei, bb->preds)
++        {
++          /* Leave arguments that are already present alone.  */
++          if (PHI_ARG_DEF_FROM_EDGE (phi, pred))
++            continue;
++
++          tree incoming = *(defs_map.get (pred->src));
++          if (!same_ssa_name_var_p (incoming, result))
++            continue;
++
++          location_t where
++            = (virtual_operand_p (incoming)
++               ? UNKNOWN_LOCATION
++               : gimple_location (SSA_NAME_DEF_STMT (incoming)));
++          add_phi_arg (phi, incoming, pred, where);
++        }
++    }
++}
++
++/* Create loop_header BB for align_loop.
++   eg:
++     _18 = (long unsigned int) len_17;
++     _19 = _18 + 8;
++     _20 = (long unsigned int) len_limit_12 (D);
++     if (_19 <= _20)
++
++   The IR of bb is as above.
*/
++
++static void
++create_align_loop_header (basic_block &align_loop_header, basic_block after_bb,
++                          basic_block dominator_bb, class loop *outer)
++{
++  tree incoming_iv = get_current_def (origin_loop.base);
++
++  /* Hook the new block into the CFG, the loop tree and the dominator
++     tree.  */
++  align_loop_header = create_empty_bb (after_bb);
++  add_bb_to_loop (align_loop_header, outer);
++  make_single_succ_edge (after_bb, align_loop_header, EDGE_FALLTHRU);
++  set_immediate_dominator (CDI_DOMINATORS, align_loop_header, dominator_bb);
++  gimple_stmt_iterator where = gsi_last_bb (align_loop_header);
++
++  /* The induction variable gets a fresh PHI result in this block; the
++     PHI arguments are filled in later.  */
++  gphi *iv_phi = create_phi_node (NULL_TREE, align_loop_header);
++  create_new_def_for (incoming_iv, iv_phi, gimple_phi_result_ptr (iv_phi));
++  tree iv = gimple_phi_result (iv_phi);
++
++  /* Stay in the wide loop while a further 8 elements fit below the
++     limit; the comparison is done in long unsigned int.  */
++  gimple_seq seq = NULL;
++  tree wide_iv = gimple_build (&seq, NOP_EXPR, long_unsigned_type_node, iv);
++  tree wide_end = gimple_build (&seq, PLUS_EXPR, TREE_TYPE (wide_iv),
++                                wide_iv,
++                                build_int_cst (TREE_TYPE (wide_iv), 8));
++  tree wide_limit = gimple_build (&seq, NOP_EXPR, long_unsigned_type_node,
++                                  origin_loop.limit);
++  gcond *guard = gimple_build_cond (LE_EXPR, wide_end, wide_limit,
++                                    NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&seq, guard);
++  gsi_insert_seq_after (&where, seq, GSI_NEW_STMT);
++
++  set_current_def (origin_loop.base, iv);
++  defs_map.put (align_loop_header, iv);
++}
++
++/* Create loop body BB for align_loop.
++   eg:
++     _21 = (sizetype) len_17;
++     _22 = cur_15 (D) + _21;
++     _23 = MEM[(long unsigned int *)_22];
++     _24 = pb_13 (D) + _21;
++     _25 = MEM[(long unsigned int *)_24];
++     if (_23 != _25)
++
++   The IR of bb is as above.
*/
++
++static void
++create_align_loop_body_bb (basic_block &align_loop_body_bb,
++                           basic_block after_bb, basic_block dominator_bb,
++                           class loop *outer)
++{
++  /* The body is reached on the true edge of the header condition.  */
++  align_loop_body_bb = create_empty_bb (after_bb);
++  add_bb_to_loop (align_loop_body_bb, outer);
++  make_edge (after_bb, align_loop_body_bb, EDGE_TRUE_VALUE);
++  set_immediate_dominator (CDI_DOMINATORS, align_loop_body_bb, dominator_bb);
++  gimple_stmt_iterator where = gsi_last_bb (align_loop_body_bb);
++
++  gimple_seq seq = NULL;
++
++  /* One offset is shared by both loads.  */
++  tree offset = gimple_build (&seq, NOP_EXPR, sizetype,
++                              get_current_def (origin_loop.base));
++
++  /* Load a long unsigned int from the second array ...
++     NOTE(review): the element count of 8 used by the header and latch
++     matches this load only if long unsigned int is 8 bytes wide --
++     confirm the pass is restricted to such targets.  */
++  tree addr2 = gimple_build (&seq, POINTER_PLUS_EXPR,
++                             TREE_TYPE (origin_loop.arr2),
++                             origin_loop.arr2, offset);
++  gimple *load2
++    = gimple_build_assign (make_ssa_name (long_unsigned_type_node),
++        fold_build2 (MEM_REF, long_unsigned_type_node, addr2,
++          build_int_cst (build_pointer_type (long_unsigned_type_node), 0)));
++  gimple_seq_add_stmt (&seq, load2);
++  tree word2 = gimple_assign_lhs (load2);
++
++  /* ... and one from the first array.  */
++  tree addr1 = gimple_build (&seq, POINTER_PLUS_EXPR,
++                             TREE_TYPE (origin_loop.arr1),
++                             origin_loop.arr1, offset);
++  gimple *load1
++    = gimple_build_assign (make_ssa_name (long_unsigned_type_node),
++        fold_build2 (MEM_REF, long_unsigned_type_node, addr1,
++          build_int_cst (build_pointer_type (long_unsigned_type_node), 0)));
++  gimple_seq_add_stmt (&seq, load1);
++  tree word1 = gimple_assign_lhs (load1);
++
++  /* Compare the two words with the comparison code of the original
++     element comparison.  */
++  gcond *cmp = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt2),
++                                  word2, word1, NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&seq, cmp);
++  gsi_insert_seq_after (&where, seq, GSI_NEW_STMT);
++}
++
++/* Create loop_latch BB for align_loop.
++   eg:
++     len_26 = len_17 + 8;
++
++   The IR of bb is as above.
*/
++
++static void
++create_align_loop_latch (basic_block &align_loop_latch, basic_block after_bb,
++                         basic_block dominator_bb, class loop *outer)
++{
++  tree cur_iv = get_current_def (origin_loop.base);
++
++  /* The latch is reached on the false edge of the body condition.  */
++  align_loop_latch = create_empty_bb (after_bb);
++  add_bb_to_loop (align_loop_latch, outer);
++  make_edge (after_bb, align_loop_latch, EDGE_FALSE_VALUE);
++  set_immediate_dominator (CDI_DOMINATORS, align_loop_latch, dominator_bb);
++  gimple_stmt_iterator where = gsi_last_bb (align_loop_latch);
++
++  /* Advance the induction variable by the 8 elements just compared.  */
++  tree next_iv = copy_ssa_name (cur_iv);
++  gimple *incr
++    = gimple_build_assign (next_iv, PLUS_EXPR, cur_iv,
++                           build_int_cst (TREE_TYPE (cur_iv), 8));
++
++  gimple_seq seq = NULL;
++  gimple_seq_add_stmt (&seq, incr);
++  gsi_insert_seq_after (&where, seq, GSI_NEW_STMT);
++  defs_map.put (align_loop_latch, next_iv);
++}
++
++/* Allocate a loop with the given HEADER and LATCH, register it as a child
++   of OUTER_LOOP and return it.  */
++
++static class loop *
++init_new_loop (class loop *outer_loop, basic_block header, basic_block latch)
++{
++  class loop *created = alloc_loop ();
++
++  created->header = header;
++  created->latch = latch;
++  add_loop (created, outer_loop);
++
++  return created;
++}
++
++/* Create necessary exit BB for align_loop.
++   eg:
++     _27 = _23 ^ _25;
++     _28 = __builtin_ctzll (_27);
++     _29 = _28 >> 3;
++     len_30 = _29 + len_17;
++
++   The IR of bb is as above.
*/
++
++static void
++create_align_loop_exit_bb (basic_block &align_loop_exit_bb,
++                           basic_block after_bb, basic_block dominator_bb,
++                           class loop *outer)
++{
++  tree cur_iv = get_current_def (origin_loop.base);
++
++  /* This block is reached on the true edge of the condition that ends
++     AFTER_BB.  */
++  align_loop_exit_bb = create_empty_bb (after_bb);
++  add_bb_to_loop (align_loop_exit_bb, outer);
++  make_edge (after_bb, align_loop_exit_bb, EDGE_TRUE_VALUE);
++  set_immediate_dominator (CDI_DOMINATORS, align_loop_exit_bb, dominator_bb);
++  gimple_stmt_iterator where = gsi_last_bb (align_loop_exit_bb);
++
++  /* The two values being compared are the operands of that condition.  */
++  gimple *wide_cmp = gsi_stmt (gsi_last_bb (after_bb));
++  tree word_a = gimple_cond_lhs (wide_cmp);
++  tree word_b = gimple_cond_rhs (wide_cmp);
++
++  gimple_seq seq = NULL;
++
++  /* Bits that differ between the two words.  */
++  tree diff = gimple_build (&seq, BIT_XOR_EXPR, TREE_TYPE (word_a), word_a,
++                            word_b);
++
++  /* Number of trailing zero bits of the difference.
++     NOTE(review): counting from the least significant bit yields the
++     index of the first differing element only if the lowest-addressed
++     element occupies the low-order bits -- confirm the pass is limited
++     to such targets.  */
++  gcall *ctz_call
++    = gimple_build_call (builtin_decl_explicit (BUILT_IN_CTZLL), 1, diff);
++  tree zero_bits = make_ssa_name (integer_type_node);
++  gimple_call_set_lhs (ctz_call, zero_bits);
++  gimple_seq_add_stmt (&seq, ctz_call);
++
++  /* Shifting right by 3 turns the bit count into a count of whole
++     8-bit elements.  */
++  tree equal_elems = copy_ssa_name (zero_bits);
++  gimple *shift
++    = gimple_build_assign (equal_elems, RSHIFT_EXPR, zero_bits,
++                           build_int_cst (TREE_TYPE (zero_bits), 3));
++  gimple_seq_add_stmt (&seq, shift);
++
++  /* Add that count to the induction variable.  */
++  tree advance = gimple_build (&seq, NOP_EXPR, TREE_TYPE (cur_iv),
++                               equal_elems);
++  tree final_iv = copy_ssa_name (cur_iv);
++  gimple *sum = gimple_build_assign (final_iv, PLUS_EXPR, advance, cur_iv);
++  gimple_seq_add_stmt (&seq, sum);
++
++  gsi_insert_seq_after (&where, seq, GSI_NEW_STMT);
++  defs_map.put (align_loop_exit_bb, final_iv);
++}
++
++/* Create loop_header BB for epilogue_loop.
++   eg:
++     # len_31 = PHI
++     if (len_31 != len_limit_12 (D))
++
++   The IR of bb is as above.
*/
++
++static void
++create_epilogue_loop_header (basic_block &epilogue_loop_header,
++                             basic_block after_bb, basic_block dominator_bb,
++                             class loop *outer)
++{
++  tree incoming_iv = get_current_def (origin_loop.base);
++
++  /* Hook the new block into the CFG, the loop tree and the dominator
++     tree.  */
++  epilogue_loop_header = create_empty_bb (after_bb);
++  add_bb_to_loop (epilogue_loop_header, outer);
++  make_single_succ_edge (after_bb, epilogue_loop_header, EDGE_FALLTHRU);
++  set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_header,
++                           dominator_bb);
++  gimple_stmt_iterator where = gsi_last_bb (epilogue_loop_header);
++
++  /* The induction variable gets a fresh PHI result in this block; the
++     PHI arguments are filled in later.  */
++  gphi *iv_phi = create_phi_node (NULL_TREE, epilogue_loop_header);
++  create_new_def_for (incoming_iv, iv_phi, gimple_phi_result_ptr (iv_phi));
++  tree iv = gimple_phi_result (iv_phi);
++
++  /* Re-create the exit test of the original header on the new name.  */
++  gcond *exit_test
++    = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt1), iv,
++                         origin_loop.limit, NULL_TREE, NULL_TREE);
++  gimple_seq seq = NULL;
++  gimple_seq_add_stmt (&seq, exit_test);
++  gsi_insert_seq_after (&where, seq, GSI_NEW_STMT);
++
++  set_current_def (origin_loop.base, iv);
++  defs_map.put (epilogue_loop_header, iv);
++}
++
++/* Create loop body BB for epilogue_loop.
++   eg:
++     _32 = (sizetype) len_31;
++     _33 = pb_13 (D) + _32;
++     _34 = *_33;
++     _35 = cur_15 (D) + _32;
++     _36 = *_35;
++     if (_34 != _36)
++
++   The IR of bb is as above.
*/
++
++static void
++create_epilogue_loop_body_bb (basic_block &epilogue_loop_body_bb,
++                              basic_block after_bb, basic_block dominator_bb,
++                              class loop *outer)
++{
++  tree cur_iv = get_current_def (origin_loop.base);
++
++  /* The body is reached on the true edge of the header condition.  */
++  epilogue_loop_body_bb = create_empty_bb (after_bb);
++  add_bb_to_loop (epilogue_loop_body_bb, outer);
++  make_edge (after_bb, epilogue_loop_body_bb, EDGE_TRUE_VALUE);
++  set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_body_bb,
++                           dominator_bb);
++  gimple_stmt_iterator where = gsi_last_bb (epilogue_loop_body_bb);
++
++  gimple_seq seq = NULL;
++
++  /* One offset is shared by both loads.  */
++  tree offset = gimple_build (&seq, NOP_EXPR, sizetype, cur_iv);
++
++  /* Load one unsigned char from the first array ...  */
++  tree addr1 = gimple_build (&seq, POINTER_PLUS_EXPR,
++                             TREE_TYPE (origin_loop.arr1),
++                             origin_loop.arr1, offset);
++  gimple *load1
++    = gimple_build_assign (make_ssa_name (unsigned_char_type_node),
++        fold_build2 (MEM_REF, unsigned_char_type_node, addr1,
++                     build_int_cst (TREE_TYPE (addr1), 0)));
++  gimple_seq_add_stmt (&seq, load1);
++  tree elem1 = gimple_assign_lhs (load1);
++
++  /* ... and one from the second array.  */
++  tree addr2 = gimple_build (&seq, POINTER_PLUS_EXPR,
++                             TREE_TYPE (origin_loop.arr2),
++                             origin_loop.arr2, offset);
++  gimple *load2
++    = gimple_build_assign (make_ssa_name (unsigned_char_type_node),
++        fold_build2 (MEM_REF, unsigned_char_type_node, addr2,
++                     build_int_cst (TREE_TYPE (addr2), 0)));
++  gimple_seq_add_stmt (&seq, load2);
++  tree elem2 = gimple_assign_lhs (load2);
++
++  /* Compare them with the comparison code of the original element
++     comparison.  */
++  gcond *cmp = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt2),
++                                  elem1, elem2, NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&seq, cmp);
++  gsi_insert_seq_after (&where, seq, GSI_NEW_STMT);
++
++  /* The body does not redefine the induction variable.  */
++  defs_map.put (epilogue_loop_body_bb, get_current_def (origin_loop.base));
++}
++
++/* Create loop_latch BB for epilogue_loop.
++   eg:
++     len_37 = len_31 + 1;
++
++   The IR of bb is as above.
*/ ++ ++static void ++create_epilogue_loop_latch (basic_block &epilogue_loop_latch, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree res; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ epilogue_loop_latch = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_latch, outer); ++ make_edge (after_bb, epilogue_loop_latch, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_latch, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_latch); ++ res = copy_ssa_name (entry_node); ++ g = gimple_build_assign (res, PLUS_EXPR, entry_node, ++ build_int_cst (TREE_TYPE (entry_node), origin_loop.step)); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (epilogue_loop_latch, res); ++} ++ ++/* convert_to_new_loop ++ | | ++ | | ++ | | entry_edge ++ | ______ | ++ | / V V ++ | | -----origin_loop_header--- ++ | | | | ++ | | -------------------------\ ++ | | | \ ++ | | V \___ ___ ___ ___ ___ ___ ___ ++ | | -----origin_loop_body----- | ++ | | | | | ++ | | -------------------------\ | ++ | | | \___ ___ ___ ___ | ++ | | V V V ++ | | -----origin_loop_latch---- -----exit_bb------ ++ | | | | | | ++ | | /-------------------------- ------------------ ++ | \ __ / ++ | ++ | | ++ | ====> |entry_edge ++ | V ++ | -------prolog_bb----- ++ | | | ++ | --------------------- ++ | | ++ | V ++ | -----align_loop_header---- ++ | /-----------------> | | ++ |/ -------------------------- ++ || / \ ++ || V V ++ || ---align_loop_body--- ---epilogue_loop_header-- ++ || | | -------| |<---| ++ || --------------------\ / ------------------------- | ++ || | \____ | | | ++ || V | | V | ++ || ---align_loop_latch--- | | ---epilogue_loop_body---- | ++ || | | | | ----| | | ++ || ---------------------- | | / ------------------------- | ++ || / __________/ | | | | ++ || / | | | V | ++ | \ __________/ | | | 
---epilogue_loop_latch--- | ++ | | | | | | | ++ | | | | ------------------------- / ++ | V | | | / ++ | -align_loop_exit_bb- | | \______________/ ++ | | | | | ++ | -------------------- | | ++ | | | | ++ | | V V ++ | | -----exit_bb------ ++ | |---->| | ++ | ------------------ ++ ++ The origin_loop conversion process starts from entry_edge and ends at ++ exit_bb; The execution logic of origin_loop is completely replaced by ++ align_loop + epilogue_loop: ++ 1) align_loop mainly implements the idea of ​​using wide-type dereference ++ and comparison on array elements, so as to achieve the effect of ++ acceleration; For the corresponding source code understanding, please ++ refer to the description of the pass at the beginning; ++ 2) epilogue_loop processes the previous loop remaining array element ++ comparison. */ ++ ++static void ++create_new_loops (edge entry_edge) ++{ ++ basic_block prolog_bb; ++ basic_block align_loop_header, align_loop_latch, align_loop_body_bb; ++ basic_block align_pred_bb, align_loop_exit_bb; ++ basic_block epilogue_loop_header, epilogue_loop_latch, epilogue_loop_body_bb; ++ basic_block epilogue_loop_pred_bb; ++ class loop *align_loop; ++ class loop *epilogue_loop; ++ ++ class loop *outer = entry_edge->src->loop_father; ++ ++ create_prolog_bb (prolog_bb, entry_edge->src, entry_edge->src, outer, ++ entry_edge); ++ ++ create_loop_pred_bb (align_pred_bb, prolog_bb, prolog_bb, outer); ++ make_single_succ_edge (prolog_bb, align_pred_bb, EDGE_FALLTHRU); ++ ++ create_align_loop_header (align_loop_header, align_pred_bb, ++ align_pred_bb, outer); ++ ++ create_align_loop_body_bb (align_loop_body_bb, align_loop_header, ++ align_loop_header, outer); ++ ++ create_align_loop_latch (align_loop_latch, align_loop_body_bb, ++ align_loop_body_bb, outer); ++ make_edge (align_loop_latch, align_loop_header, EDGE_FALLTHRU); ++ rewrite_add_phi_arg (align_loop_header); ++ ++ align_loop = init_new_loop (outer, align_loop_header, align_loop_latch); ++ if (dump_file && 
(dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint byte align loop %d:\n", align_loop->num); ++ flow_loop_dump (align_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ create_align_loop_exit_bb (align_loop_exit_bb, align_loop_body_bb, ++ align_loop_body_bb, outer); ++ ++ create_loop_pred_bb (epilogue_loop_pred_bb, align_loop_header, ++ align_loop_header, outer); ++ make_edge (align_loop_header, epilogue_loop_pred_bb, EDGE_FALSE_VALUE); ++ ++ create_epilogue_loop_header (epilogue_loop_header, epilogue_loop_pred_bb, ++ epilogue_loop_pred_bb, outer); ++ ++ create_epilogue_loop_body_bb (epilogue_loop_body_bb, epilogue_loop_header, ++ epilogue_loop_header, outer); ++ ++ create_epilogue_loop_latch (epilogue_loop_latch, epilogue_loop_body_bb, ++ epilogue_loop_body_bb, outer); ++ make_single_succ_edge (epilogue_loop_latch, epilogue_loop_header, ++ EDGE_FALLTHRU); ++ rewrite_add_phi_arg (epilogue_loop_header); ++ ++ epilogue_loop = init_new_loop (outer, epilogue_loop_header, ++ epilogue_loop_latch); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint epilogue loop %d:\n", epilogue_loop->num); ++ flow_loop_dump (epilogue_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ make_single_succ_edge (align_loop_exit_bb, origin_loop.exit_bb1, ++ EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb1, ++ entry_edge->src); ++ make_edge (epilogue_loop_body_bb, origin_loop.exit_bb1, EDGE_TRUE_VALUE); ++ ++ make_edge (epilogue_loop_header, origin_loop.exit_bb2, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb2, ++ entry_edge->src); ++ ++ rewrite_add_phi_arg (origin_loop.exit_bb1); ++ rewrite_add_phi_arg (origin_loop.exit_bb2); ++ ++ remove_edge (origin_loop.exit_e1); ++ remove_edge (origin_loop.exit_e2); ++} ++ ++/* Make sure that the dominance relationship of the newly inserted cfg ++ is not missing. 
*/ ++ ++static void ++update_loop_dominator (cdi_direction dir) ++{ ++ gcc_assert (dom_info_available_p (dir)); ++ ++ basic_block bb; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ basic_block imm_bb = get_immediate_dominator (dir, bb); ++ if (!imm_bb || bb == origin_loop.exit_bb1) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, bb, ++ recompute_dominator (CDI_DOMINATORS, bb)); ++ continue; ++ } ++ } ++} ++ ++/* Clear information about the original loop. */ ++ ++static void ++remove_origin_loop (class loop *loop) ++{ ++ basic_block *body; ++ ++ body = get_loop_body_in_dom_order (loop); ++ unsigned n = loop->num_nodes; ++ for (unsigned i = 0; i < n; i++) ++ { ++ delete_basic_block (body[i]); ++ } ++ free (body); ++ delete_loop (loop); ++} ++ ++/* Perform the conversion of origin_loop to new_loop. */ ++ ++static void ++convert_to_new_loop (class loop *loop) ++{ ++ create_new_loops (origin_loop.entry_edge); ++ remove_origin_loop (loop); ++ update_loop_dominator (CDI_DOMINATORS); ++ update_ssa (TODO_update_ssa); ++} ++ ++/* The main entry of array-widen-compare optimizes. 
*/ ++ ++static unsigned int ++tree_ssa_array_widen_compare () ++{ ++ unsigned int todo = 0; ++ class loop *loop; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ flow_loops_dump (dump_file, NULL, 1); ++ fprintf (dump_file, "\nConfirm which loop can be optimized using" ++ " array-widen-compare\n"); ++ } ++ ++ enum li_flags LI = LI_FROM_INNERMOST; ++ for (auto loop : loops_list (cfun, LI)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "======================================\n"); ++ fprintf (dump_file, "Processing loop %d:\n", loop->num); ++ fprintf (dump_file, "======================================\n"); ++ flow_loop_dump (loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ if (determine_loop_form (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "The %dth loop form is success matched," ++ "and the loop can be optimized.\n", ++ loop->num); ++ dump_loop_bb (loop); ++ } ++ ++ convert_to_new_loop (loop); ++ } ++ } ++ ++ todo |= (TODO_update_ssa); ++ return todo; ++} ++ ++/* Array widen compare. */ ++ ++namespace { ++ ++const pass_data pass_data_tree_array_widen_compare = ++{ ++ GIMPLE_PASS, ++ "awiden_compare", ++ OPTGROUP_LOOP, ++ TV_TREE_ARRAY_WIDEN_COMPARE, ++ (PROP_cfg | PROP_ssa), ++ 0, ++ 0, ++ 0, ++ (TODO_update_ssa | TODO_verify_all) ++}; ++ ++class pass_array_widen_compare : public gimple_opt_pass ++{ ++public: ++ pass_array_widen_compare (gcc::context *ctxt) ++ : gimple_opt_pass (pass_data_tree_array_widen_compare, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *); ++ ++}; // class pass_array_widen_compare ++ ++bool ++pass_array_widen_compare::gate (function *) ++{ ++ return (flag_array_widen_compare > 0 && optimize >= 3); ++} ++ ++unsigned int ++pass_array_widen_compare::execute (function *fun) ++{ ++ if (number_of_loops (fun) <= 1) ++ return 0; ++ ++ /* Only supports LP64 data mode. 
*/ ++ if (TYPE_PRECISION (long_integer_type_node) != 64 ++ || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "The current data mode is not supported," ++ "only the LP64 date mode is supported.\n"); ++ return 0; ++ } ++ ++ return tree_ssa_array_widen_compare (); ++} ++ ++} // anon namespace ++ ++gimple_opt_pass * ++make_pass_array_widen_compare (gcc::context *ctxt) ++{ ++ return new pass_array_widen_compare (ctxt); ++} +-- +2.33.0 + diff --git a/0015-Backport-Structure-reorganization-optimization.patch b/0015-Backport-Structure-reorganization-optimization.patch new file mode 100644 index 0000000..d380916 --- /dev/null +++ b/0015-Backport-Structure-reorganization-optimization.patch @@ -0,0 +1,6170 @@ +From 8631d4a39453bb262675bea9abb5c1b7d52af624 Mon Sep 17 00:00:00 2001 +From: eastb233 +Date: Wed, 19 Jul 2023 10:28:04 +0800 +Subject: [PATCH 15/22] [Backport] Structure reorganization optimization + +Reference: https://gcc.gnu.org/git/?p=gcc-old.git;a=commit;h=6e1bd1c900533c627b5e4fbbecb41dcd7974b522 + +Introduce structure reorganization optimization, that change C-like +structures layout in order to better utilize spatial locality. This +transformation is affective for programs containing arrays of structures. 
+--- + gcc/Makefile.in | 1 + + gcc/common.opt | 4 +- + gcc/configure | 2 +- + gcc/configure.ac | 2 +- + gcc/doc/invoke.texi | 23 + + gcc/gimple-ssa-warn-access.cc | 8 + + gcc/ipa-param-manipulation.cc | 3 +- + gcc/ipa-param-manipulation.h | 3 +- + gcc/ipa-struct-reorg/escapes.def | 60 + + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 4015 +++++++++++++++++ + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 235 + + gcc/params.opt | 4 + + gcc/passes.def | 2 + + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 35 + + gcc/testsuite/gcc.dg/struct/struct_reorg-1.c | 24 + + gcc/testsuite/gcc.dg/struct/struct_reorg-2.c | 29 + + gcc/testsuite/gcc.dg/struct/struct_reorg-3.c | 23 + + gcc/testsuite/gcc.dg/struct/struct_reorg-4.c | 59 + + .../gcc.dg/struct/w_prof_global_array.c | 29 + + .../gcc.dg/struct/w_prof_global_var.c | 42 + + .../gcc.dg/struct/w_prof_local_array.c | 37 + + .../gcc.dg/struct/w_prof_local_var.c | 40 + + .../gcc.dg/struct/w_prof_single_str_global.c | 31 + + gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c | 64 + + .../gcc.dg/struct/w_ratio_cold_str.c | 43 + + .../gcc.dg/struct/wo_prof_array_field.c | 26 + + .../struct/wo_prof_array_through_pointer.c | 38 + + .../gcc.dg/struct/wo_prof_double_malloc.c | 29 + + .../gcc.dg/struct/wo_prof_empty_str.c | 44 + + .../struct/wo_prof_escape_arg_to_local.c | 44 + + .../gcc.dg/struct/wo_prof_escape_return-1.c | 33 + + .../gcc.dg/struct/wo_prof_escape_return.c | 32 + + .../gcc.dg/struct/wo_prof_escape_str_init.c | 31 + + .../struct/wo_prof_escape_substr_array.c | 33 + + .../struct/wo_prof_escape_substr_pointer.c | 48 + + .../struct/wo_prof_escape_substr_value.c | 45 + + .../gcc.dg/struct/wo_prof_global_array.c | 32 + + .../gcc.dg/struct/wo_prof_global_var.c | 45 + + .../gcc.dg/struct/wo_prof_local_array.c | 40 + + .../gcc.dg/struct/wo_prof_local_var.c | 43 + + .../gcc.dg/struct/wo_prof_malloc_size_var-1.c | 47 + + .../gcc.dg/struct/wo_prof_malloc_size_var.c | 47 + + .../struct/wo_prof_mult_field_peeling.c | 42 + + 
.../gcc.dg/struct/wo_prof_single_str_global.c | 34 + + .../gcc.dg/struct/wo_prof_single_str_local.c | 34 + + .../struct/wo_prof_single_str_pointer.c | 38 + + .../gcc.dg/struct/wo_prof_two_strs.c | 67 + + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + 49 files changed, 5686 insertions(+), 6 deletions(-) + create mode 100644 gcc/ipa-struct-reorg/escapes.def + create mode 100644 gcc/ipa-struct-reorg/ipa-struct-reorg.cc + create mode 100644 gcc/ipa-struct-reorg/ipa-struct-reorg.h + create mode 100644 gcc/testsuite/gcc.dg/struct/struct-reorg.exp + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-1.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-2.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-3.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-4.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_global_array.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_global_var.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_local_array.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_local_var.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c + create mode 100644 gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c + create mode 100644 
gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_local.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_single_str_pointer.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 31ff95500..c863ad992 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1451,6 +1451,7 @@ OBJS = \ + incpath.o \ + init-regs.o \ + internal-fn.o \ ++ ipa-struct-reorg/ipa-struct-reorg.o \ + ipa-cp.o \ + ipa-sra.o \ + ipa-devirt.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index e365a48bc..b48fa3228 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1950,8 +1950,8 @@ Common Ignore + Does nothing. Preserved for backward compatibility. + + fipa-struct-reorg +-Common Ignore +-Does nothing. Preserved for backward compatibility. ++Common Var(flag_ipa_struct_reorg) Init(0) Optimization ++Perform structure layout optimizations. 
+ + fipa-vrp + Common Var(flag_ipa_vrp) Optimization +diff --git a/gcc/configure b/gcc/configure +index c749ace01..98bbf0f85 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -34191,7 +34191,7 @@ $as_echo "$as_me: executing $ac_file commands" >&6;} + "depdir":C) $SHELL $ac_aux_dir/mkinstalldirs $DEPDIR ;; + "gccdepdir":C) + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR +- for lang in $subdirs c-family common analyzer rtl-ssa ++ for lang in $subdirs c-family common analyzer rtl-ssa ipa-struct-reorg + do + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR + done ;; +diff --git a/gcc/configure.ac b/gcc/configure.ac +index 992a50e7b..c74f4b555 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -1340,7 +1340,7 @@ AC_CHECK_HEADERS(ext/hash_map) + ZW_CREATE_DEPDIR + AC_CONFIG_COMMANDS([gccdepdir],[ + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR +- for lang in $subdirs c-family common analyzer rtl-ssa ++ for lang in $subdirs c-family common analyzer rtl-ssa ipa-struct-reorg + do + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR + done], [subdirs="$subdirs" ac_aux_dir=$ac_aux_dir DEPDIR=$DEPDIR]) +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index ff8cd032f..e37bae5b1 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -526,6 +526,7 @@ Objective-C and Objective-C++ Dialects}. + -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol + -finline-small-functions -fipa-modref -fipa-cp -fipa-cp-clone @gol + -fipa-bit-cp -fipa-vrp -fipa-pta -fipa-profile -fipa-pure-const @gol ++-fipa-struct-reorg @gol + -fipa-reference -fipa-reference-addressable @gol + -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} @gol + -flive-patching=@var{level} @gol +@@ -11886,6 +11887,19 @@ higher. + Discover which functions are pure or constant. + Enabled by default at @option{-O1} and higher. 
+ ++@item -fipa-struct-reorg ++@opindex fipa-struct-reorg ++Perform structure reorganization optimization, which changes C-like structure ++layout in order to better utilize spatial locality. This transformation is ++effective for programs containing arrays of structures. Available in two ++compilation modes: profile-based (enabled with @option{-fprofile-generate}) ++or static (which uses built-in heuristics). It works only in whole program ++mode, so it requires @option{-fwhole-program} to be ++enabled. Structures considered @samp{cold} by this transformation are not ++affected (see @option{--param struct-reorg-cold-struct-ratio=@var{value}}). ++ ++With this flag, the program debug info reflects a new structure layout. ++ + @item -fipa-reference + @opindex fipa-reference + Discover which static variables do not escape the +@@ -13772,6 +13786,15 @@ In each case, the @var{value} is an integer. The following choices + of @var{name} are recognized for all targets: + + @table @gcctabopt ++@item struct-reorg-cold-struct-ratio ++The threshold ratio (as a percentage) between a structure frequency ++and the frequency of the hottest structure in the program. This parameter ++is used by the struct-reorg optimization enabled by @option{-fipa-struct-reorg}. ++We say that if the ratio of a structure frequency, calculated by profiling, ++to the hottest structure frequency in the program is less than this ++parameter, then structure reorganization is not applied to this structure. ++The default is 10. ++ + @item predictable-branch-outcome + When branch is predicted to be taken with probability lower than this threshold + (in percent), then it is considered well predictable. 
+diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc +index 8d088ad33..a24645783 100644 +--- a/gcc/gimple-ssa-warn-access.cc ++++ b/gcc/gimple-ssa-warn-access.cc +@@ -2193,6 +2193,14 @@ pass_waccess::set_pass_param (unsigned int n, bool early) + bool + pass_waccess::gate (function *) + { ++ /* FIXME: In structure optimizations, some statements will be ++ rewritten and removed from the BB, leaving some unused SSA. ++ In pass waccess, it will traverse all SSA and cause ICE ++ when handling these unused SSA. So temporarily disable ++ pass waccess when enable structure optimizations. */ ++ if (flag_ipa_struct_reorg) ++ return false; ++ + return (warn_free_nonheap_object + || warn_mismatched_alloc + || warn_mismatched_new_delete); +diff --git a/gcc/ipa-param-manipulation.cc b/gcc/ipa-param-manipulation.cc +index 38328c3e8..f9e956008 100644 +--- a/gcc/ipa-param-manipulation.cc ++++ b/gcc/ipa-param-manipulation.cc +@@ -55,7 +55,8 @@ static const char *ipa_param_prefixes[IPA_PARAM_PREFIX_COUNT] + = {"SYNTH", + "ISRA", + "simd", +- "mask"}; ++ "mask", ++ "struct_reorg"}; + + /* Names of parameters for dumping. Keep in sync with enum ipa_parm_op. */ + +diff --git a/gcc/ipa-param-manipulation.h b/gcc/ipa-param-manipulation.h +index a9ad2b216..71f4a0a2f 100644 +--- a/gcc/ipa-param-manipulation.h ++++ b/gcc/ipa-param-manipulation.h +@@ -126,6 +126,7 @@ enum ipa_param_name_prefix_indices + IPA_PARAM_PREFIX_ISRA, + IPA_PARAM_PREFIX_SIMD, + IPA_PARAM_PREFIX_MASK, ++ IPA_PARAM_PREFIX_REORG, + IPA_PARAM_PREFIX_COUNT + }; + +@@ -189,7 +190,7 @@ struct GTY(()) ipa_adjusted_param + + /* Index into ipa_param_prefixes specifying a prefix to be used with + DECL_NAMEs of newly synthesized parameters. */ +- unsigned param_prefix_index : 2; ++ unsigned param_prefix_index : 3; + + /* Storage order of the original parameter (for the cases when the new + parameter is a component of an original one). 
*/ +diff --git a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def +new file mode 100644 +index 000000000..c4c8e0739 +--- /dev/null ++++ b/gcc/ipa-struct-reorg/escapes.def +@@ -0,0 +1,60 @@ ++/* Copyright (C) 2016-2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* Before including this file, you should define a macro: ++ DEF_ESCAPE (ENUM, TEXT) ++ ++ This macro will be called once for each escape reason. The ++ ENUM will be of type "escape_type". The TEXT is describing ++ the reason for the escape. 
++*/ ++DEF_ESCAPE (escape_marked_as_used, "Type used in variable marked as used") ++DEF_ESCAPE (escape_via_global_var, "Type used via a external visible variable") ++DEF_ESCAPE (escape_via_global_init, "Type used via a global init of a variable") ++DEF_ESCAPE (escape_non_supported_allocator, "Type used by allocation which is not currently supported") ++DEF_ESCAPE (escape_dependent_type_escapes, "Type uses a type which escapes or is used by a type which escapes") ++DEF_ESCAPE (escape_var_arg_function, "Types escapes via a variable argument function") ++DEF_ESCAPE (escape_bitfields, "Types has bitfields") ++DEF_ESCAPE (escape_recusive_type, "Type has a recusive relationship") ++DEF_ESCAPE (escape_variable_sized_array, "Type has a variable sized type") ++DEF_ESCAPE (escape_external_function, "Type escapes via an external function call") ++DEF_ESCAPE (escape_visible_function, "Type escapes via expternally visible function call") ++DEF_ESCAPE (escape_pointer_function, "Type escapes via an function pointer call") ++DEF_ESCAPE (escape_unkown_field, "Type escapes via an unkown field accessed") ++DEF_ESCAPE (escape_union, "Type escapes via an union") ++DEF_ESCAPE (escape_inline_asm, "Type escapes via inline-asm") ++DEF_ESCAPE (escape_non_multiply_size, "Type escapes a pointer plus which is not a multiplicate of the size") ++DEF_ESCAPE (escape_cast_void, "Type escapes a cast to/from void*") ++DEF_ESCAPE (escape_cast_another_ptr, "Type escapes a cast to a different pointer") ++DEF_ESCAPE (escape_cast_int, "Type escapes a cast from/to intergral type") ++DEF_ESCAPE (escape_int_const, "Type escapes via integer constant") ++DEF_ESCAPE (escape_vce, "Type escapes via a VIEW_CONVERT_EXPR") ++DEF_ESCAPE (escape_array_access, "Type escapes via an array access") ++DEF_ESCAPE (escape_noclonable_function, "Type escapes via a non-clonable function") ++DEF_ESCAPE (escape_rescusive_type, "Recusive type") ++DEF_ESCAPE (escape_user_alignment, "Type has an user alignment set") ++DEF_ESCAPE 
(escape_volatile, "Type has an variable which is volatile") ++DEF_ESCAPE (escape_non_eq, "Type has a comparison other than equals or not equals") ++DEF_ESCAPE (escape_addr, "Type escapes via taking the address of field") ++DEF_ESCAPE (escape_cannot_change_signature, "Type used in a call that cannot change signature") ++DEF_ESCAPE (escape_non_optimize, "Type used by a function which turns off struct reorg") ++DEF_ESCAPE (escape_array, "Type is used in an array [not handled yet]") ++DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]") ++DEF_ESCAPE (escape_return, "Type escapes via a return [not handled yet]") ++ ++#undef DEF_ESCAPE +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +new file mode 100644 +index 000000000..238530860 +--- /dev/null ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -0,0 +1,4015 @@ ++/* Struct-reorg optimizations. ++ Copyright (C) 2016-2023 Free Software Foundation, Inc. ++ Contributed by Andrew Pinski ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* This pass implements the structure reorganization organization ++ (struct-reorg). 
++ ++ Right now it handles just splitting off the hottest fields for a struct ++ of 2 fields: ++ struct s { ++ type1 field1; // Hot field ++ type2 field2; ++ }; ++ s *v; ++ into: ++ struct s_hot { ++ type1 field1; ++ }; ++ struct s_cold { ++ type2 field2; ++ }; ++ s_hot *v_hot; ++ s_cold *v_cold; ++ ++ TODO: This pass can be extended to more fields, and other algorithms ++ like reordering. ++ ++ This pass operates in four stages: ++ 1. All of the field accesses, declarations (struct types and pointers ++ to that type) and struct types are scanned and recorded. This includes ++ global declarations. Also record all allocation and freeing sites; ++ this is needed for the rewriting phase. ++ ++ FIXME: If there is a top-level inline-asm, the pass immediately returns. ++ ++ 2. Prune out the types which are considered escaping. ++ Examples of types which are considered escaping: ++ a. A declaration has been marked as having the attribute used or ++ has user defined alignment (type too). ++ b. Accesses are via a BIT_FIELD_REF. ++ FIXME: Handle VECTOR_TYPE for this case. ++ c. The "allocation" site is not a known builtin function. ++ d. Casting to/from an integer. ++ ++ 3. Analyze the types for which optimization to do. ++ a. Split the fields into two different structs. ++ (FIXME: two field case handled only) ++ Look at all structs which contain two fields, if one of the fields ++ is hotter then split it and put it on the rewriting for accesses. ++ Allocations and freeing are marked to split into two functions; ++ all uses of that type will now be considered as two. ++ b. Reorder fields hottest to the coldest. TODO: Implement. ++ ++ 4. Rewrite each access and allocation and free which is marked as ++ rewriting. 
++ ++*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "tree.h" ++#include "tree-pass.h" ++#include "cgraph.h" ++#include "diagnostic-core.h" ++#include "function.h" ++#include "basic-block.h" ++#include "gimple.h" ++#include "vec.h" ++#include "tree-pretty-print.h" ++#include "gimple-pretty-print.h" ++#include "gimple-iterator.h" ++#include "cfg.h" ++#include "ssa.h" ++#include "tree-dfa.h" ++#include "fold-const.h" ++#include "tree-inline.h" ++#include "stor-layout.h" ++#include "tree-into-ssa.h" ++#include "tree-cfg.h" ++#include "alloc-pool.h" ++#include "symbol-summary.h" ++#include "ipa-prop.h" ++#include "ipa-struct-reorg.h" ++#include "tree-eh.h" ++#include "bitmap.h" ++#include "tree-ssa-live.h" /* For remove_unused_locals. */ ++#include "ipa-param-manipulation.h" ++#include "gimplify-me.h" ++ ++namespace { ++ ++using namespace struct_reorg; ++ ++#define VOID_POINTER_P(type) \ ++ (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type))) ++ ++/* Return true iff TYPE is stdarg va_list type. */ ++ ++static inline bool ++is_va_list_type (tree type) ++{ ++ return TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (va_list_type_node); ++} ++ ++static const char * ++get_type_name (tree type) ++{ ++ const char *tname = NULL; ++ ++ if (type == NULL) ++ return NULL; ++ ++ if (TYPE_NAME (type) != NULL) ++ { ++ if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE) ++ tname = IDENTIFIER_POINTER (TYPE_NAME (type)); ++ else if (DECL_NAME (TYPE_NAME (type)) != NULL) ++ tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); ++ } ++ return tname; ++} ++ ++/* Return the inner most type for arrays and pointers of TYPE. */ ++ ++static tree ++inner_type (tree type) ++{ ++ while (POINTER_TYPE_P (type) ++ || TREE_CODE (type) == ARRAY_TYPE) ++ type = TREE_TYPE (type); ++ return type; ++} ++ ++/* Return true if TYPE is a type which struct reorg should handled. 
*/ ++ ++static bool ++handled_type (tree type) ++{ ++ type = inner_type (type); ++ if (TREE_CODE (type) == RECORD_TYPE) ++ return !is_va_list_type (type); ++ return false; ++} ++ ++/* The gimplify_buildN API is moved to tree-vect-generic.c locally ++ at commit b972e036f40c12b106f9070c3e8adea0eb8a45fa. ++ ++ The gimplify_buildN API is copied from gcc 10 implementation. ++*/ ++ ++/* Build a binary operation and gimplify it. Emit code before GSI. ++ Return the gimple_val holding the result. */ ++ ++static tree ++gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code, ++ tree type, tree a, tree b) ++{ ++ tree ret; ++ ++ ret = fold_build2_loc (gimple_location (gsi_stmt (*gsi)), code, type, a, b); ++ return force_gimple_operand_gsi (gsi, ret, true, NULL, true, ++ GSI_SAME_STMT); ++} ++ ++/* Build a unary operation and gimplify it. Emit code before GSI. ++ Return the gimple_val holding the result. */ ++ ++static tree ++gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type, ++ tree a) ++{ ++ tree ret; ++ ++ ret = fold_build1_loc (gimple_location (gsi_stmt (*gsi)), code, type, a); ++ return force_gimple_operand_gsi (gsi, ret, true, NULL, true, ++ GSI_SAME_STMT); ++} ++ ++} // anon namespace ++ ++ ++namespace struct_reorg { ++ ++/* Constructor of srfunction. */ ++ ++srfunction::srfunction (cgraph_node *n) ++ : node (n), ++ old (NULL), ++ newnode (NULL), ++ newf (NULL) ++{} ++ ++/* Add an ARG to the list of arguments for the function. */ ++ ++void ++srfunction::add_arg (srdecl *arg) ++{ ++ args.safe_push (arg); ++} ++ ++/* Dump the SRFUNCTION to the file FILE. 
*/ ++ ++void ++srfunction::dump (FILE *file) ++{ ++ if (node) ++ { ++ fprintf (file, "function : "); ++ print_generic_expr (file, node->decl); ++ fprintf (file, " with arguments: "); ++ for (unsigned i = 0; i < args.length (); i++) ++ { ++ if (i == 0) ++ fprintf (file, "\n "); ++ else ++ fprintf (file, "\n, "); ++ args[i]->dump (file); ++ } ++ ++ fprintf (file, "\nuses globals: "); ++ for (unsigned i = 0; i < globals.length (); i++) ++ { ++ fprintf (file, "\n "); ++ globals[i]->dump (file); ++ } ++ ++ fprintf (file, "\ndecls: "); ++ } ++ else ++ fprintf (file, "globals : "); ++ ++ for (unsigned i = 0; i < decls.length (); i++) ++ { ++ fprintf (file, "\n "); ++ decls[i]->dump (file); ++ } ++} ++ ++/* Simple dump the SRFUNCTION to the file FILE; ++ used so it is not recusive. */ ++ ++void ++srfunction::simple_dump (FILE *file) ++{ ++ print_generic_expr (file, node->decl); ++} ++ ++/* Constructor of FIELD. */ ++ ++srfield::srfield (tree field, srtype *base) ++ : offset (int_byte_position (field)), ++ fieldtype (TREE_TYPE (field)), ++ fielddecl (field), ++ base (base), ++ type (NULL), ++ clusternum (0) ++{ ++ for (int i = 0; i < max_split; i++) ++ newfield[i] = NULL_TREE; ++} ++ ++/* Constructor of TYPE. */ ++ ++srtype::srtype (tree type) ++ : type (type), ++ chain_type (false), ++ escapes (does_not_escape), ++ visited (false) ++{ ++ for (int i = 0; i < max_split; i++) ++ newtype[i] = NULL_TREE; ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ if (DECL_BIT_FIELD (field)) ++ { ++ escapes = escape_bitfields; ++ continue; ++ } ++ else if (!DECL_SIZE (field) ++ || TREE_CODE (DECL_SIZE (field)) != INTEGER_CST) ++ { ++ escapes = escape_variable_sized_array; ++ break; ++ } ++ srfield *t = new srfield (field, this); ++ fields.safe_push (t); ++ } ++ } ++} ++ ++/* Mark the type as escaping type E at statement STMT. 
*/ ++ ++void ++srtype::mark_escape (escape_type e, gimple *stmt) ++{ ++ /* Once the type has escaped, it should never ++ change back to non escaping. */ ++ gcc_assert (e != does_not_escape); ++ if (has_escaped ()) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nO type: "); ++ simple_dump (dump_file); ++ fprintf (dump_file, " has already escaped."); ++ fprintf (dump_file, " old = \"%s\" ", ++ escape_type_string[escapes - 1]); ++ fprintf (dump_file, " new = \"%s\"\n", escape_type_string[e - 1]); ++ if (stmt) ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return; ++ } ++ escapes = e; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nN type: "); ++ simple_dump (dump_file); ++ fprintf (dump_file, " new = \"%s\"\n", escape_reason ()); ++ if (stmt) ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++} ++ ++/* Add FIELD to the list of fields that use this type. */ ++ ++void ++srtype::add_field_site (srfield *field) ++{ ++ field_sites.safe_push (field); ++} ++ ++/* Constructor of DECL; FUNC stays NULL_TREE for global variables. */ ++ ++srdecl::srdecl (srtype *tp, tree decl, int argnum) ++ : type (tp), ++ decl (decl), ++ func (NULL_TREE), ++ argumentnum (argnum), ++ visited (false) ++{ ++ if (TREE_CODE (decl) == SSA_NAME) ++ func = current_function_decl; ++ else if (!is_global_var (decl)) ++ func = DECL_CONTEXT (decl); ++ for (int i = 0; i < max_split; i++) ++ newdecl[i] = NULL_TREE; ++} ++ ++/* Find DECL in the function; return NULL if it has not been recorded. */ ++ ++srdecl * ++srfunction::find_decl (tree decl) ++{ ++ for (unsigned i = 0; i < decls.length (); i++) ++ if (decls[i]->decl == decl) ++ return decls[i]; ++ return NULL; ++} ++ ++/* Record DECL of the TYPE with argument num ARG. */ ++ ++srdecl * ++srfunction::record_decl (srtype *type, tree decl, int arg) ++{ ++ // Search for the decl to see if it is already there.
++ srdecl *decl1 = find_decl (decl); ++ ++ if (decl1) ++ return decl1; ++ ++ gcc_assert (type); ++ ++ decl1 = new srdecl (type, decl, arg); ++ decls.safe_push (decl1); ++ return decl1; ++} ++ ++/* Find the field at OFF offset; return NULL if no field starts there. */ ++ ++srfield * ++srtype::find_field (unsigned HOST_WIDE_INT off) ++{ ++ unsigned int i; ++ srfield *field; ++ ++ /* FIXME: handle array/struct field inside the current struct. */ ++ /* NOTE This does not need to be fixed to handle libquantum. */ ++ FOR_EACH_VEC_ELT (fields, i, field) ++ { ++ if (off == field->offset) ++ return field; ++ } ++ return NULL; ++} ++ ++/* Add the function FN to the list of functions if it ++ is not already there. */ ++ ++void ++srtype::add_function (srfunction *fn) ++{ ++ unsigned decluid; ++ unsigned i; ++ decluid = DECL_UID (fn->node->decl); ++ ++ srfunction *fn1; ++ // Search for the decl to see if it is already there. ++ FOR_EACH_VEC_ELT (functions, i, fn1) ++ { ++ if (DECL_UID (fn1->node->decl) == decluid) ++ return; ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Recording new function: %u.\n", decluid); ++ ++ functions.safe_push (fn); ++} ++ ++/* Dump out the type structure to FILE.
*/ ++ ++void ++srtype::dump (FILE *f) ++{ ++ unsigned int i; ++ srfield *field; ++ srfunction *fn; ++ sraccess *access; ++ ++ if (chain_type) ++ fprintf (f, "chain decl "); ++ ++ fprintf (f, "type : "); ++ print_generic_expr (f, type); ++ fprintf (f, "(%u) { ", TYPE_UID (type)); /* TYPE_UID is unsigned. */ ++ if (escapes != does_not_escape) ++ fprintf (f, " escapes = \"%s\"\n", escape_reason ()); ++ fprintf (f, " fields = { "); ++ FOR_EACH_VEC_ELT (fields, i, field) ++ { ++ if (i == 0) ++ fprintf (f, "\n "); ++ else ++ fprintf (f, "\n, "); ++ field->dump (f); ++ } ++ fprintf (f, " }\n "); ++ fprintf (f, "\n accesses = {"); ++ FOR_EACH_VEC_ELT (accesses, i, access) ++ { ++ fprintf (f, "\n"); ++ access->dump (f); ++ } ++ fprintf (f, " }\n "); ++ fprintf (f, "\n functions = {"); ++ FOR_EACH_VEC_ELT (functions, i, fn) ++ { ++ fprintf (f, " \n"); ++ fn->simple_dump (f); ++ } ++ fprintf (f, "\n }\n"); ++ fprintf (f, "\n field_sites = {"); ++ FOR_EACH_VEC_ELT (field_sites, i, field) ++ { ++ fprintf (f, " \n"); ++ field->simple_dump (f); ++ } ++ fprintf (f, "\n }\n"); ++ fprintf (f, "}\n"); ++} ++ ++/* A simplified dump of the type structure to FILE: prints only the type itself. */ ++ ++void ++srtype::simple_dump (FILE *f) ++{ ++ print_generic_expr (f, type); ++} ++ ++/* Analyze the type and decide what to be done with it. */ ++ ++void ++srtype::analyze (void) ++{ ++ /* Chain decl types can't be split ++ so don't try. */ ++ if (chain_type) ++ return; ++ ++ /* If there is only one field then there is nothing ++ to be done. */ ++ if (fields.length () == 1) ++ return; ++ ++ /* For now we unconditionally split only structures with 2 fields ++ into 2 different structures. In future we intend to add profile ++ info and/or static heuristics to differentiate splitting process. */ ++ if (fields.length () == 2) ++ fields[1]->clusternum = 1; ++ ++ /* Otherwise we do nothing. */ ++ if (fields.length () >= 3) ++ return; ++} ++ ++/* Create the new fields for this field.
*/ ++ ++void ++srfield::create_new_fields (tree newtype[max_split], ++ tree newfields[max_split], ++ tree newlast[max_split]) ++{ ++ tree nt[max_split]; ++ ++ for (unsigned i = 0; i < max_split; i++) ++ nt[i] = NULL; ++ ++ if (type == NULL) ++ nt[0] = fieldtype; ++ else ++ memcpy (nt, type->newtype, sizeof (type->newtype)); ++ ++ for (unsigned i = 0; i < max_split && nt[i] != NULL; i++) ++ { ++ tree field = make_node (FIELD_DECL); ++ if (nt[1] != NULL && DECL_NAME (fielddecl)) ++ { ++ const char *tname = IDENTIFIER_POINTER (DECL_NAME (fielddecl)); ++ char id[10]; ++ char *name; ++ ++ snprintf (id, sizeof (id), "%u", i); ++ name = concat (tname, ".reorg.", id, NULL); ++ DECL_NAME (field) = get_identifier (name); ++ free (name); ++ } ++ else ++ DECL_NAME (field) = DECL_NAME (fielddecl); ++ ++ TREE_TYPE (field) = reconstruct_complex_type ( ++ TREE_TYPE (fielddecl), nt[i]); ++ DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl); ++ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); ++ DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl); ++ TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl); ++ DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl); ++ TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (fielddecl); ++ DECL_CONTEXT (field) = newtype[clusternum]; ++ ++ if (newfields[clusternum] == NULL) ++ newfields[clusternum] = newlast[clusternum] = field; ++ else ++ { ++ DECL_CHAIN (newlast[clusternum]) = field; ++ newlast[clusternum] = field; ++ } ++ newfield[i] = field; ++ } ++} ++ ++/* Create the new TYPE corresponding to THIS type. Return true if new types were created. */ ++ ++bool ++srtype::create_new_type (void) ++{ ++ /* If the type has been visited, ++ then return if a new type was ++ created or not. */ ++ if (visited) ++ return has_new_type (); ++ ++ visited = true; ++ ++ if (escapes != does_not_escape) ++ { ++ newtype[0] = type; ++ return false; ++ } ++ ++ bool createnewtype = false; ++ unsigned maxclusters = 0; ++ ++ /* Create a new type for each field.
*/ ++ for (unsigned i = 0; i < fields.length (); i++) ++ { ++ srfield *field = fields[i]; ++ if (field->type) ++ createnewtype |= field->type->create_new_type (); ++ if (field->clusternum > maxclusters) ++ maxclusters = field->clusternum; ++ } ++ ++ /* If none of the fields' types changed and ++ we are not splitting the struct into several clusters, ++ then just return false and don't change the type. */ ++ if (!createnewtype && maxclusters == 0) ++ { ++ newtype[0] = type; ++ return false; ++ } ++ ++ /* Should have at most max_split clusters. */ ++ gcc_assert (maxclusters < max_split); ++ ++ tree newfields[max_split]; ++ tree newlast[max_split]; ++ ++ maxclusters++; ++ ++ const char *tname = NULL; ++ ++ if (TYPE_NAME (type) != NULL) ++ { ++ if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE) ++ tname = IDENTIFIER_POINTER (TYPE_NAME (type)); ++ else if (DECL_NAME (TYPE_NAME (type)) != NULL) ++ tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); ++ } ++ ++ for (unsigned i = 0; i < maxclusters; i++) ++ { ++ newfields[i] = NULL_TREE; ++ newlast[i] = NULL_TREE; ++ newtype[i] = make_node (RECORD_TYPE); ++ ++ char *name = NULL; ++ char id[10]; ++ snprintf (id, sizeof (id), "%u", i); ++ if (tname) ++ { ++ name = concat (tname, ".reorg.", id, NULL); ++ TYPE_NAME (newtype[i]) = get_identifier (name); ++ free (name); ++ } ++ } ++ ++ for (unsigned i = 0; i < fields.length (); i++) ++ { ++ srfield *f = fields[i]; ++ f->create_new_fields (newtype, newfields, newlast); ++ } ++ ++ /* No reason to warn about these structs since the warning would ++ have happened already.
*/ ++ int save_warn_padded = warn_padded; ++ warn_padded = 0; ++ ++ for (unsigned i = 0; i < maxclusters; i++) ++ { ++ TYPE_FIELDS (newtype[i]) = newfields[i]; ++ layout_type (newtype[i]); ++ } ++ ++ warn_padded = save_warn_padded; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Created %u types:\n", maxclusters); ++ for (unsigned i = 0; i < maxclusters; i++) ++ { ++ print_generic_expr (dump_file, newtype[i]); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ return true; ++} ++ ++/* Helper function to copy some attributes from ORIG_DECL to the NEW_DECL. */ ++ ++static inline void ++copy_var_attributes (tree new_decl, tree orig_decl) ++{ ++ DECL_ARTIFICIAL (new_decl) = 1; ++ DECL_EXTERNAL (new_decl) = DECL_EXTERNAL (orig_decl); ++ TREE_STATIC (new_decl) = TREE_STATIC (orig_decl); ++ TREE_PUBLIC (new_decl) = TREE_PUBLIC (orig_decl); ++ TREE_USED (new_decl) = TREE_USED (orig_decl); ++ DECL_CONTEXT (new_decl) = DECL_CONTEXT (orig_decl); ++ TREE_THIS_VOLATILE (new_decl) = TREE_THIS_VOLATILE (orig_decl); ++ TREE_ADDRESSABLE (new_decl) = TREE_ADDRESSABLE (orig_decl); ++ TREE_READONLY (new_decl) = TREE_READONLY (orig_decl); ++ if (is_global_var (orig_decl)) ++ set_decl_tls_model (new_decl, DECL_TLS_MODEL (orig_decl)); ++} ++ ++/* Create all of the new decls (SSA_NAMES included) for THIS function. */ ++ ++void ++srfunction::create_new_decls (void) ++{ ++ /* If this function has been cloned, we don't need to ++ create the new decls. */ ++ if (newnode) ++ return; ++ ++ if (node) ++ set_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ ++ for (unsigned i = 0; i < decls.length (); i++) ++ { ++ srdecl *decl = decls[i]; ++ srtype *type = decl->type; ++ /* If the type of the decl does not change, ++ then don't create a new decl. */ ++ if (!type->has_new_type ()) ++ { ++ decl->newdecl[0] = decl->decl; ++ continue; ++ } ++ ++ /* Handle SSA_NAMEs.
*/ ++ if (TREE_CODE (decl->decl) == SSA_NAME) ++ { ++ tree newtype1[max_split]; ++ tree inner = SSA_NAME_VAR (decl->decl); ++ tree newinner[max_split]; ++ memset (newinner, 0, sizeof (newinner)); ++ for (unsigned j = 0; j < max_split && type->newtype[j]; j++) ++ newtype1[j] = reconstruct_complex_type (TREE_TYPE (decls[i]->decl), ++ type->newtype[j]); ++ if (inner) ++ { ++ srdecl *in = find_decl (inner); ++ gcc_assert (in); ++ memcpy (newinner, in->newdecl, sizeof (newinner)); ++ } ++ tree od = decls[i]->decl; ++ /* Create the new ssa names and copy some attributes ++ from the old one. */ ++ for (unsigned j = 0; j < max_split && type->newtype[j]; j++) ++ { ++ tree nd = make_ssa_name (newinner[j] ? newinner[j] ++ : newtype1[j]); ++ decl->newdecl[j] = nd; ++ /* If the old decl was a default definition, ++ handle it specially. */ ++ if (SSA_NAME_IS_DEFAULT_DEF (od)) ++ { ++ SSA_NAME_IS_DEFAULT_DEF (nd) = true; ++ SSA_NAME_DEF_STMT (nd) = gimple_build_nop (); ++ ++ /* Set the default definition for the ssaname if needed.
*/ ++ if (inner) ++ { ++ gcc_assert (newinner[j]); ++ set_ssa_default_def (cfun, newinner[j], nd); ++ } ++ } ++ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (nd) ++ = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (od); ++ statistics_counter_event (cfun, "Create new ssa_name", 1); ++ } ++ } ++ else if (TREE_CODE (decls[i]->decl) == VAR_DECL) ++ { ++ tree orig_var = decl->decl; ++ const char *tname = NULL; ++ if (DECL_NAME (orig_var)) ++ tname = IDENTIFIER_POINTER (DECL_NAME (orig_var)); ++ for (unsigned j = 0; j < max_split && type->newtype[j]; j++) ++ { ++ tree new_name = NULL; ++ char *name = NULL; ++ char id[10]; ++ snprintf (id, sizeof (id), "%u", j); ++ if (tname) ++ { ++ name = concat (tname, ".reorg.", id, NULL); ++ new_name = get_identifier (name); ++ free (name); ++ } ++ tree newtype1 = reconstruct_complex_type (TREE_TYPE (orig_var), ++ type->newtype[j]); ++ decl->newdecl[j] = build_decl (DECL_SOURCE_LOCATION (orig_var), ++ VAR_DECL, new_name, newtype1); ++ copy_var_attributes (decl->newdecl[j], orig_var); ++ if (!is_global_var (orig_var)) ++ add_local_decl (cfun, decl->newdecl[j]); ++ else ++ varpool_node::add (decl->newdecl[j]); ++ statistics_counter_event (cfun, "Create new var decl", 1); ++ } ++ } ++ /* Parameter decls are already handled in create_new_functions. */ ++ else if (TREE_CODE (decls[i]->decl) == PARM_DECL) ++ ; ++ else ++ internal_error ("Unhandled declaration type stored"); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Created New decls for decl:\n"); ++ fprintf (dump_file, "\n"); ++ decls[i]->dump (dump_file); ++ fprintf (dump_file, "\n"); ++ for (unsigned j = 0; j < max_split && decls[i]->newdecl[j]; j++) ++ { ++ print_generic_expr (dump_file, decls[i]->newdecl[j]); ++ fprintf (dump_file, "\n"); ++ } ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ set_cfun (NULL); ++} ++ ++/* Dump out the field structure to FILE.
*/ ++ ++void ++srfield::dump (FILE *f) ++{ ++ fprintf (f, "field (%u) { ", DECL_UID (fielddecl)); /* DECL_UID is unsigned. */ ++ fprintf (f, "base = "); ++ base->simple_dump (f); ++ fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); ++ fprintf (f, ", type = "); ++ print_generic_expr (f, fieldtype); ++ if (type) ++ { ++ fprintf (f, "( srtype = "); ++ type->simple_dump (f); ++ fprintf (f, ")"); ++ } ++ fprintf (f, "\n}\n"); ++} ++ ++/* A simplified dump of the field structure to FILE: prints only the field's DECL_UID. */ ++ ++void ++srfield::simple_dump (FILE *f) ++{ ++ fprintf (f, "field (%u)", DECL_UID (fielddecl)); ++} ++ ++/* Dump out the access structure to FILE. */ ++ ++void ++sraccess::dump (FILE *f) ++{ ++ fprintf (f, "access { "); ++ fprintf (f, "type = '("); ++ type->simple_dump (f); ++ fprintf (f, ")'"); ++ if (field) ++ { ++ fprintf (f, ", field = '("); ++ field->simple_dump (f); ++ fprintf (f, ")'"); ++ } ++ else ++ fprintf (f, ", whole type"); ++ fprintf (f, " in function: %s/%d", node->name (), node->order); ++ fprintf (f, ", stmt:\n"); ++ print_gimple_stmt (f, stmt, 0); ++ fprintf (f, "\n }\n"); ++} ++ ++/* Dump out the decl structure to FILE.
*/ ++ ++void ++srdecl::dump (FILE *file) ++{ ++ if (!func) ++ fprintf (file, "global "); ++ if (argumentnum != -1) ++ fprintf (file, "argument(%d) ", argumentnum); ++ fprintf (file, "decl: "); ++ print_generic_expr (file, decl); ++ fprintf (file, " type: "); ++ type->simple_dump (file); ++} ++ ++} // namespace struct_reorg ++ ++ ++namespace { ++ ++struct ipa_struct_reorg ++{ ++public: ++ // Constructors ++ ipa_struct_reorg (void) ++ : current_function (NULL), ++ done_recording (false) ++ {} ++ ++ // Public methods ++ unsigned execute (void); ++ void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL); ++private: ++ // Fields ++ auto_vec_del types; ++ auto_vec_del functions; ++ srglobal globals; ++ srfunction *current_function; ++ ++ bool done_recording; ++ ++ // Private methods ++ void dump_types (FILE *f); ++ void dump_types_escaped (FILE *f); ++ void dump_functions (FILE *f); ++ void record_accesses (void); ++ void detect_cycles (void); ++ bool walk_field_for_cycles (srtype *); ++ void prune_escaped_types (void); ++ void propagate_escape (void); ++ void analyze_types (void); ++ void clear_visited (void); ++ bool create_new_types (void); ++ void restore_field_type (void); ++ void create_new_decls (void); ++ srdecl *find_decl (tree); ++ void create_new_functions (void); ++ void create_new_args (cgraph_node *new_node); ++ unsigned rewrite_functions (void); ++ srdecl *record_var (tree decl, ++ escape_type escapes = does_not_escape, ++ int arg = -1); ++ srfunction *record_function (cgraph_node *node); ++ srfunction *find_function (cgraph_node *node); ++ srtype *record_type (tree type); ++ void process_union (tree type); ++ srtype *find_type (tree type); ++ void maybe_record_stmt (cgraph_node *, gimple *); ++ void maybe_record_assign (cgraph_node *, gassign *); ++ void maybe_record_call (cgraph_node *, gcall *); ++ void maybe_record_allocation_site (cgraph_node *, gimple *); ++ void record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt); ++ void
mark_expr_escape (tree, escape_type, gimple *stmt); ++ tree allocate_size (srtype *t, gimple *stmt); ++ ++ void mark_decls_in_as_not_needed (tree fn); ++ ++ bool rewrite_stmt (gimple *, gimple_stmt_iterator *); ++ bool rewrite_assign (gassign *, gimple_stmt_iterator *); ++ bool rewrite_call (gcall *, gimple_stmt_iterator *); ++ bool rewrite_cond (gcond *, gimple_stmt_iterator *); ++ bool rewrite_debug (gimple *, gimple_stmt_iterator *); ++ bool rewrite_phi (gphi *); ++ bool rewrite_expr (tree expr, ++ tree newexpr[max_split], ++ bool ignore_missing_decl = false); ++ bool rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhs[max_split], ++ tree newrhs[max_split]); ++ bool get_type_field (tree expr, tree &base, bool &indirect, ++ srtype *&type, srfield *&field, ++ bool &realpart, bool &imagpart, ++ bool &address, bool should_create = false, ++ bool can_escape = false); ++ bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); ++ ++ void check_definition (srdecl *decl, vec &); ++ void check_uses (srdecl *decl, vec &); ++ void check_use (srdecl *decl, gimple *stmt, vec &); ++ void check_type_and_push (tree newdecl, srtype *type, ++ vec &worklist, gimple *stmt); ++ void check_other_side (srdecl *decl, tree other, gimple *stmt, ++ vec &worklist); ++ ++ void find_vars (gimple *stmt); ++ void find_var (tree expr, gimple *stmt); ++ void mark_types_asm (gasm *astmt); ++ ++ bool has_rewritten_type (srfunction *); ++ void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); ++}; ++ ++/* Dump all of the recorded types to file F. */ ++ ++void ++ipa_struct_reorg::dump_types (FILE *f) ++{ ++ unsigned i; ++ srtype *type; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ type->dump (f); ++ } ++ fprintf (f, "\n"); ++} ++ ++/* Dump the recorded types that have escaped, with the escape reason, to file F.
*/ ++ ++void ++ipa_struct_reorg::dump_types_escaped (FILE *f) ++{ ++ unsigned i; ++ srtype *type; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ if (type->has_escaped ()) ++ { ++ type->simple_dump (f); ++ fprintf (f, " has escaped: \"%s\"\n", type->escape_reason ()); ++ } ++ } ++ fprintf (f, "\n"); ++} ++ ++/* Dump the globals and all of the recorded functions to file F. */ ++ ++void ++ipa_struct_reorg::dump_functions (FILE *f) ++{ ++ unsigned i; ++ srfunction *fn; ++ ++ fprintf (f, "\n\n"); ++ globals.dump (f); ++ fprintf (f, "\n\n"); ++ FOR_EACH_VEC_ELT (functions, i, fn) ++ { ++ fn->dump (f); ++ fprintf (f, "\n"); ++ } ++ fprintf (f, "\n\n"); ++} ++ ++/* Find the recorded srtype corresponding to TYPE; return NULL if there is none. */ ++ ++srtype * ++ipa_struct_reorg::find_type (tree type) ++{ ++ unsigned i; ++ /* Get the main variant as we are going ++ to find that type only. */ ++ type = TYPE_MAIN_VARIANT (type); ++ ++ srtype *type1; ++ // Search for the type to see if it is already there. ++ FOR_EACH_VEC_ELT (types, i, type1) ++ { ++ if (types_compatible_p (type1->type, type)) ++ return type1; ++ } ++ return NULL; ++} ++ ++/* Is TYPE a volatile type or one which points ++ to a volatile type. */ ++ ++static bool ++isvolatile_type (tree type) ++{ ++ if (TYPE_VOLATILE (type)) ++ return true; ++ while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) ++ { ++ type = TREE_TYPE (type); ++ if (TYPE_VOLATILE (type)) ++ return true; ++ } ++ return false; ++} ++ ++/* Is TYPE an array type or points to an array type. */ ++ ++static bool ++isarraytype (tree type) ++{ ++ if (TREE_CODE (type) == ARRAY_TYPE) ++ return true; ++ while (POINTER_TYPE_P (type)) ++ { ++ type = TREE_TYPE (type); ++ if (TREE_CODE (type) == ARRAY_TYPE) ++ return true; ++ } ++ return false; ++} ++ ++/* Is TYPE a pointer to another pointer.
*/ ++ ++static bool ++isptrptr (tree type) ++{ ++ bool firstptr = false; ++ while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) ++ { ++ if (POINTER_TYPE_P (type)) ++ { ++ if (firstptr) ++ return true; ++ firstptr = true; ++ } ++ type = TREE_TYPE (type); ++ } ++ return false; ++} ++ ++/* Return the escape type which corresponds to whether ++ TYPE is a volatile type, an array type or a pointer ++ to a pointer type; return does_not_escape otherwise. */ ++ ++static escape_type ++escape_type_volatile_array_or_ptrptr (tree type) ++{ ++ if (isvolatile_type (type)) ++ return escape_volatile; ++ if (isarraytype (type)) ++ return escape_array; ++ if (isptrptr (type)) ++ return escape_ptr_ptr; ++ return does_not_escape; ++} ++ ++/* Record TYPE if not already recorded. */ ++ ++srtype * ++ipa_struct_reorg::record_type (tree type) ++{ ++ unsigned typeuid; ++ ++ /* Get the main variant as we are going ++ to record that type only. */ ++ type = TYPE_MAIN_VARIANT (type); ++ typeuid = TYPE_UID (type); ++ ++ srtype *type1; ++ ++ type1 = find_type (type); ++ if (type1) ++ return type1; ++ ++ /* If already done recording just return NULL. */ ++ if (done_recording) ++ return NULL; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Recording new type: %u.\n", typeuid); ++ ++ type1 = new srtype (type); ++ types.safe_push (type1); ++ ++ /* If the type has a user alignment set, ++ that means the user most likely already set up the type.
*/ ++ if (TYPE_USER_ALIGN (type)) ++ type1->mark_escape (escape_user_alignment, NULL); ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ tree t = TREE_TYPE (field); ++ process_union (t); ++ if (TREE_CODE (inner_type (t)) == UNION_TYPE ++ || TREE_CODE (inner_type (t)) == QUAL_UNION_TYPE) ++ type1->mark_escape (escape_union, NULL); ++ if (isvolatile_type (t)) ++ type1->mark_escape (escape_volatile, NULL); ++ escape_type e = escape_type_volatile_array_or_ptrptr (t); ++ if (e != does_not_escape) ++ type1->mark_escape (e, NULL); ++ if (handled_type (t)) ++ { ++ srtype *t1 = record_type (inner_type (t)); ++ srfield *f = type1->find_field (int_byte_position (field)); ++ /* We might have a variable sized type, in which case ++ find_field returns NULL for this field. */ ++ if (f) ++ { ++ f->type = t1; ++ t1->add_field_site (f); ++ } ++ if (t1 == type1) ++ type1->mark_escape (escape_rescusive_type, NULL); ++ } ++ } ++ } ++ ++ return type1; ++} ++ ++/* Mark TYPE as escaping with ESCAPES as the reason. */ ++ ++void ++ipa_struct_reorg::mark_type_as_escape (tree type, ++ escape_type escapes, ++ gimple *stmt) ++{ ++ if (handled_type (type)) ++ { ++ srtype *stype = record_type (inner_type (type)); ++ ++ if (!stype) ++ return; ++ ++ stype->mark_escape (escapes, stmt); ++ } ++} ++ ++/* Maybe process the union type TYPE by marking all of its fields' ++ types as escaping. */ ++ ++void ++ipa_struct_reorg::process_union (tree type) ++{ ++ static hash_set unions_recorded; ++ ++ type = inner_type (type); ++ if (TREE_CODE (type) != UNION_TYPE ++ && TREE_CODE (type) != QUAL_UNION_TYPE) ++ return; ++ ++ type = TYPE_MAIN_VARIANT (type); ++ ++ /* We already processed this type.
*/ ++ if (unions_recorded.add (type)) ++ return; ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ mark_type_as_escape (TREE_TYPE (field), escape_union); ++ process_union (TREE_TYPE (field)); ++ } ++ } ++} ++ ++/* Used by record_var function as a callback to walk_tree. ++ Mark the type as escaping if it has expressions which ++ cannot be converted for global initializations. */ ++ ++static tree ++record_init_types (tree *tp, int *walk_subtrees, void *data) ++{ ++ ipa_struct_reorg *c = (ipa_struct_reorg *)data; ++ switch (TREE_CODE (*tp)) ++ { ++ CASE_CONVERT: ++ case COMPONENT_REF: ++ case VIEW_CONVERT_EXPR: ++ case ARRAY_REF: ++ { ++ tree typeouter = TREE_TYPE (*tp); ++ tree typeinner = TREE_TYPE (TREE_OPERAND (*tp, 0)); ++ c->mark_type_as_escape (typeouter, escape_via_global_init); ++ c->mark_type_as_escape (typeinner, escape_via_global_init); ++ break; ++ } ++ case INTEGER_CST: ++ if (!integer_zerop (*tp)) ++ c->mark_type_as_escape (TREE_TYPE (*tp), escape_via_global_init); ++ break; ++ case VAR_DECL: ++ case PARM_DECL: ++ case FIELD_DECL: ++ c->mark_type_as_escape (TREE_TYPE (*tp), escape_via_global_init); ++ *walk_subtrees = false; ++ break; ++ default: ++ *walk_subtrees = true; ++ break; ++ } ++ return NULL_TREE; ++} ++ ++/* Record var DECL; optionally specify the escape reason and the argument ++ number in a function.
*/ ++ ++srdecl * ++ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) ++{ ++ srtype *type; ++ srdecl *sd = NULL; ++ ++ process_union (TREE_TYPE (decl)); ++ ++ if (handled_type (TREE_TYPE (decl))) ++ { ++ type = record_type (inner_type (TREE_TYPE (decl))); ++ escape_type e; ++ ++ if (done_recording && !type) ++ return NULL; ++ ++ gcc_assert (type); ++ if (TREE_CODE (decl) == VAR_DECL && is_global_var (decl)) ++ sd = globals.record_decl (type, decl, arg); ++ else ++ { ++ gcc_assert (current_function); ++ sd = current_function->record_decl (type, decl, arg); ++ } ++ ++ /* Pick the escape reason: an explicit ESCAPES wins; otherwise e.g. a ++ variable marked with DECL_PRESERVE_P makes the type escape. */ ++ if (escapes != does_not_escape) ++ e = escapes; ++ else if (TREE_CODE (decl) != SSA_NAME && DECL_PRESERVE_P (decl)) ++ e = escape_marked_as_used; ++ else if (TREE_THIS_VOLATILE (decl)) ++ e = escape_volatile; ++ else if (TREE_CODE (decl) != SSA_NAME && DECL_USER_ALIGN (decl)) ++ e = escape_user_alignment; ++ else if (TREE_CODE (decl) != SSA_NAME && TREE_STATIC (decl) ++ && TREE_PUBLIC (decl)) ++ e = escape_via_global_var; ++ /* We don't have an initializer. */ ++ else if (TREE_CODE (decl) != SSA_NAME ++ && DECL_INITIAL (decl) == error_mark_node) ++ e = escape_via_global_var; ++ else ++ e = escape_type_volatile_array_or_ptrptr (TREE_TYPE (decl)); ++ ++ if (e != does_not_escape) ++ type->mark_escape (e, NULL); ++ } ++ ++ /* Record the initial usage of variables as types escapes. */ ++ if (TREE_CODE (decl) != SSA_NAME && TREE_STATIC (decl) ++ && DECL_INITIAL (decl)) ++ { ++ walk_tree_without_duplicates (&DECL_INITIAL (decl), ++ record_init_types, this); ++ if (!integer_zerop (DECL_INITIAL (decl)) ++ && DECL_INITIAL (decl) != error_mark_node) ++ mark_type_as_escape (TREE_TYPE (decl), escape_via_global_init); ++ } ++ return sd; ++} ++ ++/* Find void* ssa_names which are used inside MEM[] or if we have &a.c, ++ mark the type as escaping.
*/ ++ ++void ++ipa_struct_reorg::find_var (tree expr, gimple *stmt) ++{ ++ /* If we have VCE mark the outer type as escaping and the inner one. ++ Also mark the inner most operand. */ ++ if (TREE_CODE (expr) == VIEW_CONVERT_EXPR) ++ { ++ mark_type_as_escape (TREE_TYPE (expr), escape_vce, stmt); ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (expr, 0)), ++ escape_vce, stmt); ++ } ++ ++ /* If we have &b.c then we need to mark the type of b ++ as escaping as tracking the resulting pointer will be hard. */ ++ if (TREE_CODE (expr) == ADDR_EXPR ++ || TREE_CODE (expr) == VIEW_CONVERT_EXPR) ++ { ++ tree r = TREE_OPERAND (expr, 0); ++ if (handled_component_p (r) ++ || TREE_CODE (r) == MEM_REF) ++ { ++ while (handled_component_p (r) ++ || TREE_CODE (r) == MEM_REF) ++ { ++ if (TREE_CODE (r) == VIEW_CONVERT_EXPR) ++ { ++ mark_type_as_escape (TREE_TYPE (r), escape_vce, stmt); ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 0)), ++ escape_vce, stmt); ++ } ++ if (TREE_CODE (r) == MEM_REF) ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 1)), ++ escape_addr, stmt); ++ r = TREE_OPERAND (r, 0); ++ } ++ mark_expr_escape (r, escape_addr, stmt); ++ } ++ } ++ ++ tree base; ++ bool indirect; ++ srtype *type; ++ srfield *field; ++ bool realpart, imagpart, address; ++ get_type_field (expr, base, indirect, type, field, ++ realpart, imagpart, address, true, true); ++} ++ ++void ++ipa_struct_reorg::find_vars (gimple *stmt) ++{ ++ gasm *astmt; /* Run find_var on the operands of STMT, by statement kind. */ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS ++ || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ find_var (gimple_assign_lhs (stmt), stmt); ++ find_var (gimple_assign_rhs1 (stmt), stmt); ++ if (TREE_CODE (lhs) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (lhs)) ++ && handled_type (TREE_TYPE (rhs))) ++ { ++ srtype *t = find_type (inner_type (TREE_TYPE (rhs))); ++ srdecl *d = find_decl (lhs); ++ if (!d &&
t) ++ current_function->record_decl (t, lhs, -1); ++ } ++ if (TREE_CODE (rhs) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (rhs)) ++ && handled_type (TREE_TYPE (lhs))) ++ { ++ srtype *t = find_type (inner_type (TREE_TYPE (lhs))); ++ srdecl *d = find_decl (rhs); ++ if (!d && t) ++ current_function->record_decl (t, rhs, -1); ++ } ++ } ++ break; ++ ++ case GIMPLE_CALL: ++ if (gimple_call_lhs (stmt)) ++ find_var (gimple_call_lhs (stmt), stmt); ++ ++ if (gimple_call_chain (stmt)) ++ find_var (gimple_call_chain (stmt), stmt); ++ ++ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) ++ find_var (gimple_call_arg (stmt, i), stmt); ++ break; ++ ++ case GIMPLE_ASM: ++ astmt = as_a (stmt); ++ for (unsigned i = 0; i < gimple_asm_ninputs (astmt); i++) ++ find_var (TREE_VALUE (gimple_asm_input_op (astmt, i)), stmt); ++ for (unsigned i = 0; i < gimple_asm_noutputs (astmt); i++) ++ find_var (TREE_VALUE (gimple_asm_output_op (astmt, i)), stmt); ++ mark_types_asm (astmt); ++ break; ++ ++ case GIMPLE_RETURN: ++ { ++ tree expr = gimple_return_retval (as_a (stmt)); ++ if (expr) ++ find_var (expr, stmt); ++ /* return &a; should mark the type of a as escaping ++ through a return. */ ++ if (expr && TREE_CODE (expr) == ADDR_EXPR) ++ { ++ expr = TREE_OPERAND (expr, 0); ++ srdecl *d = find_decl (expr); ++ if (d) ++ d->type->mark_escape (escape_return, stmt); ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* Maybe record access of statement for further analysis. */ ++ ++void ++ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ maybe_record_assign (node, as_a (stmt)); ++ break; ++ case GIMPLE_CALL: ++ maybe_record_call (node, as_a (stmt)); ++ break; ++ case GIMPLE_DEBUG: ++ break; ++ case GIMPLE_GOTO: ++ case GIMPLE_SWITCH: ++ break; ++ default: ++ break; ++ } ++} ++ ++/* This function checks whether ARG is a result of multiplication ++ of some number by STRUCT_SIZE.
If yes, the function returns true ++ and this number is filled into NUM. */ ++ ++static bool ++is_result_of_mult (tree arg, tree *num, tree struct_size) ++{ ++ if (!struct_size ++ || TREE_CODE (struct_size) != INTEGER_CST ++ || integer_zerop (struct_size)) ++ return false; ++ ++ /* If we have an integer, just check if it is a multiple of STRUCT_SIZE. */ ++ if (TREE_CODE (arg) == INTEGER_CST) ++ { ++ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg, struct_size))) ++ { ++ *num = size_binop (FLOOR_DIV_EXPR, arg, struct_size); ++ return true; ++ } ++ return false; ++ } ++ gimple *size_def_stmt = SSA_NAME_DEF_STMT (arg); ++ ++ /* If the allocation statement was of the form ++ D.2229_10 = (D.2228_9); ++ then size_def_stmt can be D.2228_9 = num.3_8 * 8; */ ++ ++ while (size_def_stmt && is_gimple_assign (size_def_stmt)) ++ { ++ tree lhs = gimple_assign_lhs (size_def_stmt); ++ ++ /* We expect a temporary here. */ ++ if (!is_gimple_reg (lhs)) ++ return false; ++ ++ // FIXME: this should handle SHIFT also.
++ if (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR) ++ { ++ tree num1, num2; ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ if (!is_result_of_mult (arg0, &num1, struct_size)) ++ return false; ++ if (!is_result_of_mult (arg1, &num2, struct_size)) ++ return false; ++ *num = size_binop (PLUS_EXPR, num1, num2); ++ return true; ++ } ++ else if (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR) ++ { ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ tree num1; ++ ++ if (is_result_of_mult (arg0, &num1, struct_size)) ++ { ++ *num = size_binop (MULT_EXPR, arg1, num1); ++ return true; ++ } ++ if (is_result_of_mult (arg1, &num1, struct_size)) ++ { ++ *num = size_binop (MULT_EXPR, arg0, num1); ++ return true; ++ } ++ ++ *num = NULL_TREE; ++ return false; ++ } ++ else if (gimple_assign_rhs_code (size_def_stmt) == SSA_NAME) ++ { ++ arg = gimple_assign_rhs1 (size_def_stmt); ++ size_def_stmt = SSA_NAME_DEF_STMT (arg); ++ } ++ else ++ { ++ *num = NULL_TREE; ++ return false; ++ } ++ } ++ ++ *num = NULL_TREE; ++ return false; ++} ++ ++/* Return TRUE if STMT is a call to one of the allocation builtins that is handled. */ ++ ++static bool ++handled_allocation_stmt (gimple *stmt) ++{ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) ++ return true; ++ return false; ++} ++ ++/* Returns the allocated size / T size for STMT. That is the number of ++ elements in the array allocated.
*/ ++ ++tree ++ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) ++{ ++ if (!stmt ++ || gimple_code (stmt) != GIMPLE_CALL ++ || !handled_allocation_stmt (stmt)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nNot a allocate statment:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return NULL; ++ } ++ ++ if (type->has_escaped ()) ++ return NULL; ++ ++ tree struct_size = TYPE_SIZE_UNIT (type->type); ++ ++ tree size = gimple_call_arg (stmt, 0); ++ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC)) ++ size = gimple_call_arg (stmt, 1); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ { ++ tree arg1; ++ arg1 = gimple_call_arg (stmt, 1); ++ /* Check that second argument is a constant equal to ++ the size of structure. */ ++ if (operand_equal_p (arg1, struct_size, 0)) ++ return size; ++ /* Check that first argument is a constant equal to ++ the size of structure. 
*/ ++ if (operand_equal_p (size, struct_size, 0)) ++ return arg1; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\ncalloc the correct size:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return NULL; ++ } ++ ++ tree num; ++ if (!is_result_of_mult (size, &num, struct_size)) ++ return NULL; ++ ++ return num; ++} ++ ++void ++ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, ++ gimple *stmt) ++{ ++ gcc_assert (TREE_CODE (side) == SSA_NAME || TREE_CODE (side) == ADDR_EXPR); ++ srtype *type = NULL; ++ if (handled_type (TREE_TYPE (other))) ++ type = record_type (inner_type (TREE_TYPE (other))); ++ if (TREE_CODE (side) == ADDR_EXPR) ++ side = TREE_OPERAND (side, 0); ++ srdecl *d = find_decl (side); ++ if (!type) ++ { ++ if (!d) ++ return; ++ if (TREE_CODE (side) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (side))) ++ return; ++ d->type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ ++ if (!d) ++ { ++ if (VOID_POINTER_P (TREE_TYPE (side)) ++ && TREE_CODE (side) == SSA_NAME) ++ current_function->record_decl (type, side, -1); ++ else ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ } ++ else if (type != d->type) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ d->type->mark_escape (escape_cast_another_ptr, stmt); ++ } ++} ++ ++/* Record accesses in an assignment statement STMT. */ ++ ++void ++ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt) ++{ ++ if (gimple_clobber_p (stmt)) ++ { ++ record_stmt_expr (gimple_assign_lhs (stmt), node, stmt); ++ return; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree num; ++ if (!handled_type (TREE_TYPE (lhs))) ++ return; ++ /* Check if rhs2 is a multiplication of the size of the type. 
*/ ++ if (is_result_of_mult (rhs2, &num, ++ TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))))) ++ { ++ record_stmt_expr (lhs, node, stmt); ++ record_stmt_expr (rhs1, node, stmt); ++ } ++ else ++ { ++ mark_expr_escape (lhs, escape_non_multiply_size, stmt); ++ mark_expr_escape (rhs1, escape_non_multiply_size, stmt); ++ } ++ return; ++ } ++ /* Copies, References, Taking addresses. */ ++ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ /* If we have a = &b.c then we need to mark the type of b ++ as escaping as tracking a will be hard. */ ++ if (TREE_CODE (rhs) == ADDR_EXPR) ++ { ++ tree r = TREE_OPERAND (rhs, 0); ++ if (handled_component_p (r)) ++ { ++ while (handled_component_p (r)) ++ r = TREE_OPERAND (r, 0); ++ mark_expr_escape (r, escape_addr, stmt); ++ return; ++ } ++ } ++ if ((TREE_CODE (rhs) == SSA_NAME || TREE_CODE (rhs) == ADDR_EXPR)) ++ maybe_mark_or_record_other_side (rhs, lhs, stmt); ++ if (TREE_CODE (lhs) == SSA_NAME) ++ maybe_mark_or_record_other_side (lhs, rhs, stmt); ++ } ++} ++ ++static tree ++get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, ++ bool &realpart, bool &imagpart, ++ tree &accesstype) ++{ ++ offset = 0; ++ realpart = false; ++ imagpart = false; ++ accesstype = NULL_TREE; ++ if (TREE_CODE (e) == REALPART_EXPR) ++ { ++ e = TREE_OPERAND (e, 0); ++ realpart = true; ++ } ++ if (TREE_CODE (e) == IMAGPART_EXPR) ++ { ++ e = TREE_OPERAND (e, 0); ++ imagpart = true; ++ } ++ tree expr = e; ++ while (true) ++ { ++ switch (TREE_CODE (expr)) ++ { ++ case COMPONENT_REF: ++ { ++ tree field = TREE_OPERAND (expr, 1); ++ tree field_off = byte_position (field); ++ if (TREE_CODE (field_off) != INTEGER_CST) ++ return NULL; ++ offset += tree_to_shwi (field_off); ++ expr = TREE_OPERAND (expr, 0); ++ accesstype = NULL; ++ break; ++ } ++ case MEM_REF: ++ { ++ tree field_off = TREE_OPERAND (expr, 1); ++ gcc_assert (TREE_CODE (field_off) == INTEGER_CST); ++ /* So we 
can mark the types as escaping if different. */ ++ accesstype = TREE_TYPE (field_off); ++ offset += tree_to_uhwi (field_off); ++ return TREE_OPERAND (expr, 0); ++ } ++ default: ++ return expr; ++ } ++ } ++} ++ ++/* Return true if EXPR was accessing the whole type T. */ ++ ++bool ++ipa_struct_reorg::wholeaccess (tree expr, tree base, ++ tree accesstype, srtype *t) ++{ ++ if (expr == base) ++ return true; ++ ++ if (TREE_CODE (expr) == ADDR_EXPR && TREE_OPERAND (expr, 0) == base) ++ return true; ++ ++ if (!accesstype) ++ return false; ++ ++ if (!types_compatible_p (TREE_TYPE (expr), TREE_TYPE (accesstype))) ++ return false; ++ ++ if (!handled_type (TREE_TYPE (expr))) ++ return false; ++ ++ srtype *other_type = find_type (inner_type (TREE_TYPE (expr))); ++ ++ if (t == other_type) ++ return true; ++ ++ return false; ++} ++ ++bool ++ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, ++ srtype *&type, srfield *&field, ++ bool &realpart, bool &imagpart, ++ bool &address, bool should_create, ++ bool can_escape) ++{ ++ HOST_WIDE_INT offset; ++ tree accesstype; ++ address = false; ++ bool mark_as_bit_field = false; ++ ++ if (TREE_CODE (expr) == BIT_FIELD_REF) ++ { ++ expr = TREE_OPERAND (expr, 0); ++ mark_as_bit_field = true; ++ } ++ ++ base = get_ref_base_and_offset (expr, offset, realpart, imagpart, ++ accesstype); ++ ++ /* Variable access, unkown type. 
*/ ++ if (base == NULL) ++ return false; ++ ++ if (TREE_CODE (base) == ADDR_EXPR) ++ { ++ address = true; ++ base = TREE_OPERAND (base, 0); ++ } ++ ++ if (offset != 0 && accesstype) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Non zero offset (%d) with MEM.\n", (int)offset); ++ print_generic_expr (dump_file, expr); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ srdecl *d = find_decl (base); ++ srtype *t; ++ ++ if (integer_zerop (base)) ++ { ++ gcc_assert (!d); ++ if (!accesstype) ++ return false; ++ t = find_type (inner_type (inner_type (accesstype))); ++ if (!t && should_create && handled_type (accesstype)) ++ t = record_type (inner_type (accesstype)); ++ if (!t) ++ return false; ++ } ++ else if (!d && accesstype) ++ { ++ if (!should_create) ++ return false; ++ if (!handled_type (accesstype)) ++ return false; ++ t = find_type (inner_type (inner_type (accesstype))); ++ if (!t) ++ t = record_type (inner_type (accesstype)); ++ if (!t || t->has_escaped ()) ++ return false; ++ /* If base is not void* mark the type as escaping. 
*/ ++ if (!VOID_POINTER_P (TREE_TYPE (base))) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_cast_another_ptr, NULL); ++ return false; ++ } ++ if (TREE_CODE (base) == SSA_NAME) ++ current_function->record_decl (t, base, -1); ++ } ++ else if (!d) ++ return false; ++ else ++ t = d->type; ++ ++ if (t->has_escaped ()) ++ return false; ++ ++ if (mark_as_bit_field) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_bitfields, NULL); ++ return false; ++ } ++ ++ if (wholeaccess (expr, base, accesstype, t)) ++ { ++ field = NULL; ++ type = t; ++ indirect = accesstype != NULL; ++ return true; ++ } ++ ++ srfield *f = t->find_field (offset); ++ if (!f) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nunkown field\n"); ++ print_generic_expr (dump_file, expr); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\n"); ++ } ++ gcc_assert (can_escape); ++ t->mark_escape (escape_unkown_field, NULL); ++ return false; ++ } ++ if (!types_compatible_p (f->fieldtype, TREE_TYPE (expr))) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nfieldtype = "); ++ print_generic_expr (dump_file, f->fieldtype); ++ fprintf (dump_file, "\naccess type = "); ++ print_generic_expr (dump_file, TREE_TYPE (expr)); ++ fprintf (dump_file, "original expr = "); ++ print_generic_expr (dump_file, expr); ++ fprintf (dump_file, "\n"); ++ } ++ gcc_assert (can_escape); ++ t->mark_escape (escape_unkown_field, NULL); ++ return false; ++ } ++ field = f; ++ type = t; ++ indirect = accesstype != NULL; ++ return true; ++} ++ ++/* Mark the type used in EXPR as escaping. 
*/ ++ ++void ++ipa_struct_reorg::mark_expr_escape (tree expr, escape_type escapes, ++ gimple *stmt) ++{ ++ tree base; ++ bool indirect; ++ srtype *type; ++ srfield *field; ++ bool realpart, imagpart, address; ++ if (!get_type_field (expr, base, indirect, type, field, ++ realpart, imagpart, address)) ++ return; ++ ++ type->mark_escape (escapes, stmt); ++} ++ ++/* Record accesses in a call statement STMT. */ ++ ++void ++ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) ++{ ++ tree argtype; ++ tree fndecl; ++ escape_type escapes = does_not_escape; ++ bool free_or_realloc = gimple_call_builtin_p (stmt, BUILT_IN_FREE) ++ || gimple_call_builtin_p (stmt, BUILT_IN_REALLOC); ++ ++ /* We check allocation sites in a different location. */ ++ if (handled_allocation_stmt (stmt)) ++ return; ++ ++ /* A few cases here: ++ 1) assigned from the lhs ++ 2) Used in argument ++ If a function being called is global (or indirect) ++ then we reject the types as being escaping. */ ++ ++ if (tree chain = gimple_call_chain (stmt)) ++ record_stmt_expr (chain, node, stmt); ++ ++ /* Assigned from LHS. */ ++ if (tree lhs = gimple_call_lhs (stmt)) ++ { ++ /* FIXME: handle return types. */ ++ mark_type_as_escape (TREE_TYPE (lhs), escape_return); ++ } ++ ++ /* If we have an internal call, just record the stmt. */ ++ if (gimple_call_internal_p (stmt)) ++ { ++ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) ++ record_stmt_expr (gimple_call_arg (stmt, i), node, stmt); ++ return; ++ } ++ ++ fndecl = gimple_call_fndecl (stmt); ++ ++ /* If we have an indrect call, just mark the types as escape. */ ++ if (!fndecl) ++ escapes = escape_pointer_function; ++ /* Non local functions cause escape except for calls to free ++ and realloc. ++ FIXME: should support function annotations too. 
*/ ++ else if (!free_or_realloc ++ && !cgraph_node::local_info_node (fndecl)->local) ++ escapes = escape_external_function; ++ else if (!free_or_realloc ++ && !cgraph_node::local_info_node (fndecl)->can_change_signature) ++ escapes = escape_cannot_change_signature; ++ /* FIXME: we should be able to handle functions in other partitions. */ ++ else if (symtab_node::get (fndecl)->in_other_partition) ++ escapes = escape_external_function; ++ ++ if (escapes != does_not_escape) ++ { ++ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) ++ mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)), ++ escapes); ++ return; ++ } ++ ++ argtype = TYPE_ARG_TYPES (gimple_call_fntype (stmt)); ++ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) ++ { ++ tree arg = gimple_call_arg (stmt, i); ++ if (argtype) ++ { ++ tree argtypet = TREE_VALUE (argtype); ++ if (!free_or_realloc ++ && VOID_POINTER_P (argtypet)) ++ mark_type_as_escape (TREE_TYPE (arg), escape_cast_void); ++ else ++ record_stmt_expr (arg, node, stmt); ++ } ++ else ++ mark_type_as_escape (TREE_TYPE (arg), escape_var_arg_function); ++ ++ argtype = argtype ? TREE_CHAIN (argtype) : NULL_TREE; ++ } ++} ++ ++void ++ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt) ++{ ++ tree base; ++ bool indirect; ++ srtype *type; ++ srfield *field; ++ bool realpart, imagpart, address; ++ if (!get_type_field (expr, base, indirect, type, field, ++ realpart, imagpart, address)) ++ return; ++ ++ if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) ++ type->mark_escape (escape_non_optimize, stmt); ++ ++ /* Record it. */ ++ type->add_access (new sraccess (stmt, node, type, field)); ++} ++ ++/* Find function corresponding to NODE. 
*/ ++ ++srfunction * ++ipa_struct_reorg::find_function (cgraph_node *node) ++{ ++ for (unsigned i = 0; i < functions.length (); i++) ++ if (functions[i]->node == node) ++ return functions[i]; ++ return NULL; ++} ++ ++void ++ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, ++ vec &worklist, ++ gimple *stmt) ++{ ++ if (integer_zerop (newdecl)) ++ return; ++ ++ if (TREE_CODE (newdecl) == ADDR_EXPR) ++ { ++ srdecl *d = find_decl (TREE_OPERAND (newdecl, 0)); ++ if (!d) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ if (d->type == type) ++ return; ++ ++ srtype *type1 = d->type; ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ type1->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ ++ srdecl *d = find_decl (newdecl); ++ if (!d) ++ { ++ if (TREE_CODE (newdecl) == INTEGER_CST) ++ { ++ type->mark_escape (escape_int_const, stmt); ++ return; ++ } ++ /* If we have a non void* or a decl (which is hard to track), ++ then mark the type as escaping. */ ++ if (!VOID_POINTER_P (TREE_TYPE (newdecl)) ++ || DECL_P (newdecl)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nunkown decl: "); ++ print_generic_expr (dump_file, newdecl); ++ fprintf (dump_file, " in type:\n"); ++ print_generic_expr (dump_file, TREE_TYPE (newdecl)); ++ fprintf (dump_file, "\n"); ++ } ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ /* At this point there should only be unkown void* ssa names. */ ++ gcc_assert (TREE_CODE (newdecl) == SSA_NAME); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nrecording unkown decl: "); ++ print_generic_expr (dump_file, newdecl); ++ fprintf (dump_file, " as type:\n"); ++ type->simple_dump (dump_file); ++ fprintf (dump_file, "\n"); ++ } ++ d = current_function->record_decl (type, newdecl, -1); ++ worklist.safe_push (d); ++ return; ++ } ++ ++ /* Only add to the worklist if the decl is a SSA_NAME. 
*/ ++ if (TREE_CODE (newdecl) == SSA_NAME) ++ worklist.safe_push (d); ++ if (d->type == type) ++ return; ++ ++ srtype *type1 = d->type; ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ type1->mark_escape (escape_cast_another_ptr, stmt); ++} ++ ++/* ++ 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) ++ a) if the SSA_NAME is sourced from a pointer plus, record the pointer and ++ check to make sure the addition was a multiple of the size. ++ check the pointer type too. ++ b) If the name is sourced from an allocation check the allocation ++ i) Add SSA_NAME (void*) to the worklist if allocated from realloc ++ c) if the name is from a param, make sure the param type was of the ++ original type ++ d) if the name is from a cast/assignment, make sure it is used as that ++ type or void* ++ i) If void* then push the ssa_name into worklist ++*/ ++void ++ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) ++{ ++ tree ssa_name = decl->decl; ++ srtype *type = decl->type; ++ ++ /* ++ c) if the name is from a param, make sure the param type was of the ++ original type. ++ */ ++ if (SSA_NAME_IS_DEFAULT_DEF (ssa_name)) ++ { ++ tree var = SSA_NAME_VAR (ssa_name); ++ if (var ++ && TREE_CODE (var) == PARM_DECL ++ && VOID_POINTER_P (TREE_TYPE (ssa_name))) ++ type->mark_escape (escape_cast_void, NULL); ++ return; ++ } ++ gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); ++ ++ /* ++ b) If the name is sourced from an allocation check the allocation ++ i) Add SSA_NAME (void*) to the worklist if allocated from realloc ++ */ ++ if (gimple_code (stmt) == GIMPLE_CALL) ++ { ++ /* For realloc, check the type of the argument. 
*/ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ check_type_and_push (gimple_call_arg (stmt, 0), type, worklist, stmt); ++ ++ if (!handled_allocation_stmt (stmt) ++ || !allocate_size (type, stmt)) ++ type->mark_escape (escape_return, stmt); ++ return; ++ } ++ /* If the SSA_NAME is sourced from an inline-asm, ++ just mark the type as escaping. */ ++ if (gimple_code (stmt) == GIMPLE_ASM) ++ { ++ type->mark_escape (escape_inline_asm, stmt); ++ return; ++ } ++ ++ /* If the SSA_NAME is sourced from a PHI check add ++ each name to the worklist and check to make sure ++ they are used correctly. */ ++ if (gimple_code (stmt) == GIMPLE_PHI) ++ { ++ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) ++ check_type_and_push (gimple_phi_arg_def (stmt, i), ++ type, worklist, stmt); ++ return; ++ } ++ ++ gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); ++ /* ++ a) if the SSA_NAME is sourced from a pointer plus, record the pointer and ++ check to make sure the addition was a multiple of the size. ++ check the pointer type too. ++ */ ++ ++ tree rhs = gimple_assign_rhs1 (stmt); ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree num; ++ if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) ++ type->mark_escape (escape_non_multiply_size, stmt); ++ ++ if (TREE_CODE (rhs) == SSA_NAME) ++ check_type_and_push (rhs, type, worklist, stmt); ++ return; ++ } ++ ++ /* Casts between pointers and integer are escaping. */ ++ if (gimple_assign_cast_p (stmt)) ++ { ++ type->mark_escape (escape_cast_int, stmt); ++ return; ++ } ++ ++ /* ++ d) if the name is from a cast/assignment, make sure it is used as that ++ type or void* ++ i) If void* then push the ssa_name into worklist ++ */ ++ gcc_assert (gimple_assign_single_p (stmt)); ++ check_other_side (decl, rhs, stmt, worklist); ++} ++ ++/* Mark the types used by the inline-asm as escaping. ++ It is unkown what happens inside an inline-asm. 
*/ ++ ++void ++ipa_struct_reorg::mark_types_asm (gasm *astmt) ++{ ++ for (unsigned i = 0; i < gimple_asm_ninputs (astmt); i++) ++ { ++ tree v = TREE_VALUE (gimple_asm_input_op (astmt, i)); ++ /* If we have &b, just strip the & here. */ ++ if (TREE_CODE (v) == ADDR_EXPR) ++ v = TREE_OPERAND (v, 0); ++ mark_expr_escape (v, escape_inline_asm, astmt); ++ } ++ for (unsigned i = 0; i < gimple_asm_noutputs (astmt); i++) ++ { ++ tree v = TREE_VALUE (gimple_asm_output_op (astmt, i)); ++ /* If we have &b, just strip the & here. */ ++ if (TREE_CODE (v) == ADDR_EXPR) ++ v = TREE_OPERAND (v, 0); ++ mark_expr_escape (v, escape_inline_asm, astmt); ++ } ++} ++ ++void ++ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, ++ vec &worklist) ++{ ++ srtype *type = decl->type; ++ ++ if (TREE_CODE (other) == SSA_NAME ++ || DECL_P (other) ++ || TREE_CODE (other) == INTEGER_CST) ++ { ++ check_type_and_push (other, type, worklist, stmt); ++ return; ++ } ++ ++ tree t = TREE_TYPE (other); ++ if (!handled_type (t)) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ ++ srtype *t1 = find_type (inner_type (t)); ++ if (t1 == type) ++ { ++ tree base; ++ bool indirect; ++ srtype *type1; ++ srfield *field; ++ bool realpart, imagpart, address; ++ if (!get_type_field (other, base, indirect, type1, field, ++ realpart, imagpart, address)) ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ ++ return; ++ } ++ ++ if (t1) ++ t1->mark_escape (escape_cast_another_ptr, stmt); ++ ++ type->mark_escape (escape_cast_another_ptr, stmt); ++} ++ ++void ++ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, ++ vec &worklist) ++{ ++ srtype *type = decl->type; ++ ++ if (gimple_code (stmt) == GIMPLE_RETURN) ++ { ++ type->mark_escape (escape_return, stmt); ++ return; ++ } ++ /* If the SSA_NAME PHI check and add the src to the worklist and ++ check to make sure they are used correctly. 
*/ ++ if (gimple_code (stmt) == GIMPLE_PHI) ++ { ++ check_type_and_push (gimple_phi_result (stmt), type, worklist, stmt); ++ return; ++ } ++ ++ if (gimple_code (stmt) == GIMPLE_ASM) ++ { ++ mark_types_asm (as_a (stmt)); ++ return; ++ } ++ ++ if (gimple_code (stmt) == GIMPLE_COND) ++ { ++ tree rhs1 = gimple_cond_lhs (stmt); ++ tree rhs2 = gimple_cond_rhs (stmt); ++ tree orhs = rhs1; ++ if (gimple_cond_code (stmt) != EQ_EXPR ++ && gimple_cond_code (stmt) != NE_EXPR) ++ { ++ mark_expr_escape (rhs1, escape_non_eq, stmt); ++ mark_expr_escape (rhs2, escape_non_eq, stmt); ++ } ++ if (rhs1 == decl->decl) ++ orhs = rhs2; ++ if (integer_zerop (orhs)) ++ return; ++ if (TREE_CODE (orhs) != SSA_NAME) ++ mark_expr_escape (rhs1, escape_non_eq, stmt); ++ check_type_and_push (orhs, type, worklist, stmt); ++ return; ++ } ++ ++ /* Casts between pointers and integer are escaping. */ ++ if (gimple_assign_cast_p (stmt)) ++ { ++ type->mark_escape (escape_cast_int, stmt); ++ return; ++ } ++ ++ /* We might have a_1 = ptr_2 == ptr_3; */ ++ if (is_gimple_assign (stmt) ++ && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree orhs = rhs1; ++ if (gimple_assign_rhs_code (stmt) != EQ_EXPR ++ && gimple_assign_rhs_code (stmt) != NE_EXPR) ++ { ++ mark_expr_escape (rhs1, escape_non_eq, stmt); ++ mark_expr_escape (rhs2, escape_non_eq, stmt); ++ } ++ if (rhs1 == decl->decl) ++ orhs = rhs2; ++ if (integer_zerop (orhs)) ++ return; ++ if (TREE_CODE (orhs) != SSA_NAME) ++ mark_expr_escape (rhs1, escape_non_eq, stmt); ++ check_type_and_push (orhs, type, worklist, stmt); ++ return; ++ } ++ ++ if (gimple_assign_single_p (stmt)) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ /* Check if we have a_1 = b_2; that a_1 is in the correct type. 
*/ ++ if (decl->decl == rhs) ++ { ++ check_other_side (decl, lhs, stmt, worklist); ++ return; ++ } ++ } ++ ++ if (is_gimple_assign (stmt) ++ && gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree lhs = gimple_assign_lhs (stmt); ++ tree num; ++ check_other_side (decl, lhs, stmt, worklist); ++ if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) ++ type->mark_escape (escape_non_multiply_size, stmt); ++ } ++} ++ ++/* ++ 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) ++ d) if the name is from a cast/assignment, make sure it is used as that ++ type or void* ++ i) If void* then push the ssa_name into worklist ++ e) if used in conditional check the other side ++ i) If the conditional is non NE/EQ then mark the type as non rejecting ++ f) Check if the use in a Pointer PLUS EXPR Is used by mulitplication ++ of its size ++ */ ++void ++ipa_struct_reorg::check_uses (srdecl *decl, vec &worklist) ++{ ++ tree ssa_name = decl->decl; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name) ++ { ++ gimple *stmt = USE_STMT (use_p); ++ ++ if (is_gimple_debug (stmt)) ++ continue; ++ ++ check_use (decl, stmt, worklist); ++ } ++} ++ ++/* Record function corresponding to NODE. */ ++ ++srfunction * ++ipa_struct_reorg::record_function (cgraph_node *node) ++{ ++ function *fn; ++ tree parm, var; ++ unsigned int i; ++ srfunction *sfn; ++ escape_type escapes = does_not_escape; ++ ++ sfn = new srfunction (node); ++ functions.safe_push (sfn); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, ++ "\nRecording accesses and types from function: %s/%u\n", ++ node->name (), node->order); ++ ++ /* Nodes without a body are not interesting. Especially do not ++ visit clones at this point for now - we get duplicate decls ++ there for inline clones at least. 
*/ ++ if (!node->has_gimple_body_p () || node->inlined_to) ++ return sfn; ++ ++ node->get_body (); ++ fn = DECL_STRUCT_FUNCTION (node->decl); ++ ++ if (!fn) ++ return sfn; ++ ++ current_function = sfn; ++ ++ if (DECL_PRESERVE_P (node->decl)) ++ escapes = escape_marked_as_used; ++ else if (!node->local) ++ escapes = escape_visible_function; ++ else if (!node->can_change_signature) ++ escapes = escape_cannot_change_signature; ++ else if (!tree_versionable_function_p (node->decl)) ++ escapes = escape_noclonable_function; ++ else if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) ++ escapes = escape_non_optimize; ++ ++ basic_block bb; ++ gimple_stmt_iterator si; ++ ++ /* Record the static chain decl. */ ++ if (fn->static_chain_decl) ++ { ++ srdecl *sd = record_var (fn->static_chain_decl, ++ escapes, -2); ++ if (sd) ++ { ++ /* Specify that this type is used by the static ++ chain so it cannot be split. */ ++ sd->type->chain_type = true; ++ sfn->add_arg (sd); ++ sd->type->add_function (sfn); ++ } ++ } ++ ++ /* Record the arguments. */ ++ for (parm = DECL_ARGUMENTS (node->decl), i = 0; ++ parm; ++ parm = DECL_CHAIN (parm), i++) ++ { ++ srdecl *sd = record_var (parm, escapes, i); ++ if (sd) ++ { ++ sfn->add_arg (sd); ++ sd->type->add_function (sfn); ++ } ++ } ++ ++ /* Mark the return type as escaping. */ ++ { ++ tree return_type = TREE_TYPE (TREE_TYPE (node->decl)); ++ mark_type_as_escape (return_type, escape_return, NULL); ++ } ++ ++ /* If the cfg does not exist for the function, ++ don't process the function. */ ++ if (!fn->cfg) ++ { ++ current_function = NULL; ++ return sfn; ++ } ++ ++ /* The following order is done for recording stage: ++ 0) Record all variables/SSA_NAMES that are of struct type ++ 1) Record MEM_REF/COMPONENT_REFs ++ a) Record SSA_NAMEs (void*) and record that as the accessed type. 
++ */ ++ ++ push_cfun (fn); ++ ++ FOR_EACH_LOCAL_DECL (cfun, i, var) ++ { ++ if (TREE_CODE (var) != VAR_DECL) ++ continue; ++ ++ record_var (var); ++ } ++ ++ for (i = 1; i < num_ssa_names; ++i) ++ { ++ tree name = ssa_name (i); ++ if (!name ++ || has_zero_uses (name) ++ || virtual_operand_p (name)) ++ continue; ++ ++ record_var (name); ++ } ++ ++ /* Find the variables which are used via MEM_REF and are void* types. */ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) ++ { ++ gimple *stmt = gsi_stmt (si); ++ find_vars (stmt); ++ } ++ } ++ ++ auto_vec worklist; ++ for (unsigned i = 0; i < current_function->decls.length (); i++) ++ { ++ srdecl *decl = current_function->decls[i]; ++ if (TREE_CODE (decl->decl) == SSA_NAME) ++ { ++ decl->visited = false; ++ worklist.safe_push (decl); ++ } ++ } ++ ++/* ++ 2) Check SSA_NAMEs for non type usages (source or use) (worklist of srdecl) ++ a) if the SSA_NAME is sourced from a pointer plus, record the pointer and ++ check to make sure the addition was a multiple of the size. ++ check the pointer type too. 
++ b) If the name is sourced from an allocation check the allocation ++ i) Add SSA_NAME (void*) to the worklist if allocated from realloc ++ c) if the name is from a param, make sure the param type was of the ++ original type ++ d) if the name is used in a cast/assignment, make sure it is used as that ++ type or void* ++ i) If void* then push the ssa_name into worklist ++ e) if used in conditional check the other side ++ i) If the conditional is non NE/EQ then mark the type as escaping ++ f) Check if the use in a POINTER_PLUS_EXPR is used by multiplication ++ of its size ++*/ ++ ++ while (!worklist.is_empty ()) ++ { ++ srdecl *decl = worklist.pop (); ++ if (decl->visited) ++ continue; ++ decl->visited = true; ++ check_definition (decl, worklist); ++ check_uses (decl, worklist); ++ } ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) ++ { ++ gimple *stmt = gsi_stmt (si); ++ maybe_record_stmt (node, stmt); ++ } ++ } ++ ++ pop_cfun (); ++ current_function = NULL; ++ return sfn; ++} ++ ++/* Record all accesses for all types including global variables. */ ++ ++void ++ipa_struct_reorg::record_accesses (void) ++{ ++ varpool_node *var; ++ cgraph_node *cnode; ++ ++ /* Record global (non-auto) variables first. */ ++ FOR_EACH_VARIABLE (var) ++ { ++ if (!var->real_symbol_p ()) ++ continue; ++ ++ /* Record all variables including the accesses inside a variable. 
*/ ++ escape_type escapes = does_not_escape; ++ if (var->externally_visible || !var->definition) ++ escapes = escape_via_global_var; ++ if (var->in_other_partition) ++ escapes = escape_via_global_var; ++ if (!var->externally_visible && var->definition) ++ var->get_constructor (); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Recording global variable: "); ++ print_generic_expr (dump_file, var->decl); ++ fprintf (dump_file, "\n"); ++ } ++ record_var (var->decl, escapes); ++ } ++ ++ FOR_EACH_FUNCTION (cnode) ++ { ++ if (!cnode->real_symbol_p ()) ++ continue; ++ ++ /* Record accesses inside a function. */ ++ if (cnode->definition) ++ record_function (cnode); ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "all types (before pruning):\n"); ++ dump_types (dump_file); ++ fprintf (dump_file, "all functions (before pruning):\n"); ++ dump_functions (dump_file); ++ } ++ done_recording = true; ++} ++ ++/* A helper function to detect cycles (recursive types). ++ Return TRUE if TYPE was a recursive type. */ ++ ++bool ++ipa_struct_reorg::walk_field_for_cycles (srtype *type) ++{ ++ unsigned i; ++ srfield *field; ++ ++ type->visited = true; ++ if (type->escaped_rescusive ()) ++ return true; ++ ++ if (type->has_escaped ()) ++ return false; ++ ++ FOR_EACH_VEC_ELT (type->fields, i, field) ++ { ++ if (!field->type) ++ ; ++ else if (field->type->visited ++ || walk_field_for_cycles (field->type)) ++ { ++ type->mark_escape (escape_rescusive_type, NULL); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/* Clear visited on all types. */ ++ ++void ++ipa_struct_reorg::clear_visited (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ types[i]->visited = false; ++} ++ ++/* Detect recursive types and mark them as escaping. 
*/ ++ ++void ++ipa_struct_reorg::detect_cycles (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ if (types[i]->has_escaped ()) ++ continue; ++ ++ clear_visited (); ++ walk_field_for_cycles (types[i]); ++ } ++} ++ ++/* Propagate escaping to dependent types. */ ++ ++void ++ipa_struct_reorg::propagate_escape (void) ++{ ++ unsigned i; ++ srtype *type; ++ bool changed = false; ++ ++ do ++ { ++ changed = false; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ for (tree field = TYPE_FIELDS (type->type); ++ field; ++ field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL ++ && handled_type (TREE_TYPE (field))) ++ { ++ tree t = inner_type (TREE_TYPE (field)); ++ srtype *type1 = find_type (t); ++ if (!type1) ++ continue; ++ if (type1->has_escaped () ++ && !type->has_escaped ()) ++ { ++ type->mark_escape (escape_dependent_type_escapes, NULL); ++ changed = true; ++ } ++ if (type->has_escaped () ++ && !type1->has_escaped ()) ++ { ++ type1->mark_escape (escape_dependent_type_escapes, NULL); ++ changed = true; ++ } ++ } ++ } ++ } ++ } while (changed); ++} ++ ++/* Prune the escaped types and their decls from what was recorded. */ ++ ++void ++ipa_struct_reorg::prune_escaped_types (void) ++{ ++ detect_cycles (); ++ propagate_escape (); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "all types (after prop but before pruning):\n"); ++ dump_types (dump_file); ++ fprintf (dump_file, "all functions (after prop but before pruning):\n"); ++ dump_functions (dump_file); ++ } ++ ++ if (dump_file) ++ dump_types_escaped (dump_file); ++ ++ /* Prune the function arguments which escape ++ and functions which have no types as arguments. */ ++ for (unsigned i = 0; i < functions.length ();) ++ { ++ srfunction *function = functions[i]; ++ ++ /* Prune function arguments of types that escape. 
*/ ++ for (unsigned j = 0; j < function->args.length ();) ++ { ++ if (function->args[j]->type->has_escaped ()) ++ function->args.ordered_remove (j); ++ else ++ j++; ++ } ++ ++ /* Prune global variables that the function uses of types ++ that escape. */ ++ for (unsigned j = 0; j < function->globals.length ();) ++ { ++ if (function->globals[j]->type->has_escaped ()) ++ function->globals.ordered_remove (j); ++ else ++ j++; ++ } ++ ++ /* Prune variables that the function uses of types that escape. */ ++ for (unsigned j = 0; j < function->decls.length ();) ++ { ++ srdecl *decl = function->decls[j]; ++ if (decl->type->has_escaped ()) ++ { ++ function->decls.ordered_remove (j); ++ delete decl; ++ } ++ else ++ j++; ++ } ++ ++ /* Prune functions which don't refer to any variables any more. */ ++ if (function->args.is_empty () ++ && function->decls.is_empty () ++ && function->globals.is_empty ()) ++ { ++ delete function; ++ functions.ordered_remove (i); ++ } ++ else ++ i++; ++ } ++ ++ /* Prune globals of types that escape, all references to those decls ++ will have been removed in the first loop. */ ++ for (unsigned j = 0; j < globals.decls.length ();) ++ { ++ srdecl *decl = globals.decls[j]; ++ if (decl->type->has_escaped ()) ++ { ++ globals.decls.ordered_remove (j); ++ delete decl; ++ } ++ else ++ j++; ++ } ++ ++ /* Prune types that escape, all references to those types ++ will have been removed in the above loops. */ ++ for (unsigned i = 0; i < types.length ();) ++ { ++ srtype *type = types[i]; ++ if (type->has_escaped ()) ++ { ++ /* All references to this type should have been removed now. */ ++ delete type; ++ types.ordered_remove (i); ++ } ++ else ++ i++; ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "all types (after pruning):\n"); ++ dump_types (dump_file); ++ fprintf (dump_file, "all functions (after pruning):\n"); ++ dump_functions (dump_file); ++ } ++} ++ ++/* Analyze all of the types. 
*/ ++ ++void ++ipa_struct_reorg::analyze_types (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ if (!types[i]->has_escaped ()) ++ types[i]->analyze (); ++ } ++} ++ ++/* When struct A has a struct B member, B's type info ++ is not stored in ++ TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA))) ++ Try to restore B's type information. */ ++ ++void ++ipa_struct_reorg::restore_field_type (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ for (unsigned j = 0; j < types[i]->fields.length (); j++) ++ { ++ srfield *field = types[i]->fields[j]; ++ if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE) ++ { ++ /* If field type has TYPE_FIELDS information, ++ we do not need to do this. */ ++ if (TYPE_FIELDS (field->type->type) != NULL) ++ continue; ++ for (unsigned k = 0; k < types.length (); k++) ++ { ++ if (i == k) ++ continue; ++ const char *type1 = get_type_name (field->type->type); ++ const char *type2 = get_type_name (types[k]->type); ++ if (type1 == NULL || type2 == NULL) ++ continue; ++ if (type1 == type2 ++ && TYPE_FIELDS (types[k]->type)) ++ field->type = types[k]; ++ } ++ } ++ } ++ } ++} ++ ++/* Create all new types we want to create. */ ++ ++bool ++ipa_struct_reorg::create_new_types (void) ++{ ++ int newtypes = 0; ++ clear_visited (); ++ for (unsigned i = 0; i < types.length (); i++) ++ newtypes += types[i]->create_new_type (); ++ ++ if (dump_file) ++ { ++ if (newtypes) ++ fprintf (dump_file, "\nNumber of structures to transform is %d\n", ++ newtypes); ++ else ++ fprintf (dump_file, "\nNo structures to transform.\n"); ++ } ++ ++ return newtypes != 0; ++} ++ ++/* Create all the new decls except for the new arguments ++ which create_new_functions would have created. */ ++ ++void ++ipa_struct_reorg::create_new_decls (void) ++{ ++ globals.create_new_decls (); ++ for (unsigned i = 0; i < functions.length (); i++) ++ functions[i]->create_new_decls (); ++} ++ ++/* Create the new arguments for the function corresponding to NODE. 
*/ ++ ++void ++ipa_struct_reorg::create_new_args (cgraph_node *new_node) ++{ ++ tree decl = new_node->decl; ++ auto_vec params; ++ push_function_arg_decls (&params, decl); ++ vec *adjs = NULL; ++ vec_safe_reserve (adjs, params.length ()); ++ for (unsigned i = 0; i < params.length (); i++) ++ { ++ struct ipa_adjusted_param adj; ++ tree parm = params[i]; ++ memset (&adj, 0, sizeof (adj)); ++ adj.base_index = i; ++ adj.prev_clone_index = i; ++ srtype *t = find_type (inner_type (TREE_TYPE (parm))); ++ if (!t ++ || t->has_escaped () ++ || !t->has_new_type ()) ++ { ++ adj.op = IPA_PARAM_OP_COPY; ++ vec_safe_push (adjs, adj); ++ continue; ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Creating a new argument for: "); ++ print_generic_expr (dump_file, params[i]); ++ fprintf (dump_file, " in function: "); ++ print_generic_expr (dump_file, decl); ++ fprintf (dump_file, "\n"); ++ } ++ adj.op = IPA_PARAM_OP_NEW; ++ adj.param_prefix_index = IPA_PARAM_PREFIX_REORG; ++ for (unsigned j = 0; j < max_split && t->newtype[j]; j++) ++ { ++ adj.type = reconstruct_complex_type (TREE_TYPE (parm), ++ t->newtype[j]); ++ vec_safe_push (adjs, adj); ++ } ++ } ++ ipa_param_body_adjustments *adjustments ++ = new ipa_param_body_adjustments (adjs, decl); ++ adjustments->modify_formal_parameters (); ++ auto_vec new_params; ++ push_function_arg_decls (&new_params, decl); ++ unsigned veclen = vec_safe_length (adjs); ++ for (unsigned i = 0; i < veclen; i++) ++ { ++ if ((*adjs)[i].op != IPA_PARAM_OP_NEW) ++ continue; ++ tree decl = params[(*adjs)[i].base_index]; ++ srdecl *d = find_decl (decl); ++ if (!d) ++ continue; ++ unsigned j = 0; ++ while (j < max_split && d->newdecl[j]) ++ j++; ++ d->newdecl[j] = new_params[i]; ++ } ++ ++ function *fn = DECL_STRUCT_FUNCTION (decl); ++ ++ if (!fn->static_chain_decl) ++ return; ++ srdecl *chain = find_decl (fn->static_chain_decl); ++ if (!chain) ++ return; ++ ++ srtype *type = chain->type; ++ tree orig_var = chain->decl; ++ const 
char *tname = NULL; ++ if (DECL_NAME (orig_var)) ++ tname = IDENTIFIER_POINTER (DECL_NAME (orig_var)); ++ gcc_assert (!type->newtype[1]); ++ tree new_name = NULL; ++ char *name = NULL; ++ if (tname) ++ { ++ name = concat (tname, ".reorg.0", NULL); ++ new_name = get_identifier (name); ++ free (name); ++ } ++ tree newtype1 = reconstruct_complex_type (TREE_TYPE (orig_var), ++ type->newtype[0]); ++ chain->newdecl[0] = build_decl (DECL_SOURCE_LOCATION (orig_var), ++ PARM_DECL, new_name, newtype1); ++ copy_var_attributes (chain->newdecl[0], orig_var); ++ fn->static_chain_decl = chain->newdecl[0]; ++} ++ ++/* Find the referred DECL in the current function or globals. ++ If this is a global decl, record that as being used ++ in the current function. */ ++ ++srdecl * ++ipa_struct_reorg::find_decl (tree decl) ++{ ++ srdecl *d; ++ d = globals.find_decl (decl); ++ if (d) ++ { ++ /* Record the global usage in the current function. */ ++ if (!done_recording && current_function) ++ { ++ bool add = true; ++ /* No reason to add it to the current function if it is ++ already recorded as such. */ ++ for (unsigned i = 0; i < current_function->globals.length (); i++) ++ { ++ if (current_function->globals[i] == d) ++ { ++ add = false; ++ break; ++ } ++ } ++ if (add) ++ current_function->globals.safe_push (d); ++ } ++ return d; ++ } ++ if (current_function) ++ return current_function->find_decl (decl); ++ return NULL; ++} ++ ++/* Create new function clones for the cases where the arguments ++ need to be changed. 
*/ ++ ++void ++ipa_struct_reorg::create_new_functions (void) ++{ ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functions[i]; ++ bool anyargchanges = false; ++ cgraph_node *new_node; ++ cgraph_node *node = f->node; ++ int newargs = 0; ++ if (f->old) ++ continue; ++ ++ if (f->args.length () == 0) ++ continue; ++ ++ for (unsigned j = 0; j < f->args.length (); j++) ++ { ++ srdecl *d = f->args[j]; ++ srtype *t = d->type; ++ if (t->has_new_type ()) ++ { ++ newargs += t->newtype[1] != NULL; ++ anyargchanges = true; ++ } ++ } ++ if (!anyargchanges) ++ continue; ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Creating a clone of function: "); ++ f->simple_dump (dump_file); ++ fprintf (dump_file, "\n"); ++ } ++ statistics_counter_event (NULL, "Create new function", 1); ++ new_node = node->create_version_clone_with_body (vNULL, NULL, ++ NULL, NULL, NULL, ++ "struct_reorg"); ++ new_node->can_change_signature = node->can_change_signature; ++ new_node->make_local (); ++ f->newnode = new_node; ++ srfunction *n = record_function (new_node); ++ current_function = n; ++ n->old = f; ++ f->newf = n; ++ /* Create New arguments. */ ++ create_new_args (new_node); ++ current_function = NULL; ++ } ++} ++ ++bool ++ipa_struct_reorg::rewrite_lhs_rhs (tree lhs, tree rhs, ++ tree newlhs[max_split], ++ tree newrhs[max_split]) ++{ ++ bool l = rewrite_expr (lhs, newlhs); ++ bool r = rewrite_expr (rhs, newrhs); ++ ++ /* Handle NULL pointer specially. 
*/ ++ if (l && !r && integer_zerop (rhs)) ++ { ++ r = true; ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ newrhs[i] = fold_convert (TREE_TYPE (newlhs[i]), rhs); ++ } ++ ++ return l || r; ++} ++ ++bool ++ipa_struct_reorg::rewrite_expr (tree expr, ++ tree newexpr[max_split], ++ bool ignore_missing_decl) ++{ ++ tree base; ++ bool indirect; ++ srtype *t; ++ srfield *f; ++ bool realpart, imagpart; ++ bool address; ++ ++ tree newbase[max_split]; ++ memset (newexpr, 0, sizeof (tree[max_split])); ++ ++ if (TREE_CODE (expr) == CONSTRUCTOR) ++ { ++ srtype *t = find_type (TREE_TYPE (expr)); ++ if (!t) ++ return false; ++ gcc_assert (CONSTRUCTOR_NELTS (expr) == 0); ++ if (!t->has_new_type ()) ++ return false; ++ for (unsigned i = 0; i < max_split && t->newtype[i]; i++) ++ newexpr[i] = build_constructor (t->newtype[i], NULL); ++ return true; ++ } ++ ++ if (!get_type_field (expr, base, indirect, t, f, ++ realpart, imagpart, address)) ++ return false; ++ ++ /* If the type is not changed, then just return false. */ ++ if (!t->has_new_type ()) ++ return false; ++ ++ /* NULL pointer handling is "special". 
*/ ++ if (integer_zerop (base)) ++ { ++ gcc_assert (indirect && !address); ++ for (unsigned i = 0; i < max_split && t->newtype[i]; i++) ++ { ++ tree newtype1 = reconstruct_complex_type (TREE_TYPE (base), ++ t->newtype[i]); ++ newbase[i] = fold_convert (newtype1, base); ++ } ++ } ++ else ++ { ++ srdecl *d = find_decl (base); ++ ++ if (!d && dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Can't find decl:\n"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\ntype:\n"); ++ t->dump (dump_file); ++ } ++ if (!d && ignore_missing_decl) ++ return true; ++ gcc_assert (d); ++ memcpy (newbase, d->newdecl, sizeof (d->newdecl)); ++ } ++ ++ if (f == NULL) ++ { ++ memcpy (newexpr, newbase, sizeof (newbase)); ++ for (unsigned i = 0; i < max_split && newexpr[i]; i++) ++ { ++ if (address) ++ newexpr[i] = build_fold_addr_expr (newexpr[i]); ++ if (indirect) ++ newexpr[i] = build_simple_mem_ref (newexpr[i]); ++ if (imagpart) ++ newexpr[i] = build1 (IMAGPART_EXPR, ++ TREE_TYPE (TREE_TYPE (newexpr[i])), ++ newexpr[i]); ++ if (realpart) ++ newexpr[i] = build1 (REALPART_EXPR, ++ TREE_TYPE (TREE_TYPE (newexpr[i])), ++ newexpr[i]); ++ } ++ return true; ++ } ++ ++ tree newdecl = newbase[f->clusternum]; ++ for (unsigned i = 0; i < max_split && f->newfield[i]; i++) ++ { ++ tree newbase1 = newdecl; ++ if (address) ++ newbase1 = build_fold_addr_expr (newbase1); ++ if (indirect) ++ newbase1 = build_simple_mem_ref (newbase1); ++ newexpr[i] = build3 (COMPONENT_REF, TREE_TYPE (f->newfield[i]), ++ newbase1, f->newfield[i], NULL_TREE); ++ if (imagpart) ++ newexpr[i] = build1 (IMAGPART_EXPR, ++ TREE_TYPE (TREE_TYPE (newexpr[i])), ++ newexpr[i]); ++ if (realpart) ++ newexpr[i] = build1 (REALPART_EXPR, ++ TREE_TYPE (TREE_TYPE (newexpr[i])), ++ newexpr[i]); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "cluster: %d. 
decl = ", (int)f->clusternum); ++ print_generic_expr (dump_file, newbase1); ++ fprintf (dump_file, "\nnewexpr = "); ++ print_generic_expr (dump_file, newexpr[i]); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ return true; ++} ++ ++bool ++ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) ++{ ++ bool remove = false; ++ if (gimple_clobber_p (stmt)) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree newlhs[max_split]; ++ if (!rewrite_expr (lhs, newlhs)) ++ return false; ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ { ++ tree clobber = build_constructor (TREE_TYPE (newlhs[i]), NULL); ++ TREE_THIS_VOLATILE (clobber) = true; ++ gimple *newstmt = gimple_build_assign (newlhs[i], clobber); ++ gsi_insert_before (gsi, newstmt, GSI_SAME_STMT); ++ remove = true; ++ } ++ return remove; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == EQ_EXPR ++ || gimple_assign_rhs_code (stmt) == NE_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree newrhs1[max_split]; ++ tree newrhs2[max_split]; ++ tree_code rhs_code = gimple_assign_rhs_code (stmt); ++ tree_code code = rhs_code == EQ_EXPR ? 
BIT_AND_EXPR : BIT_IOR_EXPR; ++ if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) ++ return false; ++ tree newexpr = NULL_TREE; ++ for (unsigned i = 0; i < max_split && newrhs1[i]; i++) ++ { ++ tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, ++ newrhs1[i], newrhs2[i]); ++ if (!newexpr) ++ newexpr = expr; ++ else ++ newexpr = gimplify_build2 (gsi, code, boolean_type_node, ++ newexpr, expr); ++ } ++ ++ if (newexpr) ++ { ++ newexpr = fold_convert (TREE_TYPE (gimple_assign_lhs (stmt)), ++ newexpr); ++ gimple_assign_set_rhs_from_tree (gsi, newexpr); ++ update_stmt (stmt); ++ } ++ return false; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree newlhs[max_split]; ++ tree newrhs[max_split]; ++ ++ if (!rewrite_lhs_rhs (lhs, rhs1, newlhs, newrhs)) ++ return false; ++ tree size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))); ++ tree num; ++ /* Check if rhs2 is a multiplication of the size of the type. 
*/ ++ if (!is_result_of_mult (rhs2, &num, size)) ++ internal_error ( ++ "The rhs of pointer is not a multiplicate and it slips through"); ++ ++ num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ { ++ gimple *new_stmt; ++ ++ tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhs[i]))); ++ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); ++ new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, ++ newrhs[i], newsize); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); ++ remove = true; ++ } ++ return remove; ++ } ++ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "rewriting statement:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ tree newlhs[max_split]; ++ tree newrhs[max_split]; ++ if (!rewrite_lhs_rhs (lhs, rhs, newlhs, newrhs)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nDid nothing to statement.\n"); ++ return false; ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nreplaced with:\n"); ++ for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++) ++ { ++ gimple *newstmt = gimple_build_assign (newlhs[i] ? newlhs[i] : lhs, ++ newrhs[i] ? newrhs[i] : rhs); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_gimple_stmt (dump_file, newstmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ gsi_insert_before (gsi, newstmt, GSI_SAME_STMT); ++ remove = true; ++ } ++ return remove; ++ } ++ ++ return remove; ++} ++ ++/* Rewrite function call statement STMT. Return TRUE if the statement ++ is to be removed. */ ++ ++bool ++ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) ++{ ++ /* Handled allocation calls are handled seperately from normal ++ function calls. 
*/ ++ if (handled_allocation_stmt (stmt)) ++ { ++ tree lhs = gimple_call_lhs (stmt); ++ tree newrhs1[max_split]; ++ srdecl *decl = find_decl (lhs); ++ if (!decl || !decl->type) ++ return false; ++ srtype *type = decl->type; ++ tree num = allocate_size (type, stmt); ++ gcc_assert (num); ++ memset (newrhs1, 0, sizeof (newrhs1)); ++ ++ /* The realloc call needs to have its first argument rewritten. */ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ { ++ tree rhs1 = gimple_call_arg (stmt, 0); ++ if (integer_zerop (rhs1)) ++ { ++ for (unsigned i = 0; i < max_split; i++) ++ newrhs1[i] = rhs1; ++ } ++ else if (!rewrite_expr (rhs1, newrhs1)) ++ internal_error ("Rewrite failed for realloc"); ++ } ++ ++ /* Go through each new lhs. */ ++ for (unsigned i = 0; i < max_split && decl->newdecl[i]; i++) ++ { ++ tree newsize = TYPE_SIZE_UNIT (type->type); ++ gimple *g; ++ /* Every allocation except for calloc needs ++ the size multiplied out. */ ++ if (!gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); ++ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 1, newsize); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 2, num, newsize); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 2, newrhs1[i], newsize); ++ else ++ gcc_assert (false); ++ gimple_call_set_lhs (g, decl->newdecl[i]); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ } ++ return true; ++ } ++ ++ /* The function call free needs to be handled special. 
*/ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_FREE)) ++ { ++ tree expr = gimple_call_arg (stmt, 0); ++ tree newexpr[max_split]; ++ if (!rewrite_expr (expr, newexpr)) ++ return false; ++ ++ if (newexpr[1] == NULL) ++ { ++ gimple_call_set_arg (stmt, 0, newexpr[0]); ++ update_stmt (stmt); ++ return false; ++ } ++ ++ for (unsigned i = 0; i < max_split && newexpr[i]; i++) ++ { ++ gimple *g = gimple_build_call (gimple_call_fndecl (stmt), ++ 1, newexpr[i]); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ } ++ return true; ++ } ++ ++ /* Otherwise, look up the function to see if we have cloned it ++ and rewrite the arguments. */ ++ tree fndecl = gimple_call_fndecl (stmt); ++ ++ /* Indirect calls are already marked as escaping so ignore. */ ++ if (!fndecl) ++ return false; ++ ++ cgraph_node *node = cgraph_node::get (fndecl); ++ gcc_assert (node); ++ srfunction *f = find_function (node); ++ ++ /* Did not find the function or had not cloned it return saying don't ++ change the function call. */ ++ if (!f || !f->newf) ++ return false; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Changing arguments for function call :\n"); ++ print_gimple_expr (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ ++ /* Move over to the new function. */ ++ f = f->newf; ++ ++ tree chain = gimple_call_chain (stmt); ++ unsigned nargs = gimple_call_num_args (stmt); ++ auto_vec vargs (nargs); ++ ++ if (chain) ++ { ++ tree newchains[max_split]; ++ if (rewrite_expr (chain, newchains)) ++ { ++ /* Chain decl's type cannot be split and but it can change. 
*/ ++ gcc_assert (newchains[1] == NULL); ++ chain = newchains[0]; ++ } ++ } ++ ++ for (unsigned i = 0; i < nargs; i++) ++ vargs.quick_push (gimple_call_arg (stmt, i)); ++ ++ int extraargs = 0; ++ ++ for (unsigned i = 0; i < f->args.length (); i++) ++ { ++ srdecl *d = f->args[i]; ++ if (d->argumentnum == -2) ++ continue; ++ gcc_assert (d->argumentnum != -1); ++ tree arg = vargs[d->argumentnum + extraargs]; ++ tree newargs[max_split]; ++ if (!rewrite_expr (arg, newargs)) ++ continue; ++ ++ /* If this ARG has a replacement handle the replacement. */ ++ for (unsigned j = 0; j < max_split && d->newdecl[j]; j++) ++ { ++ gcc_assert (newargs[j]); ++ /* If this is the first replacement of the arugment, ++ then just replace it. */ ++ if (j == 0) ++ vargs[d->argumentnum + extraargs] = newargs[j]; ++ else ++ { ++ /* More than one replacement, ++ we need to insert into the array. */ ++ extraargs++; ++ vargs.safe_insert (d->argumentnum + extraargs, newargs[j]); ++ } ++ } ++ } ++ ++ gcall *new_stmt; ++ ++ new_stmt = gimple_build_call_vec (f->node->decl, vargs); ++ ++ if (gimple_call_lhs (stmt)) ++ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); ++ ++ gimple_set_vuse (new_stmt, gimple_vuse (stmt)); ++ gimple_set_vdef (new_stmt, gimple_vdef (stmt)); ++ ++ if (gimple_has_location (stmt)) ++ gimple_set_location (new_stmt, gimple_location (stmt)); ++ gimple_call_copy_flags (new_stmt, stmt); ++ gimple_call_set_chain (new_stmt, chain); ++ ++ gimple_set_modified (new_stmt, true); ++ ++ if (gimple_vdef (new_stmt) ++ && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) ++ SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; ++ ++ gsi_replace (gsi, new_stmt, false); ++ ++ /* We need to defer cleaning EH info on the new statement to ++ fixup-cfg. We may not have dominator information at this point ++ and thus would end up with unreachable blocks and have no way ++ to communicate that we need to run CFG cleanup then. 
*/ ++ int lp_nr = lookup_stmt_eh_lp (stmt); ++ if (lp_nr != 0) ++ { ++ remove_stmt_from_eh_lp (stmt); ++ add_stmt_to_eh_lp (new_stmt, lp_nr); ++ } ++ ++ return false; ++} ++ ++/* Rewrite the conditional statement STMT. Return TRUE if the ++ old statement is to be removed. */ ++ ++bool ++ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) ++{ ++ tree_code rhs_code = gimple_cond_code (stmt); ++ ++ /* Handle only equals or not equals conditionals. */ ++ if (rhs_code != EQ_EXPR ++ && rhs_code != NE_EXPR) ++ return false; ++ tree rhs1 = gimple_cond_lhs (stmt); ++ tree rhs2 = gimple_cond_rhs (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "COND: Rewriting\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, rhs1); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, rhs2); ++ fprintf (dump_file, "\n"); ++ } ++ ++ tree newrhs1[max_split]; ++ tree newrhs2[max_split]; ++ tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; ++ if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nDid nothing to statement.\n"); ++ return false; ++ } ++ ++ tree newexpr = NULL_TREE; ++ for (unsigned i = 0; i < max_split && newrhs1[i]; i++) ++ { ++ tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, ++ newrhs1[i], newrhs2[i]); ++ if (!newexpr) ++ newexpr = expr; ++ else ++ newexpr = gimplify_build2 (gsi, code, boolean_type_node, ++ newexpr, expr); ++ } ++ ++ if (newexpr) ++ { ++ gimple_cond_set_lhs (stmt, newexpr); ++ gimple_cond_set_rhs (stmt, boolean_true_node); ++ update_stmt (stmt); ++ } ++ return false; ++} ++ ++/* Rewrite debug statements if possible. Return TRUE if the statement ++ should be removed. 
*/ ++ ++bool ++ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *) ++{ ++ bool remove = false; ++ if (gimple_debug_bind_p (stmt)) ++ { ++ tree var = gimple_debug_bind_get_var (stmt); ++ tree newvar[max_split]; ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ if (gimple_debug_bind_has_value_p (stmt)) ++ { ++ var = gimple_debug_bind_get_value (stmt); ++ if (TREE_CODE (var) == POINTER_PLUS_EXPR) ++ var = TREE_OPERAND (var, 0); ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ } ++ } ++ else if (gimple_debug_source_bind_p (stmt)) ++ { ++ tree var = gimple_debug_source_bind_get_var (stmt); ++ tree newvar[max_split]; ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ var = gimple_debug_source_bind_get_value (stmt); ++ if (TREE_CODE (var) == POINTER_PLUS_EXPR) ++ var = TREE_OPERAND (var, 0); ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ } ++ ++ return remove; ++} ++ ++/* Rewrite PHI nodes, return true if the PHI was replaced. 
*/ ++ ++bool ++ipa_struct_reorg::rewrite_phi (gphi *phi) ++{ ++ tree newlhs[max_split]; ++ gphi *newphi[max_split]; ++ tree result = gimple_phi_result (phi); ++ gphi_iterator gsi; ++ ++ memset (newphi, 0, sizeof (newphi)); ++ ++ if (!rewrite_expr (result, newlhs)) ++ return false; ++ ++ if (newlhs[0] == NULL) ++ return false; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nrewriting PHI:"); ++ print_gimple_stmt (dump_file, phi, 0); ++ } ++ ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ newphi[i] = create_phi_node (newlhs[i], gimple_bb (phi)); ++ ++ for (unsigned i = 0; i < gimple_phi_num_args (phi); i++) ++ { ++ tree newrhs[max_split]; ++ phi_arg_d rhs = *gimple_phi_arg (phi, i); ++ rewrite_expr (rhs.def, newrhs); ++ for (unsigned j = 0; j < max_split && newlhs[j]; j++) ++ { ++ SET_PHI_ARG_DEF (newphi[j], i, newrhs[j]); ++ gimple_phi_arg_set_location (newphi[j], i, rhs.locus); ++ update_stmt (newphi[j]); ++ } ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\ninto\n:"); ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ { ++ print_gimple_stmt (dump_file, newphi[i], 0); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ gsi = gsi_for_phi (phi); ++ remove_phi_node (&gsi, false); ++ ++ return true; ++} ++ ++/* Rewrite gimple statement STMT, return true if the STATEMENT ++ is to be removed. */ ++ ++bool ++ipa_struct_reorg::rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ return rewrite_assign (as_a (stmt), gsi); ++ case GIMPLE_CALL: ++ return rewrite_call (as_a (stmt), gsi); ++ case GIMPLE_COND: ++ return rewrite_cond (as_a (stmt), gsi); ++ break; ++ case GIMPLE_GOTO: ++ case GIMPLE_SWITCH: ++ break; ++ case GIMPLE_DEBUG: ++ case GIMPLE_ASM: ++ break; ++ default: ++ break; ++ } ++ return false; ++} ++ ++/* Does the function F uses any decl which has changed. 
*/ ++ ++bool ++ipa_struct_reorg::has_rewritten_type (srfunction *f) ++{ ++ for (unsigned i = 0; i < f->decls.length (); i++) ++ { ++ srdecl *d = f->decls[i]; ++ if (d->newdecl[0] != d->decl) ++ return true; ++ } ++ ++ for (unsigned i = 0; i < f->globals.length (); i++) ++ { ++ srdecl *d = f->globals[i]; ++ if (d->newdecl[0] != d->decl) ++ return true; ++ } ++ return false; ++} ++ ++/* Rewrite the functions if needed, return ++ the TODOs requested. */ ++ ++unsigned ++ipa_struct_reorg::rewrite_functions (void) ++{ ++ unsigned retval = 0; ++ ++ restore_field_type (); ++ /* Create new types, if we did not create any new types, ++ then don't rewrite any accesses. */ ++ if (!create_new_types ()) ++ return 0; ++ ++ if (functions.length ()) ++ { ++ retval = TODO_remove_functions; ++ create_new_functions (); ++ } ++ ++ create_new_decls (); ++ ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functions[i]; ++ if (f->newnode) ++ continue; ++ ++ /* Function uses no rewriten types so don't cause a rewrite. */ ++ if (!has_rewritten_type (f)) ++ continue; ++ ++ cgraph_node *node = f->node; ++ basic_block bb; ++ ++ push_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ current_function = f; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nBefore rewrite:\n"); ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ } ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);) ++ { ++ if (rewrite_phi (si.phi ())) ++ si = gsi_start_phis (bb); ++ else ++ gsi_next (&si); ++ } ++ ++ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (rewrite_stmt (stmt, &si)) ++ gsi_remove (&si, true); ++ else ++ gsi_next (&si); ++ } ++ } ++ ++ /* Debug statements need to happen after all other statements ++ have changed. 
*/ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (gimple_code (stmt) == GIMPLE_DEBUG ++ && rewrite_debug (stmt, &si)) ++ gsi_remove (&si, true); ++ else ++ gsi_next (&si); ++ } ++ } ++ ++ /* Release the old SSA_NAMES for old arguments. */ ++ if (f->old) ++ { ++ for (unsigned i = 0; i < f->args.length (); i++) ++ { ++ srdecl *d = f->args[i]; ++ if (d->newdecl[0] != d->decl) ++ { ++ tree ssa_name = ssa_default_def (cfun, d->decl); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Found "); ++ print_generic_expr (dump_file, ssa_name); ++ fprintf (dump_file, " to be released.\n"); ++ } ++ release_ssa_name (ssa_name); ++ } ++ } ++ } ++ ++ update_ssa (TODO_update_ssa_only_virtuals); ++ ++ if (flag_tree_pta) ++ compute_may_aliases (); ++ ++ remove_unused_locals (); ++ ++ cgraph_edge::rebuild_edges (); ++ ++ free_dominance_info (CDI_DOMINATORS); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nAfter rewrite:\n"); ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ } ++ ++ pop_cfun (); ++ current_function = NULL; ++ } ++ ++ return retval | TODO_verify_all; ++} ++ ++unsigned int ++ipa_struct_reorg::execute (void) ++{ ++ /* FIXME: If there is a top-level inline-asm, ++ the pass immediately returns. 
*/ ++ if (symtab->first_asm_symbol ()) ++ return 0; ++ record_accesses (); ++ prune_escaped_types (); ++ analyze_types (); ++ ++ return rewrite_functions (); ++} ++ ++const pass_data pass_data_ipa_struct_reorg = ++{ ++ SIMPLE_IPA_PASS, // type ++ "struct_reorg", // name ++ OPTGROUP_NONE, // optinfo_flags ++ TV_IPA_STRUCT_REORG, // tv_id ++ 0, // properties_required ++ 0, // properties_provided ++ 0, // properties_destroyed ++ 0, // todo_flags_start ++ 0, // todo_flags_finish ++}; ++ ++class pass_ipa_struct_reorg : public simple_ipa_opt_pass ++{ ++public: ++ pass_ipa_struct_reorg (gcc::context *ctxt) ++ : simple_ipa_opt_pass (pass_data_ipa_struct_reorg, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *) ++ { ++ return ipa_struct_reorg ().execute (); ++ } ++ ++}; // class pass_ipa_struct_reorg ++ ++bool ++pass_ipa_struct_reorg::gate (function *) ++{ ++ return (optimize ++ && flag_ipa_struct_reorg ++ /* Don't bother doing anything if the program has errors. */ ++ && !seen_error ()); ++} ++ ++} // anon namespace ++ ++ ++simple_ipa_opt_pass * ++make_pass_ipa_struct_reorg (gcc::context *ctxt) ++{ ++ return new pass_ipa_struct_reorg (ctxt); ++} +\ No newline at end of file +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +new file mode 100644 +index 000000000..a58794070 +--- /dev/null ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -0,0 +1,235 @@ ++/* Struct-reorg optimizations. ++ Copyright (C) 2016-2023 Free Software Foundation, Inc. ++ Contributed by Andrew Pinski ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. 
++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef IPA_STRUCT_REORG_H ++#define IPA_STRUCT_REORG_H ++ ++namespace struct_reorg { ++ ++const int max_split = 2; ++ ++template ++struct auto_vec_del : auto_vec ++{ ++ ~auto_vec_del (); ++}; ++ ++template ++auto_vec_del::~auto_vec_del (void) ++{ ++ unsigned i; ++ T *t; ++ FOR_EACH_VEC_ELT (*this, i, t) ++ { ++ delete t; ++ } ++} ++ ++enum escape_type ++{ ++ does_not_escape, ++#define DEF_ESCAPE(ENUM, TEXT) ENUM, ++#include "escapes.def" ++ escape_max_escape ++}; ++ ++const char *escape_type_string[escape_max_escape - 1] = ++{ ++#define DEF_ESCAPE(ENUM, TEXT) TEXT, ++#include "escapes.def" ++}; ++ ++struct srfield; ++struct srtype; ++struct sraccess; ++struct srdecl; ++struct srfunction; ++ ++struct srfunction ++{ ++ cgraph_node *node; ++ auto_vec args; ++ auto_vec globals; ++ auto_vec_del decls; ++ srdecl *record_decl (srtype *, tree, int arg); ++ ++ srfunction *old; ++ cgraph_node *newnode; ++ srfunction *newf; ++ ++ // Constructors ++ srfunction (cgraph_node *n); ++ ++ // Methods ++ void add_arg (srdecl *arg); ++ void dump (FILE *file); ++ void simple_dump (FILE *file); ++ ++ bool check_args (void); ++ void create_new_decls (void); ++ srdecl *find_decl (tree); ++}; ++ ++struct srglobal : private srfunction ++{ ++ srglobal () ++ : srfunction (NULL) ++ {} ++ ++ using srfunction::dump; ++ using srfunction::create_new_decls; ++ using srfunction::find_decl; ++ using srfunction::record_decl; ++ using srfunction::decls; ++}; ++ ++struct srtype ++{ ++ tree type; ++ auto_vec_del fields; ++ ++ // array of fields that use this type. 
++ auto_vec field_sites; ++ ++ // array of functions which use directly the type ++ auto_vec functions; ++ ++ auto_vec_del accesses; ++ bool chain_type; ++ ++private: ++ escape_type escapes; ++ ++public: ++ tree newtype[max_split]; ++ bool visited; ++ ++ // Constructors ++ srtype (tree type); ++ ++ // Methods ++ void dump (FILE *file); ++ void simple_dump (FILE *file); ++ void add_function (srfunction *); ++ void add_access (sraccess *a) ++ { ++ accesses.safe_push (a); ++ } ++ void add_field_site (srfield *); ++ ++ srfield *find_field (unsigned HOST_WIDE_INT offset); ++ ++ bool create_new_type (void); ++ void analyze (void); ++ void mark_escape (escape_type, gimple *stmt); ++ bool has_escaped (void) ++ { ++ return escapes != does_not_escape; ++ } ++ const char *escape_reason (void) ++ { ++ if (!has_escaped ()) ++ return NULL; ++ return escape_type_string[escapes - 1]; ++ } ++ bool escaped_rescusive (void) ++ { ++ return escapes == escape_rescusive_type; ++ } ++ bool has_new_type (void) ++ { ++ return newtype[0] && newtype[0] != type; ++ } ++}; ++ ++struct srfield ++{ ++ unsigned HOST_WIDE_INT offset; ++ tree fieldtype; ++ tree fielddecl; ++ srtype *base; ++ srtype *type; ++ ++ unsigned clusternum; ++ ++ tree newfield[max_split]; ++ ++ // Constructors ++ srfield (tree field, srtype *base); ++ ++ // Methods ++ void dump (FILE *file); ++ void simple_dump (FILE *file); ++ ++ void create_new_fields (tree newtype[max_split], ++ tree newfields[max_split], ++ tree newlast[max_split]); ++}; ++ ++struct sraccess ++{ ++ gimple *stmt; ++ cgraph_node *node; ++ ++ srtype *type; ++ // NULL field means the whole type is accessed ++ srfield *field; ++ ++ // Constructors ++ sraccess (gimple *s, cgraph_node *n, srtype *t, srfield *f = NULL) ++ : stmt (s), ++ node (n), ++ type (t), ++ field (f) ++ {} ++ ++ // Methods ++ void dump (FILE *file); ++}; ++ ++struct srdecl ++{ ++ srtype *type; ++ tree decl; ++ tree func; ++ /* -1 : not an argument ++ -2 : static chain ++ */ ++ int 
argumentnum; ++ ++ bool visited; ++ ++ tree newdecl[max_split]; ++ ++ // Constructors ++ srdecl (srtype *type, tree decl, int argumentnum = -1); ++ ++ // Methods ++ void dump (FILE *file); ++ bool has_new_decl (void) ++ { ++ return newdecl[0] && newdecl[0] != decl; ++ } ++}; ++ ++ ++} // namespace struct_reorg ++ ++#endif +diff --git a/gcc/params.opt b/gcc/params.opt +index e0ff9e210..1ddf1343f 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -865,6 +865,10 @@ Enum(parloops_schedule_type) String(runtime) Value(PARLOOPS_SCHEDULE_RUNTIME) + Common Joined UInteger Var(param_partial_inlining_entry_probability) Init(70) Optimization IntegerRange(0, 100) Param + Maximum probability of the entry BB of split region (in percent relative to entry BB of the function) to make partial inlining happen. + ++-param=struct-reorg-cold-struct-ratio= ++Common Joined UInteger Var(param_struct_reorg_cold_struct_ratio) Init(10) IntegerRange(0, 100) Param Optimization ++The threshold ratio between current and hottest structure counts. ++ + -param=predictable-branch-outcome= + Common Joined UInteger Var(param_predictable_branch_outcome) Init(2) IntegerRange(0, 50) Param Optimization + Maximal estimated outcome of branch considered predictable. +diff --git a/gcc/passes.def b/gcc/passes.def +index 375d3d62d..1c1658c4a 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -177,6 +177,8 @@ along with GCC; see the file COPYING3. If not see + compiled unit. */ + INSERT_PASSES_AFTER (all_late_ipa_passes) + NEXT_PASS (pass_ipa_pta); ++ /* FIXME: this should be a normal IP pass. 
*/ ++ NEXT_PASS (pass_ipa_struct_reorg); + NEXT_PASS (pass_omp_simd_clone); + TERMINATE_PASS_LIST (all_late_ipa_passes) + +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +new file mode 100644 +index 000000000..43913104e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -0,0 +1,35 @@ ++# Copyright (C) 1997-2023 Free Software Foundation, Inc. ++ ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++load_lib gcc-dg.exp ++load_lib torture-options.exp ++ ++# Initialize `dg'. ++dg-init ++torture-init ++ ++set STRUCT_REORG_TORTURE_OPTIONS [list \ ++ { -O3 } \ ++ { -Ofast } ] ++ ++set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}} ++ ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++ ++# All done. 
++torture-finish ++dg-finish +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +new file mode 100644 +index 000000000..6565fe8dd +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +@@ -0,0 +1,24 @@ ++// { dg-do compile } ++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } ++ ++struct a ++{ ++ int t, t1; ++}; ++ ++static struct a *b; ++ ++void *xmalloc(int); ++ ++ ++void f(void) ++{ ++ b = xmalloc (sizeof(*b)); ++} ++ ++int g(void) ++{ ++ return b->t; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c +new file mode 100644 +index 000000000..44babd35b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c +@@ -0,0 +1,29 @@ ++// { dg-do run } ++ ++#include ++ ++struct a ++{ ++ int t; ++ int t1; ++}; ++ ++__attribute__((noinline)) int f(int i, int j) ++{ ++ struct a *t; ++ struct a t1 = {i, j}; ++ t = &t1; ++ auto int g(void) __attribute__((noinline)); ++ int g(void) ++ { ++ return t->t + t->t1; ++ } ++ return g(); ++} ++ ++int main() ++{ ++ assert (f(1, 2) == 3); ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +new file mode 100644 +index 000000000..5864ad46f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +@@ -0,0 +1,23 @@ ++// { dg-do compile } ++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } ++ ++#include ++typedef struct { ++ long laststart_offset; ++ unsigned regnum; ++} compile_stack_elt_t; ++typedef struct { ++ compile_stack_elt_t *stack; ++ unsigned size; ++} compile_stack_type; ++void f (const char *p, const char *pend, int c) ++{ ++ compile_stack_type compile_stack; ++ while (p != pend) ++ if (c) ++ 
compile_stack.stack = realloc (compile_stack.stack, ++ (compile_stack.size << 1) ++ * sizeof (compile_stack_elt_t)); ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c +new file mode 100644 +index 000000000..e5a8a6c84 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c +@@ -0,0 +1,59 @@ ++/* { dg-do run } */ ++ ++extern void abort (void); ++ ++struct S ++{ ++ int b; ++ int *c; ++}; ++static int d, e; ++ ++static struct S s; ++ ++static int * ++__attribute__((noinline, const)) ++foo (void) ++{ ++ return &s.b; ++} ++ ++int * ++__attribute__((noinline)) ++bar (int **f) ++{ ++ s.c = &d; ++ *f = &e; ++ /* As nothing ever takes the address of any int * field in struct S, ++ the write to *f can't alias with the s.c field. */ ++ return s.c; ++} ++ ++int ++__attribute__((noinline)) ++baz (int *x) ++{ ++ s.b = 1; ++ *x = 4; ++ /* Function foo takes address of an int field in struct S, ++ so *x can alias with the s.b field (and it does in this testcase). */ ++ return s.b; ++} ++ ++int ++__attribute__((noinline)) ++t (void) ++{ ++ int *f = (int *) 0; ++ return 10 * (bar (&f) != &d) + baz (foo ()); ++} ++ ++int ++main (void) ++{ ++ if (t () != 4) ++ abort (); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c +new file mode 100644 +index 000000000..733413a94 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c +@@ -0,0 +1,29 @@ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#define N 1000 ++str_t A[N]; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ A[i].a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (A[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c +new file mode 100644 +index 000000000..0ef686e74 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c +@@ -0,0 +1,42 @@ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t *p; ++ ++int ++main () ++{ ++ int i, sum; ++ ++ p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ p[i].b = i; ++ ++ for (i = 0; i < N; i++) ++ p[i].a = p[i].b + 1; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].a != p[i].b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c +new file mode 100644 +index 000000000..23a53be53 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c +@@ -0,0 +1,37 @@ ++#include ++typedef struct ++{ ++ int a; ++ float b; 
++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t A[N]; ++ ++ for (i = 0; i < N; i++) ++ { ++ A[i].a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (A[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c +new file mode 100644 +index 000000000..0cbb172f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c +@@ -0,0 +1,40 @@ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i, sum; ++ ++ str_t * p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ p[i].b = i; ++ ++ for (i = 0; i < N; i++) ++ p[i].a = p[i].b + 1; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].a != p[i].b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c +new file mode 100644 +index 000000000..f900b1349 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c +@@ -0,0 +1,31 @@ ++#include ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 3 ++ ++str_t str; ++ ++int ++main () ++{ ++ int i; ++ int res = 1<<(1< ++ ++typedef struct ++{ ++ int a; ++ float b; ++}str_t1; ++ ++typedef struct ++{ ++ int c; ++ float d; 
++}str_t2; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 16000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t1 *p1; ++str_t2 *p2; ++int num; ++ ++void ++foo (void) ++{ ++ int i; ++ ++ for (i=0; i < num; i++) ++ p2[i].c = 2; ++} ++ ++int ++main () ++{ ++ int i, r; ++ ++ r = rand (); ++ num = r > N ? N : r; ++ p1 = malloc (num * sizeof (str_t1)); ++ p2 = malloc (num * sizeof (str_t2)); ++ ++ if (p1 == NULL || p2 == NULL) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ p1[i].a = 1; ++ ++ foo (); ++ ++ for (i = 0; i < num; i++) ++ if (p1[i].a != 1 || p2[i].c != 2) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c +new file mode 100644 +index 000000000..dcc545964 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c +@@ -0,0 +1,43 @@ ++#include ++typedef struct ++{ ++ int a; ++ int b; ++}str_t1; ++ ++typedef struct ++{ ++ float a; ++ float b; ++}str_t2; ++ ++#define N1 1000 ++#define N2 100 ++str_t1 A1[N1]; ++str_t2 A2[N2]; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N1; i++) ++ A1[i].a = 0; ++ ++ for (i = 0; i < N2; i++) ++ A2[i].a = 0; ++ ++ for (i = 0; i < N1; i++) ++ if (A1[i].a != 0) ++ abort (); ++ ++ for (i = 0; i < N2; i++) ++ if (A2[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* Arrays are not handled. 
*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c +new file mode 100644 +index 000000000..6d6375fc1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct basic ++{ ++ int a; ++ int b[10]; ++} type_struct; ++ ++type_struct *str1; ++ ++int main() ++{ ++ int i; ++ ++ str1 = malloc (10 * sizeof (type_struct)); ++ ++ for (i=0; i<=9; i++) ++ str1[i].a = str1[i].b[0]; ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c +new file mode 100644 +index 000000000..9d3213408 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t A[N]; ++ str_t *p = A; ++ ++ for (i = 0; i < N; i++) ++ p[i].a = 0; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c +new file mode 100644 +index 000000000..d79992a53 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c +@@ -0,0 +1,29 @@ ++/* { dg-do 
compile } */ ++/* { dg-do run } */ ++ ++#include ++ ++typedef struct test_struct ++{ ++ int a; ++ int b; ++} type_struct; ++ ++typedef type_struct **struct_pointer2; ++ ++struct_pointer2 str1; ++ ++int main() ++{ ++ int i, j; ++ ++ str1 = malloc (2 * sizeof (type_struct *)); ++ ++ for (i = 0; i <= 1; i++) ++ str1[i] = malloc (2 * sizeof (type_struct)); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c +new file mode 100644 +index 000000000..ee9b0d765 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c +@@ -0,0 +1,44 @@ ++/* { dg-do run } */ ++ ++#include ++ ++struct S { int a; struct V *b; }; ++typedef struct { int c; } T; ++typedef struct { int d; int e; } U; ++ ++void * ++fn (void *x) ++{ ++ return x; ++} ++ ++int ++foo (struct S *s) ++{ ++ T x; ++ ++ T y = *(T *)fn (&x); ++ return y.c; ++} ++ ++int ++bar (struct S *s) ++{ ++ U x; ++ ++ U y = *(U *)fn (&x); ++ return y.d + s->a; ++} ++ ++int ++main () ++{ ++ struct S s; ++ ++ foo(&s) + bar (&s); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c +new file mode 100644 +index 000000000..9ebb2b4cc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c +@@ -0,0 +1,44 @@ ++/* { dg-do run } */ ++ ++#include ++struct str ++{ ++ int a; ++ float b; ++}; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++foo (struct str * p_str) ++{ ++ static int 
sum = 0; ++ ++ sum = sum + p_str->a; ++ return sum; ++} ++ ++int ++main () ++{ ++ int i, sum; ++ struct str * p = malloc (N * sizeof (struct str)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ sum = foo (p+i); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c +new file mode 100644 +index 000000000..d0dce8b53 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c +@@ -0,0 +1,33 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-fno-ipa-sra" } */ ++ ++#include ++ ++struct A { ++ int d; ++ int d1; ++}; ++ ++struct A a; ++ ++struct A *foo () __attribute__((noinline)); ++struct A *foo () ++{ ++ a.d = 5; ++ return &a; ++} ++ ++int ++main () ++{ ++ a.d = 0; ++ foo (); ++ ++ if (a.d != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped. 
.Type escapes via a return" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c +new file mode 100644 +index 000000000..71167182d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c +@@ -0,0 +1,32 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-fno-ipa-sra" } */ ++ ++#include ++ ++struct A { ++ int d; ++}; ++ ++struct A a; ++ ++struct A foo () __attribute__((noinline)); ++struct A foo () ++{ ++ a.d = 5; ++ return a; ++} ++ ++int ++main () ++{ ++ a.d = 0; ++ foo (); ++ ++ if (a.d != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped: \"Type escapes via a return" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c +new file mode 100644 +index 000000000..74fa11f39 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 2 ++ ++str_t A[2] = {{1,1},{2,2}}; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ A[i].b = A[i].a; ++ ++ for (i = 0; i < N; i++) ++ if (A[i].b != A[i].a) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c +new file mode 100644 +index 000000000..60d2466e1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#define N 1000 ++ ++typedef struct ++{ ++ str_t A[N]; ++ int c; ++}str_with_substr_t; ++ ++str_with_substr_t a; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ a.A[i].b = 0; ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c +new file mode 100644 +index 000000000..baf617816 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 16000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 1000 ++#endif ++ ++typedef struct ++{ ++ str_t * sub_str; ++ int c; ++}str_with_substr_t; ++ ++int foo; ++ ++int ++main (void) ++{ ++ int i; ++ str_with_substr_t A[N]; ++ str_t a[N]; ++ ++ for (i=0; i < N; i++) ++ A[i].sub_str = &(a[i]); ++ ++ for (i=0; i < N; i++) ++ A[i].sub_str->a = 5; ++ ++ foo = A[56].sub_str->a; ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c 
b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c +new file mode 100644 +index 000000000..33fce3b23 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++ ++typedef struct ++{ ++ str_t sub_str; ++ int c; ++}str_with_substr_t; ++ ++int ++main () ++{ ++ int i; ++ str_with_substr_t A[N]; ++ ++ for (i = 0; i < N; i++) ++ A[i].sub_str.a = 5; ++ ++ for (i = 0; i < N; i++) ++ if (A[i].sub_str.a != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c +new file mode 100644 +index 000000000..1c5a3aa15 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#define N 1000 ++str_t A[N]; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ A[i].a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (A[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c +new file mode 100644 +index 000000000..a0d1467fe +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ 
++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t *p; ++ ++int ++main () ++{ ++ int i, sum; ++ ++ p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ p[i].b = i; ++ ++ for (i = 0; i < N; i++) ++ p[i].b = p[i].a + 1; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].b != p[i].a + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c +new file mode 100644 +index 000000000..6c24e1c8b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c +@@ -0,0 +1,40 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t A[N]; ++ ++ for (i = 0; i < N; i++) ++ { ++ A[i].a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (A[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c +new file mode 100644 +index 000000000..8f2f8143f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c +@@ -0,0 +1,43 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 
8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i, sum; ++ ++ str_t * p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ p[i].b = i; ++ ++ for (i = 0; i < N; i++) ++ p[i].b = p[i].a + 1; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].b != p[i].a + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c +new file mode 100644 +index 000000000..98bf01a6d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ long i, num; ++ ++ num = rand(); ++ num = num > N ? 
N : num; ++ str_t * p = malloc (num * sizeof (str_t)); ++ ++ if (p == 0) ++ return 0; ++ ++ for (i = 1; i <= num; i++) ++ p[i-1].b = i; ++ ++ for (i = 1; i <= num; i++) ++ p[i-1].a = p[i-1].b + 1; ++ ++ for (i = 0; i < num; i++) ++ if (p[i].a != p[i].b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c +new file mode 100644 +index 000000000..66b0f967c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i, num; ++ ++ num = rand(); ++ num = num > N ? 
N : num; ++ str_t * p = malloc (num * sizeof (str_t)); ++ ++ if (p == 0) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ p[i].b = i; ++ ++ for (i = 0; i < num; i++) ++ p[i].a = p[i].b + 1; ++ ++ for (i = 0; i < num; i++) ++ if (p[i].a != p[i].b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c +new file mode 100644 +index 000000000..d28bcfb02 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++ int c; ++ float d; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 1600 ++#define N 100 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 100 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t *p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ p[i].a = 5; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].a != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* Two more fields structure is not splitted. */ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c +new file mode 100644 +index 000000000..37a6a43a8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 3 ++ ++str_t str; ++ ++int ++main () ++{ ++ int i; ++ int res = 1<<(1< ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 3 ++ ++int ++main () ++{ ++ int i; ++ int res = 1<<(1< ++typedef struct ++{ ++ int a; ++ int *b; ++}str_t; ++ ++#define N 3 ++ ++str_t *p; ++ ++int ++main () ++{ ++ str_t str; ++ int i; ++ int res = 1 << (1 << N); ++ p = &str; ++ str.a = 2; ++ ++ p->b = &(p->a); ++ ++ for (i=0; i < N; i++) ++ p->a = *(p->b)*(*(p->b)); ++ ++ if (p->a != res) ++ abort (); ++ ++ /* POSIX ignores all but the 8 low-order bits, but other ++ environments may not. */ ++ return (p->a & 255); ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped...Type escapes a cast to a different" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c +new file mode 100644 +index 000000000..cba92e995 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c +@@ -0,0 +1,67 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++ ++typedef struct ++{ ++ int a; ++ float b; ++}str_t1; ++ ++typedef struct ++{ ++ int c; ++ float d; ++}str_t2; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 16000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t1 *p1; ++str_t2 *p2; ++int num; ++ ++void ++foo (void) ++{ ++ int i; ++ ++ for (i=0; i < num; i++) ++ p2[i].c = 2; ++} ++ ++int ++main () ++{ ++ int i, r; ++ ++ r = rand (); ++ num = r > N ? 
N : r; ++ p1 = malloc (num * sizeof (str_t1)); ++ p2 = malloc (num * sizeof (str_t2)); ++ ++ if (p1 == NULL || p2 == NULL) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ p1[i].a = 1; ++ ++ foo (); ++ ++ for (i = 0; i < num; i++) ++ if (p1[i].a != 1 || p2[i].c != 2) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 2dae5e1c7..366118126 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -80,6 +80,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") ++DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") + DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression") + DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 606d1d60b..ec7be874c 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -526,6 +526,7 @@ extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); ++extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt); +-- +2.33.0 + diff --git a/0016-CompleteStructRelayout-Complete-Structure-Relayout.patch b/0016-CompleteStructRelayout-Complete-Structure-Relayout.patch new file mode 100644 index 
0000000..37657ef --- /dev/null +++ b/0016-CompleteStructRelayout-Complete-Structure-Relayout.patch @@ -0,0 +1,2056 @@ +From 699caeaa2d89966e4af1d36bc96b53eb4dac0a09 Mon Sep 17 00:00:00 2001 +From: eastb233 +Date: Fri, 25 Aug 2023 09:59:39 +0800 +Subject: [PATCH 16/22] [CompleteStructRelayout] Complete Structure Relayout + +Introduce complete structure reorganization based on original +structure reorganization optimization, which change array of +structure to structure of array in order to better utilize +spatial locality. +--- + gcc/ipa-struct-reorg/escapes.def | 2 + + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 994 ++++++++++++++++-- + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 33 + + .../g++.dg/struct/no-body-function.cpp | 18 + + .../g++.dg/struct/struct-reorg-1.cpp | 13 + + .../g++.dg/struct/struct-reorg-2.cpp | 17 + + .../g++.dg/struct/struct-reorg-3.cpp | 24 + + gcc/testsuite/g++.dg/struct/struct-reorg.exp | 26 + + gcc/testsuite/gcc.dg/struct/csr_1.c | 60 ++ + .../gcc.dg/struct/csr_allocation-1.c | 46 + + .../gcc.dg/struct/csr_allocation-2.c | 59 ++ + .../gcc.dg/struct/csr_allocation-3.c | 77 ++ + gcc/testsuite/gcc.dg/struct/csr_cast_int.c | 52 + + .../gcc.dg/struct/csr_separate_instance.c | 48 + + .../gcc.dg/struct/sr_address_of_field.c | 37 + + gcc/testsuite/gcc.dg/struct/sr_convert_mem.c | 23 + + gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c | 25 + + gcc/testsuite/gcc.dg/struct/sr_pointer_and.c | 17 + + .../gcc.dg/struct/sr_pointer_minus.c | 33 + + 19 files changed, 1539 insertions(+), 65 deletions(-) + create mode 100644 gcc/testsuite/g++.dg/struct/no-body-function.cpp + create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp + create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp + create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp + create mode 100644 gcc/testsuite/g++.dg/struct/struct-reorg.exp + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_1.c + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-1.c + 
create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-2.c + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_allocation-3.c + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_cast_int.c + create mode 100644 gcc/testsuite/gcc.dg/struct/csr_separate_instance.c + create mode 100644 gcc/testsuite/gcc.dg/struct/sr_address_of_field.c + create mode 100644 gcc/testsuite/gcc.dg/struct/sr_convert_mem.c + create mode 100644 gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/sr_pointer_and.c + create mode 100644 gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c + +diff --git a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def +index c4c8e0739..d825eb3e6 100644 +--- a/gcc/ipa-struct-reorg/escapes.def ++++ b/gcc/ipa-struct-reorg/escapes.def +@@ -56,5 +56,7 @@ DEF_ESCAPE (escape_non_optimize, "Type used by a function which turns off struct + DEF_ESCAPE (escape_array, "Type is used in an array [not handled yet]") + DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]") + DEF_ESCAPE (escape_return, "Type escapes via a return [not handled yet]") ++DEF_ESCAPE (escape_separate_instance, "Type escapes via a separate instance") ++DEF_ESCAPE (escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt") + + #undef DEF_ESCAPE +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +index 238530860..c8b975a92 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -104,10 +104,12 @@ along with GCC; see the file COPYING3. If not see + #include "tree-ssa-live.h" /* For remove_unused_locals. 
*/ + #include "ipa-param-manipulation.h" + #include "gimplify-me.h" ++#include "cfgloop.h" + + namespace { + + using namespace struct_reorg; ++using namespace struct_relayout; + + #define VOID_POINTER_P(type) \ + (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type))) +@@ -194,6 +196,14 @@ gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type, + GSI_SAME_STMT); + } + ++enum srmode ++{ ++ NORMAL = 0, ++ COMPLETE_STRUCT_RELAYOUT ++}; ++ ++static bool is_result_of_mult (tree, tree *, tree); ++ + } // anon namespace + + +@@ -283,7 +293,8 @@ srtype::srtype (tree type) + : type (type), + chain_type (false), + escapes (does_not_escape), +- visited (false) ++ visited (false), ++ has_alloc_array (0) + { + for (int i = 0; i < max_split; i++) + newtype[i] = NULL_TREE; +@@ -483,13 +494,6 @@ srtype::dump (FILE *f) + fn->simple_dump (f); + } + fprintf (f, "\n }\n"); +- fprintf (f, "\n field_sites = {"); +- FOR_EACH_VEC_ELT (field_sites, i, field) +- { +- fprintf (f, " \n"); +- field->simple_dump (f); +- } +- fprintf (f, "\n }\n"); + fprintf (f, "}\n"); + } + +@@ -631,15 +635,7 @@ srtype::create_new_type (void) + + maxclusters++; + +- const char *tname = NULL; +- +- if (TYPE_NAME (type) != NULL) +- { +- if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE) +- tname = IDENTIFIER_POINTER (TYPE_NAME (type)); +- else if (DECL_NAME (TYPE_NAME (type)) != NULL) +- tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); +- } ++ const char *tname = get_type_name (type); + + for (unsigned i = 0; i < maxclusters; i++) + { +@@ -653,7 +649,10 @@ srtype::create_new_type (void) + if (tname) + { + name = concat (tname, ".reorg.", id, NULL); +- TYPE_NAME (newtype[i]) = get_identifier (name); ++ TYPE_NAME (newtype[i]) = build_decl (UNKNOWN_LOCATION, ++ TYPE_DECL, ++ get_identifier (name), ++ newtype[i]); + free (name); + } + } +@@ -673,6 +672,8 @@ srtype::create_new_type (void) + { + TYPE_FIELDS (newtype[i]) = newfields[i]; + layout_type (newtype[i]); ++ if (TYPE_NAME 
(newtype[i]) != NULL) ++ layout_decl (TYPE_NAME (newtype[i]), 0); + } + + warn_padded = save_warn_padded; +@@ -841,12 +842,6 @@ srfield::dump (FILE *f) + fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); + fprintf (f, ", type = "); + print_generic_expr (f, fieldtype); +- if (type) +- { +- fprintf (f, "( srtype = "); +- type->simple_dump (f); +- fprintf (f, ")"); +- } + fprintf (f, "\n}\n"); + } + +@@ -855,7 +850,8 @@ srfield::dump (FILE *f) + void + srfield::simple_dump (FILE *f) + { +- fprintf (f, "field (%d)", DECL_UID (fielddecl)); ++ if (fielddecl) ++ fprintf (f, "field (%d)", DECL_UID (fielddecl)); + } + + /* Dump out the access structure to FILE. */ +@@ -899,6 +895,92 @@ srdecl::dump (FILE *file) + } // namespace struct_reorg + + ++namespace struct_relayout { ++ ++/* Complete Structure Relayout Optimization. ++ It reorganizes all structure members, and puts same member together. ++ struct s { ++ long a; ++ int b; ++ struct s *c; ++ }; ++ Array looks like ++ abcabcabcabc... ++ will be transformed to ++ aaaa...bbbb...cccc... ++*/ ++ ++#define GPTR_SIZE(i) \ ++ TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (gptr[i]))) ++ ++unsigned transformed = 0; ++ ++unsigned ++csrtype::calculate_field_num (tree field_offset) ++{ ++ if (field_offset == NULL) ++ return 0; ++ ++ HOST_WIDE_INT off = int_byte_position (field_offset); ++ unsigned i = 1; ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (off == int_byte_position (field)) ++ return i; ++ i++; ++ } ++ return 0; ++} ++ ++void ++csrtype::init_type_info (void) ++{ ++ if (!type) ++ return; ++ new_size = old_size = tree_to_uhwi (TYPE_SIZE_UNIT (type)); ++ ++ /* Close enough to pad to improve performance. ++ 33~63 should pad to 64 but 33~48 (first half) are too far away, and ++ 65~127 should pad to 128 but 65~96 (first half) are too far away. 
*/ ++ if (old_size > 48 && old_size < 64) ++ new_size = 64; ++ if (old_size > 96 && old_size < 128) ++ new_size = 128; ++ ++ /* For performance reasons, only allow structure size ++ that is a power of 2 and not too big. */ ++ if (new_size != 1 && new_size != 2 ++ && new_size != 4 && new_size != 8 ++ && new_size != 16 && new_size != 32 ++ && new_size != 64 && new_size != 128) ++ { ++ new_size = 0; ++ field_count = 0; ++ return; ++ } ++ ++ unsigned i = 0; ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ if (TREE_CODE (field) == FIELD_DECL) ++ i++; ++ field_count = i; ++ ++ struct_size = build_int_cstu (TREE_TYPE (TYPE_SIZE_UNIT (type)), ++ new_size); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Type: "); ++ print_generic_expr (dump_file, type); ++ fprintf (dump_file, " has %d members.\n", field_count); ++ fprintf (dump_file, "Modify struct size from %ld to %ld.\n", ++ old_size, new_size); ++ } ++} ++ ++} // namespace struct_relayout ++ ++ + namespace { + + struct ipa_struct_reorg +@@ -907,13 +989,10 @@ public: + // Constructors + ipa_struct_reorg (void) + : current_function (NULL), +- done_recording (false) ++ done_recording (false), ++ current_mode (NORMAL) + {} + +- // Public methods +- unsigned execute (void); +- void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL); +-private: + // Fields + auto_vec_del types; + auto_vec_del functions; +@@ -921,8 +1000,13 @@ private: + srfunction *current_function; + + bool done_recording; ++ srmode current_mode; ++ ++ // Methods ++ unsigned execute (enum srmode mode); ++ void mark_type_as_escape (tree type, escape_type escapes, ++ gimple *stmt = NULL); + +- // Private methods + void dump_types (FILE *f); + void dump_types_escaped (FILE *f); + void dump_functions (FILE *f); +@@ -954,6 +1038,7 @@ private: + void maybe_record_allocation_site (cgraph_node *, gimple *); + void record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt); + void 
mark_expr_escape (tree, escape_type, gimple *stmt); ++ bool handled_allocation_stmt (gimple *stmt); + tree allocate_size (srtype *t, gimple *stmt); + + void mark_decls_in_as_not_needed (tree fn); +@@ -976,6 +1061,7 @@ private: + bool can_escape = false); + bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); + ++ void check_alloc_num (gimple *stmt, srtype *type); + void check_definition (srdecl *decl, vec &); + void check_uses (srdecl *decl, vec &); + void check_use (srdecl *decl, gimple *stmt, vec &); +@@ -990,8 +1076,591 @@ private: + + bool has_rewritten_type (srfunction *); + void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); ++ ++ unsigned execute_struct_relayout (void); + }; + ++struct ipa_struct_relayout ++{ ++public: ++ // Fields ++ tree gptr[max_relayout_split + 1]; ++ csrtype ctype; ++ ipa_struct_reorg *sr; ++ cgraph_node *current_node; ++ ++ // Constructors ++ ipa_struct_relayout (tree type, ipa_struct_reorg *sr_) ++ { ++ ctype.type = type; ++ sr = sr_; ++ current_node = NULL; ++ for (int i = 0; i < max_relayout_split + 1; i++) ++ gptr[i] = NULL; ++ } ++ ++ // Methods ++ tree create_new_vars (tree type, const char *name); ++ void create_global_ptrs (void); ++ unsigned int rewrite (void); ++ void rewrite_stmt_in_function (void); ++ bool rewrite_debug (gimple *stmt, gimple_stmt_iterator *gsi); ++ bool rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi); ++ bool handled_allocation_stmt (gcall *stmt); ++ void init_global_ptrs (gcall *stmt, gimple_stmt_iterator *gsi); ++ bool check_call_uses (gcall *stmt); ++ bool rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi); ++ tree create_ssa (tree node, gimple_stmt_iterator *gsi); ++ bool is_candidate (tree xhs); ++ tree rewrite_address (tree xhs, gimple_stmt_iterator *gsi); ++ tree rewrite_offset (tree offset, HOST_WIDE_INT num); ++ bool rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi); ++ bool maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi, ++ 
HOST_WIDE_INT ×); ++ unsigned int execute (void); ++}; ++ ++} // anon namespace ++ ++namespace { ++ ++/* Methods for ipa_struct_relayout. */ ++ ++static void ++set_var_attributes (tree var) ++{ ++ if (!var) ++ return; ++ gcc_assert (TREE_CODE (var) == VAR_DECL); ++ ++ DECL_ARTIFICIAL (var) = 1; ++ DECL_EXTERNAL (var) = 0; ++ TREE_STATIC (var) = 1; ++ TREE_PUBLIC (var) = 0; ++ TREE_USED (var) = 1; ++ DECL_CONTEXT (var) = NULL; ++ TREE_THIS_VOLATILE (var) = 0; ++ TREE_ADDRESSABLE (var) = 0; ++ TREE_READONLY (var) = 0; ++ if (is_global_var (var)) ++ set_decl_tls_model (var, TLS_MODEL_NONE); ++} ++ ++tree ++ipa_struct_relayout::create_new_vars (tree type, const char *name) ++{ ++ gcc_assert (type); ++ tree new_type = build_pointer_type (type); ++ ++ tree new_name = NULL; ++ if (name) ++ new_name = get_identifier (name); ++ ++ tree new_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, new_name, new_type); ++ ++ /* Set new_var's attributes. */ ++ set_var_attributes (new_var); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Created new var: "); ++ print_generic_expr (dump_file, new_var); ++ fprintf (dump_file, "\n"); ++ } ++ return new_var; ++} ++ ++void ++ipa_struct_relayout::create_global_ptrs (void) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Create global gptrs: {\n"); ++ ++ char *gptr0_name = NULL; ++ const char *type_name = get_type_name (ctype.type); ++ ++ if (type_name) ++ gptr0_name = concat (type_name, "_gptr0", NULL); ++ tree var_gptr0 = create_new_vars (ctype.type, gptr0_name); ++ gptr[0] = var_gptr0; ++ varpool_node::add (var_gptr0); ++ ++ unsigned i = 1; ++ for (tree field = TYPE_FIELDS (ctype.type); field; ++ field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ tree type = TREE_TYPE (field); ++ ++ char *name = NULL; ++ char id[10] = {0}; ++ sprintf (id, "%d", i); ++ const char *decl_name = IDENTIFIER_POINTER (DECL_NAME (field)); ++ ++ if (type_name && decl_name) ++ name 
= concat (type_name, "_", decl_name, "_gptr", id, NULL); ++ tree var = create_new_vars (type, name); ++ ++ gptr[i] = var; ++ varpool_node::add (var); ++ i++; ++ } ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nTotally create %d gptrs. }\n\n", i); ++ gcc_assert (ctype.field_count == i - 1); ++} ++ ++void ++ipa_struct_relayout::rewrite_stmt_in_function (void) ++{ ++ gcc_assert (cfun); ++ ++ basic_block bb = NULL; ++ gimple_stmt_iterator si; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (si = gsi_start_bb (bb); !gsi_end_p (si);) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (rewrite_stmt (stmt, &si)) ++ gsi_remove (&si, true); ++ else ++ gsi_next (&si); ++ } ++ } ++ ++ /* Debug statements need to happen after all other statements ++ have changed. */ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (si = gsi_start_bb (bb); !gsi_end_p (si);) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (gimple_code (stmt) == GIMPLE_DEBUG ++ && rewrite_debug (stmt, &si)) ++ gsi_remove (&si, true); ++ else ++ gsi_next (&si); ++ } ++ } ++} ++ ++unsigned int ++ipa_struct_relayout::rewrite (void) ++{ ++ cgraph_node *cnode = NULL; ++ function *fn = NULL; ++ FOR_EACH_FUNCTION (cnode) ++ { ++ if (!cnode->real_symbol_p () || !cnode->has_gimple_body_p ()) ++ continue; ++ if (cnode->definition) ++ { ++ fn = DECL_STRUCT_FUNCTION (cnode->decl); ++ if (fn == NULL) ++ continue; ++ ++ current_node = cnode; ++ push_cfun (fn); ++ ++ rewrite_stmt_in_function (); ++ ++ update_ssa (TODO_update_ssa_only_virtuals); ++ ++ if (flag_tree_pta) ++ compute_may_aliases (); ++ ++ remove_unused_locals (); ++ ++ cgraph_edge::rebuild_edges (); ++ ++ free_dominance_info (CDI_DOMINATORS); ++ ++ pop_cfun (); ++ current_node = NULL; ++ } ++ } ++ return TODO_verify_all; ++} ++ ++bool ++ipa_struct_relayout::rewrite_debug (gimple *stmt ATTRIBUTE_UNUSED, ++ gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED) ++{ ++ /* Delete debug gimple now. 
*/ ++ return true; ++} ++ ++bool ++ipa_struct_relayout::rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ return rewrite_assign (as_a (stmt), gsi); ++ case GIMPLE_CALL: ++ return rewrite_call (as_a (stmt), gsi); ++ default: ++ break; ++ } ++ return false; ++} ++ ++bool ++ipa_struct_relayout::handled_allocation_stmt (gcall *stmt) ++{ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ return true; ++ return false; ++} ++ ++void ++ipa_struct_relayout::init_global_ptrs (gcall *stmt, gimple_stmt_iterator *gsi) ++{ ++ gcc_assert (handled_allocation_stmt (stmt)); ++ ++ tree lhs = gimple_call_lhs (stmt); ++ ++ /* Case that gimple is at the end of bb. */ ++ if (gsi_one_before_end_p (*gsi)) ++ { ++ gassign *gptr0 = gimple_build_assign (gptr[0], lhs); ++ gsi_insert_after (gsi, gptr0, GSI_SAME_STMT); ++ } ++ gsi_next (gsi); ++ ++ /* Emit gimple gptr0 = _X and gptr1 = _X. */ ++ gassign *gptr0 = gimple_build_assign (gptr[0], lhs); ++ gsi_insert_before (gsi, gptr0, GSI_SAME_STMT); ++ gassign *gptr1 = gimple_build_assign (gptr[1], lhs); ++ gsi_insert_before (gsi, gptr1, GSI_SAME_STMT); ++ ++ /* Emit gimple gptr_[i] = gptr_[i-1] + _Y[gap]. */ ++ for (unsigned i = 2; i <= ctype.field_count; i++) ++ { ++ gimple *new_stmt = NULL; ++ tree gptr_i_prev_ssa = create_ssa (gptr[i-1], gsi); ++ tree gptr_i_ssa = make_ssa_name (TREE_TYPE (gptr[i-1])); ++ ++ /* Emit gimple _Y[gap] = N * sizeof (member). 
*/ ++ tree member_gap = gimplify_build2 (gsi, MULT_EXPR, ++ long_unsigned_type_node, ++ gimple_call_arg (stmt, 0), ++ GPTR_SIZE (i-1)); ++ ++ new_stmt = gimple_build_assign (gptr_i_ssa, POINTER_PLUS_EXPR, ++ gptr_i_prev_ssa, member_gap); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); ++ ++ gassign *gptr_i = gimple_build_assign (gptr[i], gptr_i_ssa); ++ gsi_insert_before (gsi, gptr_i, GSI_SAME_STMT); ++ } ++ gsi_prev (gsi); ++} ++ ++bool ++ipa_struct_relayout::check_call_uses (gcall *stmt) ++{ ++ gcc_assert (current_node); ++ srfunction *fn = sr->find_function (current_node); ++ tree lhs = gimple_call_lhs (stmt); ++ ++ if (fn == NULL) ++ return false; ++ ++ srdecl *d = fn->find_decl (lhs); ++ if (d == NULL) ++ return false; ++ if (types_compatible_p (d->type->type, ctype.type)) ++ return true; ++ ++ return false; ++} ++ ++bool ++ipa_struct_relayout::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) ++{ ++ if (handled_allocation_stmt (stmt)) ++ { ++ /* Rewrite stmt _X = calloc (N, sizeof (struct)). */ ++ tree size = gimple_call_arg (stmt, 1); ++ if (TREE_CODE (size) != INTEGER_CST) ++ return false; ++ if (tree_to_uhwi (size) != ctype.old_size) ++ return false; ++ if (!check_call_uses (stmt)) ++ return false; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Rewrite allocation call:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "to\n"); ++ } ++ ++ /* Modify sizeof (struct). 
*/ ++ gimple_call_set_arg (stmt, 1, ctype.struct_size); ++ update_stmt (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ ++ init_global_ptrs (stmt, gsi); ++ } ++ return false; ++} ++ ++tree ++ipa_struct_relayout::create_ssa (tree node, gimple_stmt_iterator *gsi) ++{ ++ gcc_assert (TREE_CODE (node) == VAR_DECL); ++ tree node_ssa = make_ssa_name (TREE_TYPE (node)); ++ gassign *stmt = gimple_build_assign (node_ssa, node); ++ gsi_insert_before (gsi, stmt, GSI_SAME_STMT); ++ return node_ssa; ++} ++ ++bool ++ipa_struct_relayout::is_candidate (tree xhs) ++{ ++ if (TREE_CODE (xhs) != COMPONENT_REF) ++ return false; ++ tree mem = TREE_OPERAND (xhs, 0); ++ if (TREE_CODE (mem) == MEM_REF) ++ { ++ tree type = TREE_TYPE (mem); ++ if (types_compatible_p (type, ctype.type)) ++ return true; ++ } ++ return false; ++} ++ ++tree ++ipa_struct_relayout::rewrite_address (tree xhs, gimple_stmt_iterator *gsi) ++{ ++ tree mem_ref = TREE_OPERAND (xhs, 0); ++ tree pointer = TREE_OPERAND (mem_ref, 0); ++ tree pointer_offset = TREE_OPERAND (mem_ref, 1); ++ tree field = TREE_OPERAND (xhs, 1); ++ ++ tree pointer_ssa = fold_convert (long_unsigned_type_node, pointer); ++ tree gptr0_ssa = fold_convert (long_unsigned_type_node, gptr[0]); ++ ++ /* Emit gimple _X1 = ptr - gptr0. */ ++ tree step1 = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, ++ pointer_ssa, gptr0_ssa); ++ ++ /* Emit gimple _X2 = _X1 / sizeof (struct). */ ++ tree step2 = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_unsigned_type_node, ++ step1, ctype.struct_size); ++ ++ unsigned field_num = ctype.calculate_field_num (field); ++ gcc_assert (field_num > 0 && field_num <= ctype.field_count); ++ ++ /* Emit gimple _X3 = _X2 * sizeof (member). */ ++ tree step3 = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, ++ step2, GPTR_SIZE (field_num)); ++ ++ /* Emit gimple _X4 = gptr[I]. 
*/ ++ tree gptr_field_ssa = create_ssa (gptr[field_num], gsi); ++ tree new_address = make_ssa_name (TREE_TYPE (gptr[field_num])); ++ gassign *new_stmt = gimple_build_assign (new_address, POINTER_PLUS_EXPR, ++ gptr_field_ssa, step3); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); ++ ++ /* MEM_REF with nonzero offset like ++ MEM[ptr + sizeof (struct)] = 0B ++ should be transformed to ++ MEM[gptr + sizeof (member)] = 0B ++ */ ++ HOST_WIDE_INT size ++ = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_address)))); ++ tree new_size = rewrite_offset (pointer_offset, size); ++ if (new_size) ++ TREE_OPERAND (mem_ref, 1) = new_size; ++ ++ /* Update mem_ref pointer. */ ++ TREE_OPERAND (mem_ref, 0) = new_address; ++ ++ /* Update mem_ref TREE_TYPE. */ ++ TREE_TYPE (mem_ref) = TREE_TYPE (TREE_TYPE (new_address)); ++ ++ return mem_ref; ++} ++ ++tree ++ipa_struct_relayout::rewrite_offset (tree offset, HOST_WIDE_INT num) ++{ ++ if (TREE_CODE (offset) == INTEGER_CST) ++ { ++ bool sign = false; ++ HOST_WIDE_INT off = TREE_INT_CST_LOW (offset); ++ if (off == 0) ++ return NULL; ++ if (off < 0) ++ { ++ off = -off; ++ sign = true; ++ } ++ if (off % ctype.old_size == 0) ++ { ++ HOST_WIDE_INT times = off / ctype.old_size; ++ times = sign ? 
-times : times; ++ return build_int_cst (TREE_TYPE (offset), num * times); ++ } ++ } ++ return NULL; ++} ++ ++#define REWRITE_ASSIGN_TREE_IN_STMT(node) \ ++do \ ++{ \ ++ tree node = gimple_assign_##node (stmt); \ ++ if (node && is_candidate (node)) \ ++ { \ ++ tree mem_ref = rewrite_address (node, gsi); \ ++ gimple_assign_set_##node (stmt, mem_ref); \ ++ update_stmt (stmt); \ ++ } \ ++} while (0) ++ ++/* COMPONENT_REF = exp => MEM_REF = exp ++ / \ / \ ++ MEM_REF field gptr offset ++ / \ ++ pointer offset ++*/ ++bool ++ipa_struct_relayout::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Maybe rewrite assign:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "to\n"); ++ } ++ ++ switch (gimple_num_ops (stmt)) ++ { ++ case 4: REWRITE_ASSIGN_TREE_IN_STMT (rhs3); // FALLTHRU ++ case 3: ++ { ++ REWRITE_ASSIGN_TREE_IN_STMT (rhs2); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (rhs2 && TREE_CODE (rhs2) == INTEGER_CST) ++ { ++ /* Handle pointer++ and pointer-- or ++ factor is euqal to struct size. 
*/ ++ HOST_WIDE_INT times = 1; ++ if (maybe_rewrite_cst (rhs2, gsi, times)) ++ { ++ tree tmp = build_int_cst ( ++ TREE_TYPE (TYPE_SIZE_UNIT (ctype.type)), ++ ctype.new_size * times); ++ gimple_assign_set_rhs2 (stmt, tmp); ++ update_stmt (stmt); ++ } ++ } ++ } // FALLTHRU ++ case 2: REWRITE_ASSIGN_TREE_IN_STMT (rhs1); // FALLTHRU ++ case 1: REWRITE_ASSIGN_TREE_IN_STMT (lhs); // FALLTHRU ++ case 0: break; ++ default: gcc_unreachable (); ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return false; ++} ++ ++bool ++ipa_struct_relayout::maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi, ++ HOST_WIDE_INT ×) ++{ ++ bool ret = false; ++ gcc_assert (TREE_CODE (cst) == INTEGER_CST); ++ ++ gimple *stmt = gsi_stmt (*gsi); ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type) ++ || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type)) ++ { ++ tree num = NULL; ++ if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type))) ++ { ++ times = TREE_INT_CST_LOW (num); ++ return true; ++ } ++ } ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == MULT_EXPR) ++ { ++ if (gsi_one_before_end_p (*gsi)) ++ return false; ++ gsi_next (gsi); ++ gimple *stmt2 = gsi_stmt (*gsi); ++ ++ if (gimple_code (stmt2) == GIMPLE_ASSIGN ++ && gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt2); ++ tree rhs1 = gimple_assign_rhs1 (stmt2); ++ if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type) ++ || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type)) ++ { ++ tree num = NULL; ++ if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type))) ++ { ++ times = TREE_INT_CST_LOW (num); ++ ret = true; ++ } ++ } ++ } ++ gsi_prev (gsi); ++ return ret; ++ } ++ return false; ++} ++ 
++unsigned int ++ipa_struct_relayout::execute (void) ++{ ++ ctype.init_type_info (); ++ if (ctype.field_count < min_relayout_split ++ || ctype.field_count > max_relayout_split) ++ return 0; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Complete Struct Relayout Type: "); ++ print_generic_expr (dump_file, ctype.type); ++ fprintf (dump_file, "\n"); ++ } ++ transformed++; ++ ++ create_global_ptrs (); ++ return rewrite (); ++} ++ ++} // anon namespace ++ ++ ++namespace { ++ ++/* Methods for ipa_struct_reorg. */ ++ + /* Dump all of the recorded types to file F. */ + + void +@@ -1189,7 +1858,7 @@ ipa_struct_reorg::record_type (tree type) + f->type = t1; + t1->add_field_site (f); + } +- if (t1 == type1) ++ if (t1 == type1 && current_mode != COMPLETE_STRUCT_RELAYOUT) + type1->mark_escape (escape_rescusive_type, NULL); + } + } +@@ -1331,6 +2000,12 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) + else + e = escape_type_volatile_array_or_ptrptr (TREE_TYPE (decl)); + ++ /* Separate instance is hard to trace in complete struct ++ relayout optimization. 
*/ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) ++ e = escape_separate_instance; ++ + if (e != does_not_escape) + type->mark_escape (e, NULL); + } +@@ -1369,6 +2044,7 @@ ipa_struct_reorg::find_var (tree expr, gimple *stmt) + || TREE_CODE (expr) == VIEW_CONVERT_EXPR) + { + tree r = TREE_OPERAND (expr, 0); ++ tree orig_type = TREE_TYPE (expr); + if (handled_component_p (r) + || TREE_CODE (r) == MEM_REF) + { +@@ -1382,8 +2058,18 @@ ipa_struct_reorg::find_var (tree expr, gimple *stmt) + escape_vce, stmt); + } + if (TREE_CODE (r) == MEM_REF) +- mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 1)), +- escape_addr, stmt); ++ { ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 1)), ++ escape_addr, stmt); ++ tree inner_type = TREE_TYPE (TREE_OPERAND (r, 0)); ++ if (orig_type != inner_type) ++ { ++ mark_type_as_escape (orig_type, ++ escape_cast_another_ptr, stmt); ++ mark_type_as_escape (inner_type, ++ escape_cast_another_ptr, stmt); ++ } ++ } + r = TREE_OPERAND (r, 0); + } + mark_expr_escape (r, escape_addr, stmt); +@@ -1407,7 +2093,8 @@ ipa_struct_reorg::find_vars (gimple *stmt) + { + case GIMPLE_ASSIGN: + if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS +- || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR ++ || gimple_assign_rhs_code (stmt) == NOP_EXPR) + { + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); +@@ -1432,6 +2119,32 @@ ipa_struct_reorg::find_vars (gimple *stmt) + current_function->record_decl (t, rhs, -1); + } + } ++ else ++ { ++ /* Because we won't handle these stmts in rewrite phase, ++ just mark these types as escaped. 
*/ ++ switch (gimple_num_ops (stmt)) ++ { ++ case 4: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_rhs3 (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 3: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_rhs2 (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 2: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_rhs1 (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 1: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_lhs (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 0: break; ++ default: gcc_unreachable (); ++ } ++ } + break; + + case GIMPLE_CALL: +@@ -1514,9 +2227,21 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. */ + if (TREE_CODE (arg) == INTEGER_CST) + { +- if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg, struct_size))) ++ bool sign = false; ++ HOST_WIDE_INT size = TREE_INT_CST_LOW (arg); ++ if (size < 0) ++ { ++ size = -size; ++ sign = true; ++ } ++ tree arg2 = build_int_cst (TREE_TYPE (arg), size); ++ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size))) + { +- *num = size_binop (FLOOR_DIV_EXPR, arg, struct_size); ++ tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size); ++ if (sign) ++ number = build_int_cst (TREE_TYPE (number), ++ -tree_to_shwi (number)); ++ *num = number; + return true; + } + return false; +@@ -1586,16 +2311,21 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + + /* Return TRUE if STMT is an allocation statement that is handled. 
*/ + +-static bool +-handled_allocation_stmt (gimple *stmt) ++bool ++ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + return true; ++ ++ if (current_mode != COMPLETE_STRUCT_RELAYOUT) ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) ++ return true; + return false; + } + +@@ -1636,7 +2366,7 @@ ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) + the size of structure. */ + if (operand_equal_p (arg1, struct_size, 0)) + return size; +- /* Check that first argument is a constant equal to ++ /* ??? Check that first argument is a constant equal to + the size of structure. */ + if (operand_equal_p (size, struct_size, 0)) + return arg1; +@@ -1751,6 +2481,25 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt) + } + } + ++static bool ++check_mem_ref_offset (tree expr) ++{ ++ tree num = NULL; ++ bool ret = false; ++ ++ if (TREE_CODE (expr) != MEM_REF) ++ return false; ++ ++ /* Try to find the structure size. 
*/ ++ tree field_off = TREE_OPERAND (expr, 1); ++ tree tmp = TREE_OPERAND (expr, 0); ++ if (TREE_CODE (tmp) == ADDR_EXPR) ++ tmp = TREE_OPERAND (tmp, 0); ++ tree size = TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp))); ++ ret = is_result_of_mult (field_off, &num, size); ++ return ret; ++} ++ + static tree + get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, + bool &realpart, bool &imagpart, +@@ -1792,7 +2541,8 @@ get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, + gcc_assert (TREE_CODE (field_off) == INTEGER_CST); + /* So we can mark the types as escaping if different. */ + accesstype = TREE_TYPE (field_off); +- offset += tree_to_uhwi (field_off); ++ if (!check_mem_ref_offset (expr)) ++ offset += tree_to_uhwi (field_off); + return TREE_OPERAND (expr, 0); + } + default: +@@ -2176,6 +2926,31 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, + type1->mark_escape (escape_cast_another_ptr, stmt); + } + ++void ++ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) ++{ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && handled_allocation_stmt (stmt)) ++ { ++ tree arg0 = gimple_call_arg (stmt, 0); ++ basic_block bb = gimple_bb (stmt); ++ cgraph_node *node = current_function->node; ++ if (integer_onep (arg0)) ++ /* Actually NOT an array, but may ruin other array. */ ++ type->has_alloc_array = -1; ++ else if (bb->loop_father != NULL ++ && loop_outer (bb->loop_father) != NULL) ++ /* The allocation is in a loop. */ ++ type->has_alloc_array = -2; ++ else if (node->callers != NULL) ++ type->has_alloc_array = -3; ++ else ++ type->has_alloc_array = type->has_alloc_array < 0 ++ ? 
type->has_alloc_array ++ : type->has_alloc_array + 1; ++ } ++} ++ + /* + 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) + a) if the SSA_NAME is sourced from a pointer plus, record the pointer and +@@ -2223,6 +2998,7 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) + if (!handled_allocation_stmt (stmt) + || !allocate_size (type, stmt)) + type->mark_escape (escape_return, stmt); ++ check_alloc_num (stmt, type); + return; + } + /* If the SSA_NAME is sourced from an inline-asm, +@@ -2264,6 +3040,20 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) + return; + } + ++ if (gimple_assign_rhs_code (stmt) == MAX_EXPR ++ || gimple_assign_rhs_code (stmt) == MIN_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_IOR_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_XOR_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (rhs) == SSA_NAME) ++ check_type_and_push (rhs, type, worklist, stmt); ++ if (TREE_CODE (rhs2) == SSA_NAME) ++ check_type_and_push (rhs2, type, worklist, stmt); ++ return; ++ } ++ + /* Casts between pointers and integer are escaping. */ + if (gimple_assign_cast_p (stmt)) + { +@@ -2328,6 +3118,11 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, + srtype *t1 = find_type (inner_type (t)); + if (t1 == type) + { ++ /* In Complete Struct Relayout, if lhs type is the same ++ as rhs type, we could return without any harm. 
*/ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT) ++ return; ++ + tree base; + bool indirect; + srtype *type1; +@@ -2376,8 +3171,11 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree rhs1 = gimple_cond_lhs (stmt); + tree rhs2 = gimple_cond_rhs (stmt); + tree orhs = rhs1; +- if (gimple_cond_code (stmt) != EQ_EXPR +- && gimple_cond_code (stmt) != NE_EXPR) ++ enum tree_code code = gimple_cond_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR ++ && (current_mode != COMPLETE_STRUCT_RELAYOUT ++ || (code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -2406,8 +3204,11 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); + tree orhs = rhs1; +- if (gimple_assign_rhs_code (stmt) != EQ_EXPR +- && gimple_assign_rhs_code (stmt) != NE_EXPR) ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR ++ && (current_mode != COMPLETE_STRUCT_RELAYOUT ++ || (code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -2692,6 +3493,12 @@ ipa_struct_reorg::record_accesses (void) + /* Record accesses inside a function. 
*/ + if (cnode->definition) + record_function (cnode); ++ else ++ { ++ tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl)); ++ mark_type_as_escape (return_type, escape_return, NULL); ++ } ++ + } + + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -2807,8 +3614,11 @@ ipa_struct_reorg::propagate_escape (void) + void + ipa_struct_reorg::prune_escaped_types (void) + { +- detect_cycles (); +- propagate_escape (); ++ if (current_mode != COMPLETE_STRUCT_RELAYOUT) ++ { ++ detect_cycles (); ++ propagate_escape (); ++ } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +@@ -3954,17 +4764,66 @@ ipa_struct_reorg::rewrite_functions (void) + } + + unsigned int +-ipa_struct_reorg::execute (void) ++ipa_struct_reorg::execute_struct_relayout (void) + { +- /* FIXME: If there is a top-level inline-asm, +- the pass immediately returns. */ +- if (symtab->first_asm_symbol ()) +- return 0; +- record_accesses (); +- prune_escaped_types (); +- analyze_types (); ++ unsigned retval = 0; ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ tree type = types[i]->type; ++ if (TYPE_FIELDS (type) == NULL) ++ continue; ++ if (types[i]->has_alloc_array != 1) ++ continue; ++ if (types[i]->chain_type) ++ continue; ++ retval |= ipa_struct_relayout (type, this).execute (); ++ } ++ ++ if (dump_file) ++ { ++ if (transformed) ++ fprintf (dump_file, "\nNumber of structures to transform in " ++ "Complete Structure Relayout is %d\n", transformed); ++ else ++ fprintf (dump_file, "\nNo structures to transform in " ++ "Complete Structure Relayout.\n"); ++ } ++ ++ return retval; ++} ++ ++unsigned int ++ipa_struct_reorg::execute (enum srmode mode) ++{ ++ unsigned int ret = 0; ++ ++ if (mode == NORMAL) ++ { ++ current_mode = NORMAL; ++ /* FIXME: If there is a top-level inline-asm, ++ the pass immediately returns. 
*/ ++ if (symtab->first_asm_symbol ()) ++ return 0; ++ record_accesses (); ++ prune_escaped_types (); ++ analyze_types (); ++ ++ ret = rewrite_functions (); ++ } ++ else if (mode == COMPLETE_STRUCT_RELAYOUT) ++ { ++ if (dump_file) ++ fprintf (dump_file, "\n\nTry Complete Struct Relayout:\n"); ++ current_mode = COMPLETE_STRUCT_RELAYOUT; ++ if (symtab->first_asm_symbol ()) ++ return 0; ++ record_accesses (); ++ prune_escaped_types (); ++ ++ ret = execute_struct_relayout (); ++ } + +- return rewrite_functions (); ++ return ret; + } + + const pass_data pass_data_ipa_struct_reorg = +@@ -3991,7 +4850,11 @@ public: + virtual bool gate (function *); + virtual unsigned int execute (function *) + { +- return ipa_struct_reorg ().execute (); ++ unsigned int ret = 0; ++ ret = ipa_struct_reorg ().execute (NORMAL); ++ if (!ret) ++ ret = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT); ++ return ret; + } + + }; // class pass_ipa_struct_reorg +@@ -3999,10 +4862,11 @@ public: + bool + pass_ipa_struct_reorg::gate (function *) + { +- return (optimize ++ return (optimize >= 3 + && flag_ipa_struct_reorg + /* Don't bother doing anything if the program has errors. */ +- && !seen_error ()); ++ && !seen_error () ++ && flag_lto_partition == LTO_PARTITION_ONE); + } + + } // anon namespace +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +index a58794070..ef7f4c780 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -120,6 +120,9 @@ private: + public: + tree newtype[max_split]; + bool visited; ++ /* Negative number means it has illegal allocated arrays ++ that we do not optimize. 
*/ ++ int has_alloc_array; + + // Constructors + srtype (tree type); +@@ -232,4 +235,34 @@ struct srdecl + + } // namespace struct_reorg + ++ ++namespace struct_relayout { ++ ++const int min_relayout_split = 8; ++const int max_relayout_split = 16; ++ ++struct csrtype ++{ ++ tree type; ++ unsigned HOST_WIDE_INT old_size; ++ unsigned HOST_WIDE_INT new_size; ++ unsigned field_count; ++ tree struct_size; ++ ++ // Constructors ++ csrtype () ++ : type (NULL), ++ old_size (0), ++ new_size (0), ++ field_count (0), ++ struct_size (NULL) ++ {} ++ ++ // Methods ++ unsigned calculate_field_num (tree field_offset); ++ void init_type_info (void); ++}; ++ ++} // namespace struct_relayout ++ + #endif +diff --git a/gcc/testsuite/g++.dg/struct/no-body-function.cpp b/gcc/testsuite/g++.dg/struct/no-body-function.cpp +new file mode 100644 +index 000000000..4e56e73fc +--- /dev/null ++++ b/gcc/testsuite/g++.dg/struct/no-body-function.cpp +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-options "-std=gnu++17 -Wno-builtin-declaration-mismatch -O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -S" } */ ++ ++struct S { ++ int x; ++ double y; ++}; ++S f(); ++ ++const auto [x0, y0] = f(); ++const auto [x1, y1] = f(); ++ ++static union { ++int a; ++double b; ++}; ++ ++const auto [x2, y2] = f(); +diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp b/gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp +new file mode 100644 +index 000000000..6ab71abe1 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/struct/struct-reorg-1.cpp +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -fdump-ipa-struct_reorg-details -S" } */ ++ ++struct Foo { int foo; int a; }; ++Foo& ignoreSetMutex = *(new Foo); ++ ++struct Goo { int goo; int a; }; ++ ++int main () ++{ ++ Goo* a; ++ return a->goo = 90; ++} +diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp b/gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp +new file mode 100644 +index 
000000000..72b7db8a9 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/struct/struct-reorg-2.cpp +@@ -0,0 +1,17 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -fdump-ipa-struct_reorg-details" } */ ++ ++#include <stdlib.h> ++ ++struct testg { ++ int b; ++ float c; ++}; ++ ++testg *testgvar; ++int main () ++{ ++ testgvar = (testg*) calloc(10, sizeof(testg)); ++ int b = testgvar->b; ++ return b; ++} +diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp b/gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp +new file mode 100644 +index 000000000..771164a96 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/struct/struct-reorg-3.cpp +@@ -0,0 +1,24 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -fwhole-program -flto-partition=one -fipa-struct-reorg -fdump-ipa-struct_reorg-details" } */ ++ ++#include <stdlib.h> ++ ++struct testg { ++ int b; ++ float c; ++ double d; ++ double e; ++ double f; ++ double h; ++ double i; ++ double j; ++ int k; ++}; ++ ++testg *testgvar; ++int main () ++{ ++ testgvar = (testg*) calloc(10, sizeof(testg)); ++ int b = testgvar->b; ++ return b; ++} +diff --git a/gcc/testsuite/g++.dg/struct/struct-reorg.exp b/gcc/testsuite/g++.dg/struct/struct-reorg.exp +new file mode 100644 +index 000000000..e3ffe1388 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/struct/struct-reorg.exp +@@ -0,0 +1,26 @@ ++# Copyright (C) 2021-2023 Free Software Foundation, Inc. ++ ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. 
++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++load_lib g++-dg.exp ++ ++# Initialize `dg'. ++dg-init ++ ++g++-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.cpp]] \ ++ "" "" ++ ++# All done. ++dg-finish +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/csr_1.c b/gcc/testsuite/gcc.dg/struct/csr_1.c +new file mode 100644 +index 000000000..811030bf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_1.c +@@ -0,0 +1,60 @@ ++// { dg-do run } ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10000; ++node_p n; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].a != 100) ++ { ++ abort (); ++ } ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].l = n[i].a; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].l != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in Complete Structure Relayout is 1" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c +new file mode 100644 +index 000000000..63bb695ae +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c +@@ -0,0 +1,46 @@ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 1; ++node_p n; ++ ++int ++main () 
++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].a != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c +new file mode 100644 +index 000000000..0f75d5d12 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c +@@ -0,0 +1,59 @@ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10; ++node_p n; ++node_p m; ++ ++int main() ++{ ++ int i; ++ for (i = 0; i < MAX / 5; i++) ++ { ++ n = (node_p) calloc(MAX, sizeof(node_t)); ++ if (i == 0) ++ { ++ m = n; ++ } ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ m[i].a = 50; ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].a != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c +new file mode 100644 +index 000000000..3dcb674c6 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c +@@ -0,0 +1,77 @@ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10; ++node_p n; ++node_p m; ++ ++void test (int, int) __attribute__((noinline)); ++ ++void ++test (int num, int flag) ++{ ++ if (num <= 0) ++ { ++ return; ++ } ++ n = (node_p) calloc (num, sizeof (node_t)); ++ if (flag) ++ { ++ m = n; ++ } ++ return; ++} ++ ++int ++main () ++{ ++ test (MAX, 1); ++ test (MAX, 0); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ m[i].a = 50; ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].a != 100) ++ { ++ abort (); ++ } ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (m[i].a != 50) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/csr_cast_int.c b/gcc/testsuite/gcc.dg/struct/csr_cast_int.c +new file mode 100644 +index 000000000..6907158c9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_cast_int.c +@@ -0,0 +1,52 @@ ++// { dg-do run } ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 100; ++node_p n; ++unsigned long y; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].b = 50; ++ } ++ ++ node_p x = &n[5]; ++ y = (unsigned long) x; ++ y += 8; ++ ++ if (*((unsigned long*) y) != 50) ++ { ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes a cast from/to intergral type\"" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c b/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c +new file mode 100644 +index 000000000..9e5e05838 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c +@@ -0,0 +1,48 @@ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10000; ++node_p n; ++node_t t; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ t.a = 100; ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].a = t.a; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].a != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes via a separate instance\"" "struct_reorg" } } 
*/ +diff --git a/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c b/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c +new file mode 100644 +index 000000000..9d58edab8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++ ++static struct S { ++ int *p1; ++ int *p2; ++} s; ++ ++typedef __UINTPTR_TYPE__ uintptr_t; ++ ++int ++foo () ++{ ++ int i = 1; ++ int j = 2; ++ struct S s; ++ int **p; ++ s.p1 = &i; ++ s.p2 = &j; ++ p = &s.p1; ++ uintptr_t pi = (uintptr_t) p; ++ pi = pi + sizeof (int *); ++ p = (int **)pi; ++ **p = 3; ++ return j; ++} ++ ++int ++main () ++{ ++ if (foo () != 3) ++ { ++ __builtin_abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct S has escaped: \"Type escapes via taking the address of field\"" "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/sr_convert_mem.c b/gcc/testsuite/gcc.dg/struct/sr_convert_mem.c +new file mode 100644 +index 000000000..a99ee0de4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/sr_convert_mem.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++ ++struct T1 { ++ long var1; ++ int var2; ++}; ++ ++struct T2 { ++ long var1; ++ int var2; ++}; ++ ++void test (void*); ++ ++__attribute__((used)) void ++foo (struct T2 *t2) ++{ ++ struct T1* t1 = (void *)(&t2[1]); ++ void* data = (void *)(&t1[1]); ++ ++ test(data); ++ return; ++} +diff --git a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c +new file mode 100644 +index 000000000..fb135ef0b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c +@@ -0,0 +1,25 @@ ++// { dg-do compile } ++ ++#include ++ ++struct S { ++ unsigned long a; ++ unsigned long b; ++}; ++ ++struct S* s; ++struct S* t = (struct S*) 1000; ++ ++int ++main () ++{ ++ s = (struct S*) calloc (1000, sizeof (struct S)); ++ s = s > t ? s : t; ++ if (s == 0) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/sr_pointer_and.c b/gcc/testsuite/gcc.dg/struct/sr_pointer_and.c +new file mode 100644 +index 000000000..9a4b10d9a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/sr_pointer_and.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++ ++struct test {long val; struct test* next; }; ++ ++unsigned long P_DATA; ++ ++void func (struct test*); ++ ++__attribute__((used)) static void ++foo (struct test* pt) ++{ ++ struct test t; ++ ++ t.next = (void *)((unsigned long)pt->next & P_DATA); ++ func(&t); ++ return; ++} +diff --git a/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c +new file mode 100644 +index 000000000..9a82da0d6 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c +@@ -0,0 +1,33 @@ ++// { dg-do compile } ++ ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++}; ++ ++int max; ++int x; ++ ++node_p n; ++node_p z; ++ ++int ++main () ++{ ++ n = (node_p) calloc (max, sizeof (node_t)); ++ ++ node_p xp = &n[x]; ++ ++ if (xp - z == 10) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes via a unhandled rewrite stmt\"" "struct_reorg" } } */ +-- +2.33.0 + diff --git a/0017-StructReorg-Some-bugfix-for-structure-reorganization.patch b/0017-StructReorg-Some-bugfix-for-structure-reorganization.patch new file mode 100644 index 0000000..2e06afe --- /dev/null +++ b/0017-StructReorg-Some-bugfix-for-structure-reorganization.patch @@ -0,0 +1,489 @@ +From 2b4db34d3b21ff8597373e9e67858b3b60cc7dae Mon Sep 17 00:00:00 2001 +From: eastb233 +Date: Fri, 21 Jul 2023 11:20:51 +0800 +Subject: [PATCH 17/22] [StructReorg] Some bugfix for structure reorganization + +Some bugfix for structure reorganization, +1. disable type simplify in LTO within optimizations +2. only enable optimizations in C language +3. 
use new to initialize allocated memory in symbol-summary.h +4. cover escape scenarios not considered +--- + gcc/ipa-free-lang-data.cc | 11 ++ + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 101 +++++++++++-------- + gcc/symbol-summary.h | 13 ++- + gcc/testsuite/gcc.dg/struct/struct_reorg-5.c | 31 ++++++ + gcc/testsuite/gcc.dg/struct/struct_reorg-6.c | 54 ++++++++++ + gcc/testsuite/gcc.dg/struct/struct_reorg-7.c | 38 +++++++ + gcc/testsuite/gcc.dg/struct/struct_reorg-8.c | 25 +++++ + gcc/testsuite/gcc.dg/struct/struct_reorg-9.c | 54 ++++++++++ + 8 files changed, 283 insertions(+), 44 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-5.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-6.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-7.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-8.c + create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-9.c + +diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc +index a74215685..5450be9fe 100644 +--- a/gcc/ipa-free-lang-data.cc ++++ b/gcc/ipa-free-lang-data.cc +@@ -102,6 +102,12 @@ fld_worklist_push (tree t, class free_lang_data_d *fld) + static tree + fld_simplified_type_name (tree type) + { ++ /* Simplify type will cause that struct A and struct A within ++ struct B are different type pointers, so skip it in structure ++ optimizations. */ ++ if (flag_ipa_struct_reorg) ++ return TYPE_NAME (type); ++ + if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL) + return TYPE_NAME (type); + /* Drop TYPE_DECLs in TYPE_NAME in favor of the identifier in the +@@ -340,6 +346,11 @@ fld_simplified_type (tree t, class free_lang_data_d *fld) + { + if (!t) + return t; ++ /* Simplify type will cause that struct A and struct A within ++ struct B are different type pointers, so skip it in structure ++ optimizations. 
*/ ++ if (flag_ipa_struct_reorg) ++ return t; + if (POINTER_TYPE_P (t)) + return fld_incomplete_type_of (t, fld); + /* FIXME: This triggers verification error, see PR88140. */ +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +index c8b975a92..9f790b28b 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -105,6 +105,7 @@ along with GCC; see the file COPYING3. If not see + #include "ipa-param-manipulation.h" + #include "gimplify-me.h" + #include "cfgloop.h" ++#include "langhooks.h" + + namespace { + +@@ -196,6 +197,39 @@ gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type, + GSI_SAME_STMT); + } + ++/* Check whether in C language or LTO with only C language. */ ++ ++static bool ++lang_c_p (void) ++{ ++ const char *language_string = lang_hooks.name; ++ ++ if (!language_string) ++ return false; ++ ++ if (strcmp (language_string, "GNU GIMPLE") == 0) ++ { ++ unsigned i = 0; ++ tree t = NULL; ++ const char *unit_string = NULL; ++ ++ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t) ++ { ++ unit_string = TRANSLATION_UNIT_LANGUAGE (t); ++ if (!unit_string ++ || (strncmp (unit_string, "GNU C", 5) != 0) ++ || (!ISDIGIT (unit_string[5]))) ++ return false; ++ } ++ return true; ++ } ++ else if (strncmp (language_string, "GNU C", 5) == 0 ++ && ISDIGIT (language_string[5])) ++ return true; ++ ++ return false; ++} ++ + enum srmode + { + NORMAL = 0, +@@ -1018,7 +1052,6 @@ public: + void analyze_types (void); + void clear_visited (void); + bool create_new_types (void); +- void restore_field_type (void); + void create_new_decls (void); + srdecl *find_decl (tree); + void create_new_functions (void); +@@ -2107,7 +2140,12 @@ ipa_struct_reorg::find_vars (gimple *stmt) + srtype *t = find_type (inner_type (TREE_TYPE (rhs))); + srdecl *d = find_decl (lhs); + if (!d && t) +- current_function->record_decl (t, lhs, -1); ++ { ++ current_function->record_decl (t, lhs, 
-1); ++ tree var = SSA_NAME_VAR (lhs); ++ if (var && VOID_POINTER_P (TREE_TYPE (var))) ++ current_function->record_decl (t, var, -1); ++ } + } + if (TREE_CODE (rhs) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (rhs)) +@@ -2116,7 +2154,12 @@ ipa_struct_reorg::find_vars (gimple *stmt) + srtype *t = find_type (inner_type (TREE_TYPE (lhs))); + srdecl *d = find_decl (rhs); + if (!d && t) +- current_function->record_decl (t, rhs, -1); ++ { ++ current_function->record_decl (t, rhs, -1); ++ tree var = SSA_NAME_VAR (rhs); ++ if (var && VOID_POINTER_P (TREE_TYPE (var))) ++ current_function->record_decl (t, var, -1); ++ } + } + } + else +@@ -2796,8 +2839,14 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) + if (escapes != does_not_escape) + { + for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) +- mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)), +- escapes); ++ { ++ mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)), ++ escapes); ++ srdecl *d = current_function->find_decl ( ++ gimple_call_arg (stmt, i)); ++ if (d) ++ d->type->mark_escape (escapes, stmt); ++ } + return; + } + +@@ -3731,42 +3780,6 @@ ipa_struct_reorg::analyze_types (void) + } + } + +-/* When struct A has a struct B member, B's type info +- is not stored in +- TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA))) +- Try to restore B's type information. */ +- +-void +-ipa_struct_reorg::restore_field_type (void) +-{ +- for (unsigned i = 0; i < types.length (); i++) +- { +- for (unsigned j = 0; j < types[i]->fields.length (); j++) +- { +- srfield *field = types[i]->fields[j]; +- if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE) +- { +- /* If field type has TYPE_FIELDS information, +- we do not need to do this. 
*/ +- if (TYPE_FIELDS (field->type->type) != NULL) +- continue; +- for (unsigned k = 0; k < types.length (); k++) +- { +- if (i == k) +- continue; +- const char *type1 = get_type_name (field->type->type); +- const char *type2 = get_type_name (types[k]->type); +- if (type1 == NULL || type2 == NULL) +- continue; +- if (type1 == type2 +- && TYPE_FIELDS (types[k]->type)) +- field->type = types[k]; +- } +- } +- } +- } +-} +- + /* Create all new types we want to create. */ + + bool +@@ -4647,7 +4660,6 @@ ipa_struct_reorg::rewrite_functions (void) + { + unsigned retval = 0; + +- restore_field_type (); + /* Create new types, if we did not create any new types, + then don't rewrite any accesses. */ + if (!create_new_types ()) +@@ -4866,7 +4878,10 @@ pass_ipa_struct_reorg::gate (function *) + && flag_ipa_struct_reorg + /* Don't bother doing anything if the program has errors. */ + && !seen_error () +- && flag_lto_partition == LTO_PARTITION_ONE); ++ && flag_lto_partition == LTO_PARTITION_ONE ++ /* Only enable struct optimizations in C since other ++ languages' grammar forbid. */ ++ && lang_c_p ()); + } + + } // anon namespace +diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h +index c54d3084c..3fe64047c 100644 +--- a/gcc/symbol-summary.h ++++ b/gcc/symbol-summary.h +@@ -103,6 +103,12 @@ protected: + /* Allocates new data that are stored within map. */ + T* allocate_new () + { ++ /* In structure optimizations, we call new to ensure that ++ the allocated memory is initialized to 0. */ ++ if (flag_ipa_struct_reorg) ++ return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T () ++ : new T (); ++ + /* Call gcc_internal_because we do not want to call finalizer for + a type T. We call dtor explicitly. */ + return is_ggc () ?
new (ggc_internal_alloc (sizeof (T))) T () +@@ -115,7 +121,12 @@ protected: + if (is_ggc ()) + ggc_delete (item); + else +- m_allocator.remove (item); ++ { ++ if (flag_ipa_struct_reorg) ++ delete item; ++ else ++ m_allocator.remove (item); ++ } + } + + /* Unregister all call-graph hooks. */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c +new file mode 100644 +index 000000000..273baa9a3 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-5.c +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-flto -fno-use-linker-plugin" } */ ++ ++struct D ++{ ++ int n; ++ int c [8]; ++}; ++ ++struct A ++{ ++ int i; ++ char *p; ++}; ++ ++struct B ++{ ++ struct A *a; ++ struct D *d; ++}; ++ ++int dtInsert1 (struct B *b) ++{ ++ struct A a = { 0, 0 }; ++ struct D *d; ++ b->a = &a; ++ d = b->d; ++ &d->c [d->n]; ++ return 0; ++} ++ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c +new file mode 100644 +index 000000000..455f9b501 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-6.c +@@ -0,0 +1,54 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-flto -fno-use-linker-plugin" } */ ++ ++typedef struct basic_block_def *basic_block; ++typedef struct gimple_seq_node_d *gimple_seq_node; ++typedef struct gimple_seq_d *gimple_seq; ++typedef struct ++{ ++ gimple_seq_node ptr; ++ gimple_seq seq; ++ basic_block bb; ++} gimple_stmt_iterator; ++typedef void *gimple; ++extern void exit(int); ++struct gimple_seq_node_d ++{ ++ gimple stmt; ++ struct gimple_seq_node_d *next; ++}; ++struct gimple_seq_d ++{ ++}; ++static __inline__ gimple_stmt_iterator ++gsi_start (gimple_seq seq) ++{ ++ gimple_stmt_iterator i; ++ i.seq = seq; ++ return i; ++} ++static __inline__ unsigned char ++gsi_end_p (gimple_stmt_iterator i) ++{ ++ return i.ptr == ((void *)0); ++} ++static __inline__ void ++gsi_next (gimple_stmt_iterator *i) ++{ ++ i->ptr = 
i->ptr->next; ++} ++static __inline__ gimple ++gsi_stmt (gimple_stmt_iterator i) ++{ ++ return i.ptr->stmt; ++} ++void ++c_warn_unused_result (gimple_seq seq) ++{ ++ gimple_stmt_iterator i; ++ for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i)) ++ { ++ gimple g = gsi_stmt (i); ++ if (!g) exit(0); ++ } ++} +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c +new file mode 100644 +index 000000000..afc0bd86c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-7.c +@@ -0,0 +1,38 @@ ++/* { dg-do run } */ ++ ++#include ++#include ++ ++struct gki_elem { ++ char *key; ++ int idx; ++}; ++ ++typedef struct { ++ struct gki_elem *table; ++ ++ int primelevel; ++ int nhash; ++ int nkeys; ++} GKI; ++ ++void * ++sre_malloc(size_t size) ++{ ++ void *ptr = malloc (size); ++ return ptr; ++} ++ ++__attribute__((noinline)) int ++GKIStoreKey(GKI *hash) ++{ ++ hash->table = sre_malloc(sizeof(struct gki_elem)); ++} ++ ++int ++main () ++{ ++ GKI *hash = malloc (sizeof(GKI)); ++ GKIStoreKey(hash); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c +new file mode 100644 +index 000000000..9bcfaf368 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-8.c +@@ -0,0 +1,25 @@ ++/* { dg-do run } */ ++ ++#include ++#include ++#include ++ ++typedef struct { ++ unsigned char blue; ++ unsigned char green; ++} Pixel; ++ ++typedef struct { ++ unsigned short colormaplength; ++ Pixel *colormapdata; ++} TargaImage; ++ ++TargaImage *img; ++ ++int main() { ++ img = (TargaImage *) malloc( sizeof(TargaImage) ); ++ if (img->colormaplength > 0) { ++ img->colormapdata = (Pixel *) malloc(sizeof(Pixel) * img->colormaplength); ++ memset(img->colormapdata, 0, (sizeof(Pixel) * img->colormaplength) ); ++ } ++} +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c +new file mode 100644 +index 000000000..052f4e3bd +--- 
/dev/null ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-9.c +@@ -0,0 +1,54 @@ ++/* { dg-do run } */ ++ ++extern void abort(void); ++ ++struct packed_ushort { ++ unsigned short ucs; ++} __attribute__((packed)); ++ ++struct source { ++ int pos, length; ++}; ++ ++static int flag; ++ ++static void __attribute__((noinline)) fetch(struct source *p) ++{ ++ p->length = 128; ++} ++ ++static struct packed_ushort __attribute__((noinline)) next(struct source *p) ++{ ++ struct packed_ushort rv; ++ ++ if (p->pos >= p->length) { ++ if (flag) { ++ flag = 0; ++ fetch(p); ++ return next(p); ++ } ++ flag = 1; ++ rv.ucs = 0xffff; ++ return rv; ++ } ++ rv.ucs = 0; ++ return rv; ++} ++ ++int main(void) ++{ ++ struct source s; ++ int i; ++ ++ s.pos = 0; ++ s.length = 0; ++ flag = 0; ++ ++ for (i = 0; i < 16; i++) { ++ struct packed_ushort rv = next(&s); ++ if ((i == 0 && rv.ucs != 0xffff) ++ || (i > 0 && rv.ucs != 0)) ++ abort(); ++ } ++ return 0; ++} +-- +2.33.0 + diff --git a/0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch b/0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch new file mode 100644 index 0000000..6f99e5c --- /dev/null +++ b/0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch @@ -0,0 +1,342 @@ +From 19ded9dad06b22b9b7aa9e3902e3e7a38a2256ab Mon Sep 17 00:00:00 2001 +From: dingguangya +Date: Sat, 29 Jul 2023 18:27:10 +0800 +Subject: [PATCH 18/22] [ccmp] Add another optimization opportunity for ccmp + instruction + +Add flag -fccmp2. +Enables the use of the ccmp instruction by creating a new conflict +relationship for instances where temporary expressions replacement +cannot be effectively created. 
+--- + gcc/ccmp.cc | 33 ++++ + gcc/ccmp.h | 1 + + gcc/common.opt | 4 + + gcc/testsuite/gcc.target/aarch64/ccmp_3.c | 15 ++ + gcc/tree-ssa-coalesce.cc | 197 ++++++++++++++++++++++ + 5 files changed, 250 insertions(+) + create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c + +diff --git a/gcc/ccmp.cc b/gcc/ccmp.cc +index 3db0a264e..e34f3bcc6 100644 +--- a/gcc/ccmp.cc ++++ b/gcc/ccmp.cc +@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see + #include "cfgexpand.h" + #include "ccmp.h" + #include "predict.h" ++#include "gimple-iterator.h" + + /* Check whether T is a simple boolean variable or a SSA name + set by a comparison operator in the same basic block. */ +@@ -129,6 +130,38 @@ ccmp_candidate_p (gimple *g) + return false; + } + ++/* Check whether bb is a potential conditional compare candidate. */ ++bool ++check_ccmp_candidate (basic_block bb) ++{ ++ gimple_stmt_iterator gsi; ++ gimple *bb_last_stmt, *stmt; ++ tree op0, op1; ++ ++ gsi = gsi_last_bb (bb); ++ bb_last_stmt = gsi_stmt (gsi); ++ ++ if (bb_last_stmt && gimple_code (bb_last_stmt) == GIMPLE_COND) ++ { ++ op0 = gimple_cond_lhs (bb_last_stmt); ++ op1 = gimple_cond_rhs (bb_last_stmt); ++ ++ if (TREE_CODE (op0) == SSA_NAME ++ && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE ++ && TREE_CODE (op1) == INTEGER_CST ++ && ((gimple_cond_code (bb_last_stmt) == NE_EXPR) ++ || (gimple_cond_code (bb_last_stmt) == EQ_EXPR))) ++ { ++ stmt = SSA_NAME_DEF_STMT (op0); ++ if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN) ++ { ++ return ccmp_candidate_p (stmt); ++ } ++ } ++ } ++ return false; ++} ++ + /* Extract the comparison we want to do from the tree. */ + void + get_compare_parts (tree t, int *up, rtx_code *rcode, +diff --git a/gcc/ccmp.h b/gcc/ccmp.h +index 1799d5fed..efe3a1c14 100644 +--- a/gcc/ccmp.h ++++ b/gcc/ccmp.h +@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3. 
If not see + #define GCC_CCMP_H + + extern rtx expand_ccmp_expr (gimple *, machine_mode); ++extern bool check_ccmp_candidate (basic_block bb); + + #endif /* GCC_CCMP_H */ +diff --git a/gcc/common.opt b/gcc/common.opt +index 4d91ce8cf..0aa516719 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2017,6 +2017,10 @@ fira-verbose= + Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5) + -fira-verbose= Control IRA's level of diagnostic messages. + ++fccmp2 ++Common Var(flag_ccmp2) Init(0) Optimization ++Optimize potential ccmp instruction in complex scenarios. ++ + fivopts + Common Var(flag_ivopts) Init(1) Optimization + Optimize induction variables on trees. +diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c +new file mode 100644 +index 000000000..b509ba810 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-options "-O -fdump-rtl-expand-details -fccmp2" } */ ++ ++int func (int a, int b, int c) ++{ ++ while(1) ++ { ++ if(a-- == 0 || b >= c) ++ { ++ return 1; ++ } ++ } ++} ++ ++/* { dg-final { scan-assembler-times "\tccmp\t" 1} } */ +diff --git a/gcc/tree-ssa-coalesce.cc b/gcc/tree-ssa-coalesce.cc +index dccf41ab8..195e06428 100644 +--- a/gcc/tree-ssa-coalesce.cc ++++ b/gcc/tree-ssa-coalesce.cc +@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3. If not see + #include "explow.h" + #include "tree-dfa.h" + #include "stor-layout.h" ++#include "ccmp.h" ++#include "target.h" ++#include "tree-outof-ssa.h" + + /* This set of routines implements a coalesce_list. This is an object which + is used to track pairs of ssa_names which are desirable to coalesce +@@ -854,6 +857,198 @@ live_track_clear_base_vars (live_track *ptr) + bitmap_clear (&ptr->live_base_var); + } + ++/* Return true if gimple is a copy assignment. 
*/ ++ ++static inline bool ++gimple_is_assign_copy_p (gimple *gs) ++{ ++ return (is_gimple_assign (gs) && gimple_assign_copy_p (gs) ++ && TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME ++ && TREE_CODE (gimple_assign_rhs1 (gs)) == SSA_NAME); ++} ++ ++#define MAX_CCMP_CONFLICT_NUM 5 ++ ++/* Clear high-cost conflict graphs. */ ++ ++static void ++remove_high_cost_graph_for_ccmp (ssa_conflicts *conflict_graph) ++{ ++ unsigned x = 0; ++ int add_conflict_num = 0; ++ bitmap b; ++ FOR_EACH_VEC_ELT (conflict_graph->conflicts, x, b) ++ { ++ if (b) ++ { ++ add_conflict_num++; ++ } ++ } ++ if (add_conflict_num >= MAX_CCMP_CONFLICT_NUM) ++ { ++ conflict_graph->conflicts.release (); ++ } ++} ++ ++/* Adding a new conflict graph to the original graph. */ ++ ++static void ++process_add_graph (live_track *live, basic_block bb, ++ ssa_conflicts *conflict_graph) ++{ ++ tree use, def; ++ ssa_op_iter iter; ++ gimple *first_visit_stmt = NULL; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ if (gimple_visited_p (gsi_stmt (gsi))) ++ { ++ first_visit_stmt = gsi_stmt (gsi); ++ break; ++ } ++ } ++ if (!first_visit_stmt) ++ return; ++ ++ for (gimple_stmt_iterator gsi = gsi_last_bb (bb); ++ gsi_stmt (gsi) != first_visit_stmt; gsi_prev (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (gimple_visited_p (gsi_stmt (gsi)) && is_gimple_debug (stmt)) ++ { ++ continue; ++ } ++ if (gimple_is_assign_copy_p (stmt)) ++ { ++ live_track_clear_var (live, gimple_assign_rhs1 (stmt)); ++ } ++ FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF) ++ { ++ live_track_process_def (live, def, conflict_graph); ++ } ++ FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) ++ { ++ live_track_process_use (live, use); ++ } ++ } ++} ++ ++/* Build a conflict graph based on ccmp candidate. 
*/ ++ ++static void ++add_ccmp_conflict_graph (ssa_conflicts *conflict_graph, ++ tree_live_info_p liveinfo, var_map map, basic_block bb) ++{ ++ live_track *live; ++ tree use, def; ++ ssa_op_iter iter; ++ live = new_live_track (map); ++ live_track_init (live, live_on_exit (liveinfo, bb)); ++ ++ gimple *last_stmt = gsi_stmt (gsi_last_bb (bb)); ++ gcc_assert (gimple_cond_lhs (last_stmt)); ++ ++ auto_vec stack; ++ stack.safe_push (gimple_cond_lhs (last_stmt)); ++ while (!stack.is_empty ()) ++ { ++ tree op = stack.pop (); ++ gimple *op_stmt = SSA_NAME_DEF_STMT (op); ++ if (!op_stmt || gimple_bb (op_stmt) != bb ++ || !is_gimple_assign (op_stmt) ++ || !ssa_is_replaceable_p (op_stmt)) ++ { ++ continue; ++ } ++ if (gimple_is_assign_copy_p (op_stmt)) ++ { ++ live_track_clear_var (live, gimple_assign_rhs1 (op_stmt)); ++ } ++ gimple_set_visited (op_stmt, true); ++ FOR_EACH_SSA_TREE_OPERAND (def, op_stmt, iter, SSA_OP_DEF) ++ { ++ live_track_process_def (live, def, conflict_graph); ++ } ++ FOR_EACH_SSA_TREE_OPERAND (use, op_stmt, iter, SSA_OP_USE) ++ { ++ stack.safe_push (use); ++ live_track_process_use (live, use); ++ } ++ } ++ ++ process_add_graph (live, bb, conflict_graph); ++ delete_live_track (live); ++ remove_high_cost_graph_for_ccmp (conflict_graph); ++} ++ ++/* Determine whether the ccmp conflict graph can be added. ++ i.e, ++ ++ ;; basic block 3, loop depth 1 ++ ;; pred: 2 ++ ;; 3 ++ # ivtmp.5_10 = PHI ++ _7 = b_4 (D) >= c_5 (D); ++ _8 = ivtmp.5_10 == 0; ++ _9 = _7 | _8; ++ ivtmp.5_11 = ivtmp.5_10 - 1; ++ if (_9 != 0) ++ goto ; [10.70%] ++ else ++ goto ; [89.30%] ++ ++ In the above loop, the expression will be replaced: ++ ++ _7 replaced by b_4 (D) >= c_5 (D) ++ _8 replaced by ivtmp.5_10 == 0 ++ ++ If the current case want use the ccmp instruction, then ++ ++ _9 can replaced by _7 | _8 ++ ++ So this requires that ivtmp.5_11 and ivtmp.5_10 be divided into different ++ partitions. ++ ++ Now this function can achieve this ability. 
*/ ++ ++static void ++determine_add_ccmp_conflict_graph (basic_block bb, tree_live_info_p liveinfo, ++ var_map map, ssa_conflicts *graph) ++{ ++ if (!flag_ccmp2 || !targetm.gen_ccmp_first || !check_ccmp_candidate (bb)) ++ return; ++ for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); ++ gsi_next (&bsi)) ++ { ++ gimple_set_visited (gsi_stmt (bsi), false); ++ } ++ ssa_conflicts *ccmp_conflict_graph; ++ ccmp_conflict_graph = ssa_conflicts_new (num_var_partitions (map)); ++ add_ccmp_conflict_graph (ccmp_conflict_graph, liveinfo, map, bb); ++ unsigned x; ++ bitmap b; ++ if (ccmp_conflict_graph) ++ { ++ FOR_EACH_VEC_ELT (ccmp_conflict_graph->conflicts, x, b) ++ { ++ if (!b) ++ continue; ++ unsigned y = bitmap_first_set_bit (b); ++ if (!graph->conflicts[x] || !bitmap_bit_p (graph->conflicts[x], y)) ++ { ++ ssa_conflicts_add (graph, x, y); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "potential ccmp: add additional " ++ "conflict-ssa : bb[%d] %d:%d\n", ++ bb->index, x, y); ++ } ++ } ++ } ++ } ++ ssa_conflicts_delete (ccmp_conflict_graph); ++} + + /* Build a conflict graph based on LIVEINFO. Any partitions which are in the + partition view of the var_map liveinfo is based on get entries in the +@@ -938,6 +1133,8 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo) + live_track_process_use (live, var); + } + ++ determine_add_ccmp_conflict_graph (bb, liveinfo, map, graph); ++ + /* If result of a PHI is unused, looping over the statements will not + record any conflicts since the def was never live. Since the PHI node + is going to be translated out of SSA form, it will insert a copy. 
+-- +2.33.0 + diff --git a/0019-fp-model-Enable-fp-model-on-kunpeng.patch b/0019-fp-model-Enable-fp-model-on-kunpeng.patch new file mode 100644 index 0000000..46ea52c --- /dev/null +++ b/0019-fp-model-Enable-fp-model-on-kunpeng.patch @@ -0,0 +1,405 @@ +From 8cdb316a3fe205a3089b9c17aec0442f4d5f75be Mon Sep 17 00:00:00 2001 +From: bule +Date: Sun, 27 Aug 2023 16:49:04 +0800 +Subject: [PATCH 19/22] [fp-model] Enable fp-model on kunpeng + +Enable fp-model options on kunpeng for precision control. +--- + gcc/common.opt | 26 +++++ + gcc/config/aarch64/aarch64-linux.h | 3 +- + gcc/flag-types.h | 9 ++ + gcc/fortran/options.cc | 8 ++ + gcc/opts-common.cc | 146 ++++++++++++++++++++++++++++- + gcc/opts.cc | 68 ++++++++++++++ + 6 files changed, 256 insertions(+), 4 deletions(-) + +diff --git a/gcc/common.opt b/gcc/common.opt +index 8a0dafc52..f5eef8a45 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1642,6 +1642,32 @@ ffp-int-builtin-inexact + Common Var(flag_fp_int_builtin_inexact) Init(1) Optimization + Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions. + ++fftz ++Common Var(flag_ftz) Optimization ++Control fpcr register for flush to zero. ++ ++fp-model= ++Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization ++-fp-model=[normal|fast|precise|except|strict] Perform floating-point precision control. ++ ++Enum ++Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs) ++ ++EnumValue ++Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL) ++ ++EnumValue ++Enum(fp_model) String(fast) Value(FP_MODEL_FAST) ++ ++EnumValue ++Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE) ++ ++EnumValue ++Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT) ++ ++EnumValue ++Enum(fp_model) String(strict) Value(FP_MODEL_STRICT) ++ + ; Nonzero means don't put addresses of constant functions in registers. 
; Used for compiling the Unix kernel, where strange substitutions are + ; done on the assembly output. +diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h +index 5e4553d79..a5cba6391 100644 +--- a/gcc/config/aarch64/aarch64-linux.h ++++ b/gcc/config/aarch64/aarch64-linux.h +@@ -50,7 +50,8 @@ + #define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC + + #define GNU_USER_TARGET_MATHFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" ++ "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\ ++ %{!fno-ftz:crtfastmath.o%s}}" + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +diff --git a/gcc/flag-types.h b/gcc/flag-types.h +index 2c8498169..64c64eb32 100644 +--- a/gcc/flag-types.h ++++ b/gcc/flag-types.h +@@ -260,6 +260,15 @@ enum fp_contract_mode { + FP_CONTRACT_FAST = 2 + }; + ++/* Floating-point precision mode. */ ++enum fp_model { ++ FP_MODEL_NORMAL = 0, ++ FP_MODEL_FAST = 1, ++ FP_MODEL_PRECISE = 2, ++ FP_MODEL_EXCEPT = 3, ++ FP_MODEL_STRICT = 4 ++}; ++ + /* Scalar storage order kind. */ + enum scalar_storage_order_kind { + SSO_NATIVE = 0, +diff --git a/gcc/fortran/options.cc b/gcc/fortran/options.cc +index d0fa634f1..3eb99a84a 100644 +--- a/gcc/fortran/options.cc ++++ b/gcc/fortran/options.cc +@@ -243,6 +243,7 @@ form_from_filename (const char *filename) + return f_form; + } + ++static void gfc_handle_fpe_option (const char *arg, bool trap); + + /* Finalize commandline options. */ + +@@ -286,6 +287,13 @@ gfc_post_options (const char **pfilename) + if (flag_protect_parens == -1) + flag_protect_parens = !optimize_fast; + ++ /* If fp-model=except/strict, turn on all ffpe-trap and ffpe-summary. */ ++ if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT) ++ { ++ gfc_handle_fpe_option ("all", false); ++ gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true); ++ } ++ + /* -Ofast sets implies -fstack-arrays unless an explicit size is set for + stack arrays.
*/ + if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2) +diff --git a/gcc/opts-common.cc b/gcc/opts-common.cc +index 7c07d5046..489a6e02a 100644 +--- a/gcc/opts-common.cc ++++ b/gcc/opts-common.cc +@@ -28,7 +28,8 @@ along with GCC; see the file COPYING3. If not see + #include "spellcheck.h" + #include "opts-jobserver.h" + +-static void prune_options (struct cl_decoded_option **, unsigned int *); ++static void prune_options (struct cl_decoded_option **, unsigned int *, ++ unsigned int); + + /* An option that is undocumented, that takes a joined argument, and + that doesn't fit any of the classes of uses (language/common, +@@ -1091,7 +1092,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv, + + *decoded_options = opt_array; + *decoded_options_count = num_decoded_options; +- prune_options (decoded_options, decoded_options_count); ++ prune_options (decoded_options, decoded_options_count, lang_mask); + } + + /* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the +@@ -1112,11 +1113,109 @@ cancel_option (int opt_idx, int next_opt_idx, int orig_next_opt_idx) + return false; + } + ++/* Check whether opt_idx exists in decoded_options array between index ++ start and end. If found, return its index in decoded_options, ++ else return end. */ ++static unsigned int ++find_opt_idx (const struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int start, unsigned int end, unsigned int opt_idx) ++{ ++ gcc_assert (end <= decoded_options_count); ++ gcc_assert (opt_idx < cl_options_count); ++ unsigned int k; ++ for (k = start; k < end; k++) ++ { ++ if (decoded_options[k].opt_index == opt_idx) ++ { ++ return k; ++ } ++ } ++ return k; ++} ++ ++/* remove the opt_index element from decoded_options array. 
*/ ++static unsigned int ++remove_option (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int opt_index) ++{ ++ gcc_assert (opt_index < decoded_options_count); ++ unsigned int i; ++ for (i = opt_index; i < decoded_options_count - 1; i++) ++ { ++ decoded_options[i] = decoded_options[i + 1]; ++ } ++ return decoded_options_count - 1; ++} ++ ++/* Handle the priority between fp-model, Ofast, and ++ ffast-math. */ ++static unsigned int ++handle_fp_model_driver (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int fp_model_index, ++ unsigned int lang_mask) ++{ ++ struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index]; ++ enum fp_model model = (enum fp_model) fp_model_opt.value; ++ if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT) ++ { ++ /* If found Ofast, override Ofast with O3. */ ++ unsigned int Ofast_index; ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ while (Ofast_index != decoded_options_count) ++ { ++ const char *tmp_argv = "-O3"; ++ decode_cmdline_option (&tmp_argv, lang_mask, ++ &decoded_options[Ofast_index]); ++ warning (0, "%<-Ofast%> is degraded to %<-O3%> due to %qs", ++ fp_model_opt.orig_option_with_args_text); ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ } ++ /* If found ffast-math before fp-model=precise/strict ++ it, cancel it. 
*/ ++ unsigned int ffast_math_index; ++ ffast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, 0, ++ fp_model_index, OPT_ffast_math); ++ if (ffast_math_index != fp_model_index) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ ffast_math_index); ++ warning (0, "%<-ffast-math%> before %qs is canceled", ++ fp_model_opt.orig_option_with_args_text); ++ } ++ } ++ if (model == FP_MODEL_FAST) ++ { ++ /* If found -fno-fast-math after fp-model=fast, cancel this one. */ ++ unsigned int fno_fast_math_index; ++ fno_fast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, fp_model_index, ++ decoded_options_count, OPT_ffast_math); ++ if (fno_fast_math_index != decoded_options_count ++ && decoded_options[fno_fast_math_index].value == 0) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ fp_model_index); ++ warning (0, ++ "%<-fp-model=fast%> before %<-fno-fast-math%> is canceled"); ++ } ++ } ++ return decoded_options_count; ++} ++ + /* Filter out options canceled by the ones after them. */ + + static void + prune_options (struct cl_decoded_option **decoded_options, +- unsigned int *decoded_options_count) ++ unsigned int *decoded_options_count, ++ unsigned int lang_mask) + { + unsigned int old_decoded_options_count = *decoded_options_count; + struct cl_decoded_option *old_decoded_options = *decoded_options; +@@ -1127,7 +1226,12 @@ prune_options (struct cl_decoded_option **decoded_options, + const struct cl_option *option; + unsigned int fdiagnostics_color_idx = 0; + ++ if (!diagnostic_ready_p ()) ++ diagnostic_initialize (global_dc, 0); ++ + /* Remove arguments which are negated by others after them. 
*/ ++ ++ unsigned int fp_model_index = old_decoded_options_count; + new_decoded_options_count = 0; + for (i = 0; i < old_decoded_options_count; i++) + { +@@ -1151,6 +1255,34 @@ prune_options (struct cl_decoded_option **decoded_options, + fdiagnostics_color_idx = i; + continue; + ++ case OPT_fp_model_: ++ /* Only the last fp-model option will take effect. */ ++ unsigned int next_fp_model_idx; ++ next_fp_model_idx = find_opt_idx (old_decoded_options, ++ old_decoded_options_count, ++ i + 1, ++ old_decoded_options_count, ++ OPT_fp_model_); ++ if (next_fp_model_idx != old_decoded_options_count) ++ { ++ /* Found more than one fp-model, cancel this one. */ ++ if (old_decoded_options[i].value ++ != old_decoded_options[next_fp_model_idx].value) ++ { ++ warning (0, "%qs is overrided by %qs", ++ old_decoded_options[i]. ++ orig_option_with_args_text, ++ old_decoded_options[next_fp_model_idx]. ++ orig_option_with_args_text); ++ } ++ break; ++ } ++ else ++ { ++ /* Found the last fp-model option. */ ++ fp_model_index = new_decoded_options_count; ++ } ++ /* FALLTHRU. 
*/ + default: + gcc_assert (opt_idx < cl_options_count); + option = &cl_options[opt_idx]; +@@ -1190,6 +1322,14 @@ keep: + break; + } + } ++ if (fp_model_index < new_decoded_options_count) ++ { ++ new_decoded_options_count ++ = handle_fp_model_driver (new_decoded_options, ++ new_decoded_options_count, ++ fp_model_index, ++ lang_mask); ++ } + + if (fdiagnostics_color_idx >= 1) + { +diff --git a/gcc/opts.cc b/gcc/opts.cc +index a97630d1c..b522ed7e2 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -328,6 +328,7 @@ static void set_debug_level (uint32_t dinfo, int extended, + struct gcc_options *opts_set, + location_t loc); + static void set_fast_math_flags (struct gcc_options *opts, int set); ++static void set_fp_model_flags (struct gcc_options *opts, int set); + static void decode_d_option (const char *arg, struct gcc_options *opts, + location_t loc, diagnostic_context *dc); + static void set_unsafe_math_optimizations_flags (struct gcc_options *opts, +@@ -2857,6 +2858,10 @@ common_handle_option (struct gcc_options *opts, + set_fast_math_flags (opts, value); + break; + ++ case OPT_fp_model_: ++ set_fp_model_flags (opts, value); ++ break; ++ + case OPT_funsafe_math_optimizations: + set_unsafe_math_optimizations_flags (opts, value); + break; +@@ -3266,6 +3271,69 @@ set_fast_math_flags (struct gcc_options *opts, int set) + } + } + ++/* Handle fp-model options. */ ++static void ++set_fp_model_flags (struct gcc_options *opts, int set) ++{ ++ enum fp_model model = (enum fp_model) set; ++ switch (model) ++ { ++ case FP_MODEL_FAST: ++ /* Equivalent to open ffast-math. */ ++ set_fast_math_flags (opts, 1); ++ break; ++ ++ case FP_MODEL_PRECISE: ++ /* Equivalent to close ffast-math. */ ++ set_fast_math_flags (opts, 0); ++ /* Turn on -frounding-math -fsignaling-nans. 
*/ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_EXCEPT: ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ /* Also turn on ffpe-trap in fortran. */ ++ break; ++ ++ case FP_MODEL_STRICT: ++ /* Turn on both precise and except. */ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_NORMAL: ++ /* Do nothing. */ ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* When -funsafe-math-optimizations is set the following + flags are set as well. 
*/ + static void +-- +2.33.0 + diff --git a/0020-simdmath-Enable-simdmath-on-kunpeng.patch b/0020-simdmath-Enable-simdmath-on-kunpeng.patch new file mode 100644 index 0000000..f6b7a48 --- /dev/null +++ b/0020-simdmath-Enable-simdmath-on-kunpeng.patch @@ -0,0 +1,317 @@ +From 49ad10199dbdda2c36850a2617f5c985977939c5 Mon Sep 17 00:00:00 2001 +From: bule +Date: Sun, 27 Aug 2023 16:49:42 +0800 +Subject: [PATCH 20/22] [simdmath] Enable simdmath on kunpeng + +This enable simd math function supported by libmathlib on fortran/c/c++. +Use -fsimdmath to turn on the generation of simdmath function. The +supported functions can be found in simdmath.h. Add more simd declaration +if you need more kinds of math functions. -msimdmath-64 is used to turn +on 64-bit simd math functions which is not supported by libmathlib. +Therefore, this option is default to off. +--- + gcc/c-family/c-opts.cc | 4 ++ + gcc/common.opt | 4 ++ + gcc/config/aarch64/aarch64.cc | 9 ++++- + gcc/config/aarch64/aarch64.opt | 6 +++ + gcc/fortran/scanner.cc | 3 ++ + gcc/opts.cc | 17 ++++++++ + .../gcc.target/aarch64/simd_pcs_attribute-3.c | 2 +- + libgomp/Makefile.am | 4 +- + libgomp/Makefile.in | 10 +++-- + libgomp/configure | 4 +- + libgomp/configure.ac | 2 +- + libgomp/simdmath.h.in | 40 +++++++++++++++++++ + libgomp/simdmath_f.h.in | 11 +++++ + 13 files changed, 106 insertions(+), 10 deletions(-) + create mode 100644 libgomp/simdmath.h.in + create mode 100644 libgomp/simdmath_f.h.in + +diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc +index a341a0617..5134f6128 100644 +--- a/gcc/c-family/c-opts.cc ++++ b/gcc/c-family/c-opts.cc +@@ -801,6 +801,10 @@ c_common_post_options (const char **pfilename) + if (cpp_opts->deps.style == DEPS_NONE) + check_deps_environment_vars (); + ++ if (flag_simdmath) ++ { ++ defer_opt (OPT_include, "simdmath.h"); ++ } + handle_deferred_opts (); + + sanitize_cpp_opts (); +diff --git a/gcc/common.opt b/gcc/common.opt +index f5eef8a45..e9d580957 100644 +--- a/gcc/common.opt 
++++ b/gcc/common.opt +@@ -2125,6 +2125,10 @@ fmath-errno + Common Var(flag_errno_math) Init(1) Optimization SetByCombined + Set errno after built-in math functions. + ++fsimdmath ++Common Var(flag_simdmath) Init(0) Optimization ++Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd. ++ + fmax-errors= + Common Joined RejectNegative UInteger Var(flag_max_errors) + -fmax-errors= Maximum number of errors to report. +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 226dc9dff..a3da4ca30 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -26904,8 +26904,13 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, + elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); + if (known_eq (clonei->simdlen, 0U)) + { +- count = 2; +- vec_bits = (num == 0 ? 64 : 128); ++ /* Currently mathlib or sleef hasn't provide function for V2SF mode ++ simdclone of single precision functions. (e.g._ZCVnN2v_expf) ++ Therefore this mode is disabled by default to avoid link error. ++ Use -msimdmath-64 option to enable this mode. */ ++ count = flag_simdmath_64 ? 2 : 1; ++ vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128); ++ + clonei->simdlen = exact_div (vec_bits, elt_bits); + } + else +diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +index 92220b26e..a64b927e9 100644 +--- a/gcc/config/aarch64/aarch64.opt ++++ b/gcc/config/aarch64/aarch64.opt +@@ -190,6 +190,12 @@ precision of square root results to about 16 bits for + single precision and to 32 bits for double precision. + If enabled, it implies -mlow-precision-recip-sqrt. + ++msimdmath-64 ++Target Var(flag_simdmath_64) Optimization ++Allow compiler to generate V2SF 64 bits simdclone of math functions, ++which is not currently supported in mathlib or sleef. ++Therefore this option is disabled by default. 
++ + mlow-precision-div + Target Var(flag_mlow_precision_div) Optimization + Enable the division approximation. Enabling this reduces +diff --git a/gcc/fortran/scanner.cc b/gcc/fortran/scanner.cc +index 2dff25147..63e262f51 100644 +--- a/gcc/fortran/scanner.cc ++++ b/gcc/fortran/scanner.cc +@@ -2769,6 +2769,9 @@ gfc_new_file (void) + if (flag_pre_include != NULL) + load_file (flag_pre_include, NULL, false); + ++ if (flag_simdmath) ++ load_file ("simdmath_f.h", NULL, false); ++ + if (gfc_cpp_enabled ()) + { + gfc_cpp_preprocess (gfc_source_file); +diff --git a/gcc/opts.cc b/gcc/opts.cc +index b522ed7e2..c3cc2c169 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -322,6 +322,7 @@ static const char undocumented_msg[] = N_("This option lacks documentation."); + static const char use_diagnosed_msg[] = N_("Uses of this option are diagnosed."); + + typedef char *char_p; /* For DEF_VEC_P. */ ++static void set_simdmath_flags (struct gcc_options *opts, int set); + + static void set_debug_level (uint32_t dinfo, int extended, + const char *arg, struct gcc_options *opts, +@@ -2850,6 +2851,10 @@ common_handle_option (struct gcc_options *opts, + dc->min_margin_width = value; + break; + ++ case OPT_fsimdmath: ++ set_simdmath_flags (opts, value); ++ break; ++ + case OPT_fdump_: + /* Deferred. */ + break; +@@ -3227,6 +3232,18 @@ common_handle_option (struct gcc_options *opts, + return true; + } + ++/* The following routines are used to set -fno-math-errno and -fopenmp-simd ++ to enable vector mathlib. */ ++static void ++set_simdmath_flags (struct gcc_options *opts, int set) ++{ ++ if (set) ++ { ++ opts->x_flag_errno_math = 0; ++ opts->x_flag_openmp_simd = 1; ++ } ++} ++ + /* Used to set the level of strict aliasing warnings in OPTS, + when no level is specified (i.e., when -Wstrict-aliasing, and not + -Wstrict-aliasing=level was given). 
+diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c +index 95f6a6803..e0e0efa9d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c ++++ b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-Ofast" } */ ++/* { dg-options "-Ofast -msimdmath-64" } */ + + __attribute__ ((__simd__)) + __attribute__ ((__nothrow__ , __leaf__ , __const__)) +diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am +index f8b2a06d6..8dfa160d6 100644 +--- a/libgomp/Makefile.am ++++ b/libgomp/Makefile.am +@@ -75,10 +75,10 @@ libgomp_la_SOURCES += openacc.f90 + endif + + nodist_noinst_HEADERS = libgomp_f.h +-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h ++nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h + if USE_FORTRAN + nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ +- openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod ++ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h + endif + + LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) +diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in +index 6f0cb7161..90fc326f0 100644 +--- a/libgomp/Makefile.in ++++ b/libgomp/Makefile.in +@@ -147,7 +147,7 @@ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno + mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs + CONFIG_HEADER = config.h +-CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \ ++CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \ + libgomp.spec + CONFIG_CLEAN_VPATH_FILES = + am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +@@ -583,9 +583,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ + @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBADD = libgomp.la $(PLUGIN_GCN_LIBS) + 
@PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBTOOLFLAGS = --tag=disable-static + nodist_noinst_HEADERS = libgomp_f.h +-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h ++nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h + @USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ +-@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod ++@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h + + LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) + LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ +@@ -676,6 +676,10 @@ omp.h: $(top_builddir)/config.status $(srcdir)/omp.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ ++simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in ++ cd $(top_builddir) && $(SHELL) ./config.status $@ ++simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in ++ cd $(top_builddir) && $(SHELL) ./config.status $@ + omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in +diff --git a/libgomp/configure b/libgomp/configure +index 85fdb4d3f..471c957b7 100755 +--- a/libgomp/configure ++++ b/libgomp/configure +@@ -17064,7 +17064,7 @@ fi + + + +-ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h" ++ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h" + + ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec" + +@@ -18215,6 +18215,8 @@ do + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;; + "omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;; ++ "simdmath.h") 
CONFIG_FILES="$CONFIG_FILES simdmath.h" ;; ++ "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;; + "omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;; + "libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; +diff --git a/libgomp/configure.ac b/libgomp/configure.ac +index a9b1f3973..1f81a0d30 100644 +--- a/libgomp/configure.ac ++++ b/libgomp/configure.ac +@@ -472,7 +472,7 @@ CFLAGS="$save_CFLAGS" + # Determine what GCC version number to use in filesystem paths. + GCC_BASE_VER + +-AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h) ++AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h) + AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec) + AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in]) + AC_CONFIG_FILES([testsuite/libgomp-site-extra.exp]) +diff --git a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in +new file mode 100644 +index 000000000..ab91a4ec3 +--- /dev/null ++++ b/libgomp/simdmath.h.in +@@ -0,0 +1,40 @@ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double cos (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float cosf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double sin (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float sinf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double exp (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float expf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double log (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float logf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double pow (double x, double y); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float powf (float x, float y); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float exp2f (float 
x); ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif +diff --git a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in +new file mode 100644 +index 000000000..550595015 +--- /dev/null ++++ b/libgomp/simdmath_f.h.in +@@ -0,0 +1,11 @@ ++!GCC$ builtin (cos) attributes simd (notinbranch) ++!GCC$ builtin (cosf) attributes simd (notinbranch) ++!GCC$ builtin (sin) attributes simd (notinbranch) ++!GCC$ builtin (sinf) attributes simd (notinbranch) ++!GCC$ builtin (exp) attributes simd (notinbranch) ++!GCC$ builtin (expf) attributes simd (notinbranch) ++!GCC$ builtin (exp2f) attributes simd (notinbranch) ++!GCC$ builtin (log) attributes simd (notinbranch) ++!GCC$ builtin (logf) attributes simd (notinbranch) ++!GCC$ builtin (pow) attributes simd (notinbranch) ++!GCC$ builtin (powf) attributes simd (notinbranch) +-- +2.33.0 + diff --git a/0021-StructReorderFields-Structure-reorder-fields.patch b/0021-StructReorderFields-Structure-reorder-fields.patch new file mode 100644 index 0000000..8324617 --- /dev/null +++ b/0021-StructReorderFields-Structure-reorder-fields.patch @@ -0,0 +1,5739 @@ +From 6997c9ad8985f6f0bfc16cdb46e7386af299a226 Mon Sep 17 00:00:00 2001 +From: h00564365 +Date: Mon, 31 Jul 2023 22:01:56 +0800 +Subject: [PATCH 21/22] [StructReorderFields] Structure reorder fields + +Introduce structure fields reordering optimization, that change +fields ordering of C-like structures in order to better utilize spatial +locality. 
+--- + gcc/common.opt | 4 + + gcc/doc/invoke.texi | 1 + + gcc/gimple-ssa-warn-access.cc | 2 +- + gcc/ipa-free-lang-data.cc | 4 +- + gcc/ipa-struct-reorg/escapes.def | 3 + + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 2545 +++++++++++++---- + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 14 +- + gcc/passes.def | 1 + + gcc/symbol-summary.h | 4 +- + .../struct/rf_DTE_struct_instance_field.c | 75 + + gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c | 94 + + .../gcc.dg/struct/rf_check_ptr_layers_bug.c | 24 + + .../gcc.dg/struct/rf_create_fields_bug.c | 82 + + .../gcc.dg/struct/rf_create_new_func_bug.c | 56 + + .../gcc.dg/struct/rf_ele_minus_verify.c | 60 + + .../gcc.dg/struct/rf_escape_by_base.c | 83 + + .../gcc.dg/struct/rf_external_func_types.c | 69 + + gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c | 72 + + .../gcc.dg/struct/rf_mem_ref_offset.c | 58 + + .../struct/rf_mul_layer_ptr_record_bug.c | 30 + + .../gcc.dg/struct/rf_pass_conflict.c | 109 + + gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c | 87 + + gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c | 71 + + .../gcc.dg/struct/rf_ptr_negate_expr.c | 55 + + gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c | 34 + + gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c | 55 + + gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c | 58 + + .../gcc.dg/struct/rf_rescusive_type.c | 57 + + .../struct/rf_rewrite_assign_more_cmp.c | 65 + + .../gcc.dg/struct/rf_rewrite_cond_bug.c | 72 + + .../gcc.dg/struct/rf_rewrite_cond_more_cmp.c | 58 + + .../gcc.dg/struct/rf_rewrite_phi_bug.c | 81 + + gcc/testsuite/gcc.dg/struct/rf_shwi.c | 23 + + gcc/testsuite/gcc.dg/struct/rf_visible_func.c | 92 + + .../gcc.dg/struct/rf_void_ptr_param_func.c | 54 + + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 15 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-1.c | 8 +- + gcc/testsuite/gcc.dg/struct/struct_reorg-3.c | 9 +- + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + 40 files changed, 3796 insertions(+), 490 deletions(-) + create mode 100644 
gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_external_func_types.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_shwi.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_visible_func.c + create mode 100644 gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index 0c7bd2f6c..98169de7c 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1954,6 +1954,10 @@ fipa-matrix-reorg + Common Ignore + Does nothing. 
Preserved for backward compatibility. + ++fipa-reorder-fields ++Common Var(flag_ipa_reorder_fields) Init(0) Optimization ++Perform structure fields reorder optimizations. ++ + fipa-struct-reorg + Common Var(flag_ipa_struct_reorg) Init(0) Optimization + Perform structure layout optimizations. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 3485cc8af..2b376e0e9 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -526,6 +526,7 @@ Objective-C and Objective-C++ Dialects}. + -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol + -finline-small-functions -fipa-modref -fipa-cp -fipa-cp-clone @gol + -fipa-bit-cp -fipa-vrp -fipa-pta -fipa-profile -fipa-pure-const @gol ++-fipa-reorder-fields @gol + -fipa-struct-reorg @gol + -fipa-reference -fipa-reference-addressable @gol + -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} @gol +diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc +index a24645783..7f5c92c96 100644 +--- a/gcc/gimple-ssa-warn-access.cc ++++ b/gcc/gimple-ssa-warn-access.cc +@@ -2198,7 +2198,7 @@ pass_waccess::gate (function *) + In pass waccess, it will traverse all SSA and cause ICE + when handling these unused SSA. So temporarily disable + pass waccess when enable structure optimizations. */ +- if (flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) + return false; + + return (warn_free_nonheap_object +diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc +index 5450be9fe..a88381ddb 100644 +--- a/gcc/ipa-free-lang-data.cc ++++ b/gcc/ipa-free-lang-data.cc +@@ -105,7 +105,7 @@ fld_simplified_type_name (tree type) + /* Simplify type will cause that struct A and struct A within + struct B are different type pointers, so skip it in structure + optimizations. 
*/ +- if (flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) + return TYPE_NAME (type); + + if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL) +@@ -349,7 +349,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld) + /* Simplify type will cause that struct A and struct A within + struct B are different type pointers, so skip it in structure + optimizations. */ +- if (flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) + return t; + if (POINTER_TYPE_P (t)) + return fld_incomplete_type_of (t, fld); +diff --git a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def +index d825eb3e6..996a09bac 100644 +--- a/gcc/ipa-struct-reorg/escapes.def ++++ b/gcc/ipa-struct-reorg/escapes.def +@@ -58,5 +58,8 @@ DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled + DEF_ESCAPE (escape_return, "Type escapes via a return [not handled yet]") + DEF_ESCAPE (escape_separate_instance, "Type escapes via a separate instance") + DEF_ESCAPE (escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt") ++DEF_ESCAPE (escape_via_orig_escape, "Type escapes via a original escape type") ++DEF_ESCAPE (escape_instance_field, "Type escapes via a field of instance") ++DEF_ESCAPE (escape_via_empty_no_orig, "Type escapes via empty and no original") + + #undef DEF_ESCAPE +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +index 9f790b28b..3e5f9538b 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -207,50 +207,88 @@ lang_c_p (void) + if (!language_string) + return false; + +- if (strcmp (language_string, "GNU GIMPLE") == 0) ++ if (lang_GNU_C ()) ++ return true; ++ else if (strcmp (language_string, "GNU GIMPLE") == 0) // For LTO check + { + unsigned i = 0; +- tree t = NULL; +- const char *unit_string = NULL; ++ tree t = NULL_TREE; + + FOR_EACH_VEC_SAFE_ELT 
(all_translation_units, i, t) + { +- unit_string = TRANSLATION_UNIT_LANGUAGE (t); +- if (!unit_string +- || (strncmp (unit_string, "GNU C", 5) != 0) +- || (!ISDIGIT (unit_string[5]))) ++ language_string = TRANSLATION_UNIT_LANGUAGE (t); ++ if (language_string == NULL ++ || strncmp (language_string, "GNU C", 5) ++ || (language_string[5] != '\0' ++ && !(ISDIGIT (language_string[5])))) + return false; + } + return true; + } +- else if (strncmp (language_string, "GNU C", 5) == 0 +- && ISDIGIT (language_string[5])) +- return true; +- + return false; + } + ++/* Get the number of pointer layers. */ ++ ++int ++get_ptr_layers (tree expr) ++{ ++ int layers = 0; ++ while (POINTER_TYPE_P (expr) || TREE_CODE (expr) == ARRAY_TYPE) ++ { ++ layers++; ++ expr = TREE_TYPE (expr); ++ } ++ return layers; ++} ++ ++/* Comparison pointer layers. */ ++ ++bool ++cmp_ptr_layers (tree a, tree b) ++{ ++ return get_ptr_layers (a) == get_ptr_layers (b); ++} ++ ++/* Return true if the ssa_name comes from the void* parameter. */ ++ ++bool ++is_from_void_ptr_parm (tree ssa_name) ++{ ++ gcc_assert (TREE_CODE (ssa_name) == SSA_NAME); ++ tree var = SSA_NAME_VAR (ssa_name); ++ return (var && TREE_CODE (var) == PARM_DECL ++ && VOID_POINTER_P (TREE_TYPE (ssa_name))); ++} ++ + enum srmode + { + NORMAL = 0, +- COMPLETE_STRUCT_RELAYOUT ++ COMPLETE_STRUCT_RELAYOUT, ++ STRUCT_REORDER_FIELDS + }; + +-static bool is_result_of_mult (tree, tree *, tree); ++static bool is_result_of_mult (tree arg, tree *num, tree struct_size); ++static bool isptrptr (tree type); + +-} // anon namespace ++srmode current_mode; + ++} // anon namespace + + namespace struct_reorg { + ++hash_map > fields_to_finish; ++ + /* Constructor of srfunction. */ + + srfunction::srfunction (cgraph_node *n) + : node (n), + old (NULL), + newnode (NULL), +- newf (NULL) +-{} ++ newf (NULL), ++ is_safe_func (false) ++{ ++} + + /* Add an ARG to the list of arguments for the function. 
*/ + +@@ -400,12 +438,13 @@ srtype::add_field_site (srfield *field) + + /* Constructor of DECL. */ + +-srdecl::srdecl (srtype *tp, tree decl, int argnum) ++srdecl::srdecl (srtype *tp, tree decl, int argnum, tree orig_type) + : type (tp), + decl (decl), + func (NULL_TREE), + argumentnum (argnum), +- visited (false) ++ visited (false), ++ orig_type (orig_type) + { + if (TREE_CODE (decl) == SSA_NAME) + func = current_function_decl; +@@ -429,17 +468,23 @@ srfunction::find_decl (tree decl) + /* Record DECL of the TYPE with argument num ARG. */ + + srdecl * +-srfunction::record_decl (srtype *type, tree decl, int arg) ++srfunction::record_decl (srtype *type, tree decl, int arg, tree orig_type) + { + // Search for the decl to see if it is already there. + srdecl *decl1 = find_decl (decl); + + if (decl1) +- return decl1; ++ { ++ /* Added the orig_type information. */ ++ if (!decl1->orig_type && orig_type && isptrptr (orig_type)) ++ decl1->orig_type = orig_type; ++ return decl1; ++ } + + gcc_assert (type); + +- decl1 = new srdecl (type, decl, arg); ++ orig_type = isptrptr (TREE_TYPE (decl)) ? TREE_TYPE (decl) : orig_type; ++ decl1 = new srdecl (type, decl, arg, isptrptr (orig_type) ? 
orig_type : NULL); + decls.safe_push (decl1); + return decl1; + } +@@ -503,31 +548,21 @@ srtype::dump (FILE *f) + print_generic_expr (f, type); + fprintf (f, "(%d) { ", TYPE_UID (type)); + if (escapes != does_not_escape) +- fprintf (f, " escapes = \"%s\"\n", escape_reason ()); +- fprintf (f, " fields = { "); ++ fprintf (f, "escapes = \"%s\"", escape_reason ()); ++ fprintf (f, "\nfields = {\n"); + FOR_EACH_VEC_ELT (fields, i, field) +- { +- if (i == 0) +- fprintf (f, "\n "); +- else +- fprintf (f, "\n, "); +- field->dump (f); +- } +- fprintf (f, " }\n "); +- fprintf (f, "\n accesses = {"); ++ field->dump (f); ++ fprintf (f, "}\n "); ++ ++ fprintf (f, "\naccesses = {\n"); + FOR_EACH_VEC_ELT (accesses, i, access) +- { +- fprintf (f, "\n"); +- access->dump (f); +- } +- fprintf (f, " }\n "); +- fprintf (f, "\n functions = {"); ++ access->dump (f); ++ fprintf (f, "}\n "); ++ ++ fprintf (f, "\nfunctions = {\n"); + FOR_EACH_VEC_ELT (functions, i, fn) +- { +- fprintf (f, " \n"); +- fn->simple_dump (f); +- } +- fprintf (f, "\n }\n"); ++ fn->simple_dump (f); ++ fprintf (f, "}\n"); + fprintf (f, "}\n"); + } + +@@ -537,6 +572,8 @@ void + srtype::simple_dump (FILE *f) + { + print_generic_expr (f, type); ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ fprintf (f, "(%d)", TYPE_UID (type)); + } + + /* Analyze the type and decide what to be done with it. */ +@@ -572,6 +609,12 @@ srfield::create_new_fields (tree newtype[max_split], + tree newfields[max_split], + tree newlast[max_split]) + { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ create_new_reorder_fields (newtype, newfields, newlast); ++ return; ++ } ++ + tree nt[max_split]; + + for (unsigned i = 0; i < max_split; i++) +@@ -620,6 +663,104 @@ srfield::create_new_fields (tree newtype[max_split], + } + } + ++/* Reorder fields. */ ++ ++void ++srfield::reorder_fields (tree newfields[max_split], tree newlast[max_split], ++ tree &field) ++{ ++ /* Reorder fields in descending. 
++ newfields: always stores the first member of the chain ++ and with the largest size. ++ field: indicates the node to be inserted. */ ++ if (newfields[clusternum] == NULL) ++ { ++ newfields[clusternum] = field; ++ newlast[clusternum] = field; ++ } ++ else ++ { ++ tree tmp = newfields[clusternum]; ++ if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (field))) ++ > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tmp)))) ++ { ++ DECL_CHAIN (field) = tmp; ++ newfields[clusternum] = field; ++ } ++ else ++ { ++ while (DECL_CHAIN (tmp) ++ && (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (field))) ++ <= tree_to_uhwi ( ++ TYPE_SIZE (TREE_TYPE (DECL_CHAIN (tmp)))))) ++ tmp = DECL_CHAIN (tmp); ++ ++ /* Now tmp size > field size ++ insert field: tmp -> xx ==> tmp -> field -> xx. */ ++ DECL_CHAIN (field) = DECL_CHAIN (tmp); // field -> xx ++ DECL_CHAIN (tmp) = field; // tmp -> field ++ } ++ } ++} ++ ++/* Create the new reorder fields for this field. ++ newtype[max_split]: srtype's member variable, ++ newfields[max_split]: created by create_new_type func, ++ newlast[max_split]: created by create_new_type func. */ ++ ++void ++srfield::create_new_reorder_fields (tree newtype[max_split], ++ tree newfields[max_split], ++ tree newlast[max_split]) ++{ ++ /* newtype, corresponding to newtype[max_split] in srtype. */ ++ tree nt = NULL_TREE; ++ if (type == NULL) ++ /* Common var. */ ++ nt = fieldtype; ++ else ++ { ++ /* RECORD_TYPE var. */ ++ if (type->has_escaped ()) ++ nt = type->type; ++ else ++ nt = type->newtype[0]; ++ } ++ tree field = make_node (FIELD_DECL); ++ ++ /* Used for recursive types. ++ fields_to_finish: hash_map in the format of "type: {fieldA, fieldB}", ++ key : indicates the original type, ++ value: fields that need to be updated to newtype. 
*/ ++ if (nt == NULL) ++ { ++ nt = make_node (RECORD_TYPE); ++ auto_vec &fields ++ = fields_to_finish.get_or_insert (inner_type (type->type)); ++ fields.safe_push (field); ++ } ++ ++ DECL_NAME (field) = DECL_NAME (fielddecl); ++ if (type == NULL) ++ /* Common members do not need to reconstruct. ++ Otherwise, int* -> int** or void* -> void**. */ ++ TREE_TYPE (field) = nt; ++ else ++ TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt); ++ DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl); ++ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); ++ DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl); ++ TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl); ++ DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl); ++ TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (fielddecl); ++ DECL_CONTEXT (field) = newtype[clusternum]; ++ ++ reorder_fields (newfields, newlast, field); ++ ++ /* srfield member variable, which stores the new field decl. */ ++ newfield[0] = field; ++} ++ + /* Create the new TYPE corresponding to THIS type. */ + + bool +@@ -655,7 +796,8 @@ srtype::create_new_type (void) + /* If the fields' types did have a change or + we are not splitting the struct into two clusters, + then just return false and don't change the type. */ +- if (!createnewtype && maxclusters == 0) ++ if (!createnewtype && maxclusters == 0 ++ && current_mode != STRUCT_REORDER_FIELDS) + { + newtype[0] = type; + return false; +@@ -664,6 +806,7 @@ srtype::create_new_type (void) + /* Should have at most max_split clusters. */ + gcc_assert (maxclusters < max_split); + ++ /* Record the first member of the field chain. */ + tree newfields[max_split]; + tree newlast[max_split]; + +@@ -682,7 +825,8 @@ srtype::create_new_type (void) + sprintf (id, "%d", i); + if (tname) + { +- name = concat (tname, ".reorg.", id, NULL); ++ name = concat (tname, current_mode == STRUCT_REORDER_FIELDS ++ ? ".reorder." 
: ".reorg.", id, NULL); + TYPE_NAME (newtype[i]) = build_decl (UNKNOWN_LOCATION, + TYPE_DECL, + get_identifier (name), +@@ -718,6 +862,7 @@ srtype::create_new_type (void) + for (unsigned i = 0; i < maxclusters; i++) + { + print_generic_expr (dump_file, newtype[i]); ++ fprintf (dump_file, "(%d)", TYPE_UID (newtype[i])); + fprintf (dump_file, "\n"); + } + } +@@ -776,8 +921,12 @@ srfunction::create_new_decls (void) + tree newinner[max_split]; + memset (newinner, 0, sizeof (newinner)); + for (unsigned j = 0; j < max_split && type->newtype[j]; j++) +- newtype1[j] = reconstruct_complex_type (TREE_TYPE (decls[i]->decl), +- type->newtype[j]); ++ { ++ newtype1[j] = reconstruct_complex_type ( ++ isptrptr (decls[i]->orig_type) ? decls[i]->orig_type ++ : TREE_TYPE (decls[i]->decl), ++ type->newtype[j]); ++ } + if (inner) + { + srdecl *in = find_decl (inner); +@@ -825,7 +974,8 @@ srfunction::create_new_decls (void) + sprintf (id, "%d", j); + if (tname) + { +- name = concat (tname, ".reorg.", id, NULL); ++ name = concat (tname, current_mode == STRUCT_REORDER_FIELDS ++ ? ".reorder." : ".reorg.", id, NULL); + new_name = get_identifier (name); + free (name); + } +@@ -850,7 +1000,6 @@ srfunction::create_new_decls (void) + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Created New decls for decl:\n"); +- fprintf (dump_file, "\n"); + decls[i]->dump (dump_file); + fprintf (dump_file, "\n"); + for (unsigned j = 0; j < max_split && decls[i]->newdecl[j]; j++) +@@ -876,7 +1025,7 @@ srfield::dump (FILE *f) + fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); + fprintf (f, ", type = "); + print_generic_expr (f, fieldtype); +- fprintf (f, "\n}\n"); ++ fprintf (f, "}\n"); + } + + /* A simplified dump out the field structure to FILE. 
*/ +@@ -908,7 +1057,7 @@ sraccess::dump (FILE *f) + fprintf (f, " in function: %s/%d", node->name (), node->order); + fprintf (f, ", stmt:\n"); + print_gimple_stmt (f, stmt, 0); +- fprintf (f, "\n }\n"); ++ fprintf (f, "}\n"); + } + + /* Dump out the decl structure to FILE. */ +@@ -1023,8 +1172,7 @@ public: + // Constructors + ipa_struct_reorg (void) + : current_function (NULL), +- done_recording (false), +- current_mode (NORMAL) ++ done_recording (false) + {} + + // Fields +@@ -1032,9 +1180,10 @@ public: + auto_vec_del functions; + srglobal globals; + srfunction *current_function; ++ hash_set safe_functions; ++ auto_vec ext_func_types; + + bool done_recording; +- srmode current_mode; + + // Methods + unsigned execute (enum srmode mode); +@@ -1042,6 +1191,7 @@ public: + gimple *stmt = NULL); + + void dump_types (FILE *f); ++ void dump_newtypes (FILE *f); + void dump_types_escaped (FILE *f); + void dump_functions (FILE *f); + void record_accesses (void); +@@ -1049,6 +1199,9 @@ public: + bool walk_field_for_cycles (srtype *); + void prune_escaped_types (void); + void propagate_escape (void); ++ void propagate_escape_via_original (void); ++ void propagate_escape_via_empty_with_no_original (void); ++ void propagate_escape_via_ext_func_types (void); + void analyze_types (void); + void clear_visited (void); + bool create_new_types (void); +@@ -1060,8 +1213,11 @@ public: + srdecl *record_var (tree decl, + escape_type escapes = does_not_escape, + int arg = -1); ++ void record_safe_func_with_void_ptr_parm (void); + srfunction *record_function (cgraph_node *node); + srfunction *find_function (cgraph_node *node); ++ void record_field_type (tree field, srtype *base_srtype); ++ void record_struct_field_types (tree base_type, srtype *base_srtype); + srtype *record_type (tree type); + void process_union (tree type); + srtype *find_type (tree type); +@@ -1072,7 +1228,7 @@ public: + void record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt); + void mark_expr_escape (tree, 
escape_type, gimple *stmt); + bool handled_allocation_stmt (gimple *stmt); +- tree allocate_size (srtype *t, gimple *stmt); ++ tree allocate_size (srtype *t, srdecl *decl, gimple *stmt); + + void mark_decls_in_as_not_needed (tree fn); + +@@ -1087,21 +1243,23 @@ public: + bool ignore_missing_decl = false); + bool rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhs[max_split], + tree newrhs[max_split]); +- bool get_type_field (tree expr, tree &base, bool &indirect, +- srtype *&type, srfield *&field, +- bool &realpart, bool &imagpart, +- bool &address, bool should_create = false, +- bool can_escape = false); ++ bool get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, ++ srfield *&field, bool &realpart, bool &imagpart, ++ bool &address, bool &escape_from_base, ++ bool should_create = false, bool can_escape = false); + bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); + + void check_alloc_num (gimple *stmt, srtype *type); ++ void check_definition_assign (srdecl *decl, vec &worklist); ++ void check_definition_call (srdecl *decl, vec &worklist); + void check_definition (srdecl *decl, vec &); + void check_uses (srdecl *decl, vec &); + void check_use (srdecl *decl, gimple *stmt, vec &); +- void check_type_and_push (tree newdecl, srtype *type, ++ void check_type_and_push (tree newdecl, srdecl *decl, + vec &worklist, gimple *stmt); + void check_other_side (srdecl *decl, tree other, gimple *stmt, + vec &worklist); ++ void check_ptr_layers (tree a_expr, tree b_expr, gimple *stmt); + + void find_vars (gimple *stmt); + void find_var (tree expr, gimple *stmt); +@@ -1703,9 +1861,42 @@ ipa_struct_reorg::dump_types (FILE *f) + srtype *type; + FOR_EACH_VEC_ELT (types, i, type) + { ++ fprintf (f, "======= the %dth type: ======\n", i); + type->dump (f); ++ fprintf (f, "\n"); ++ } ++} ++ ++/* Dump all of the created newtypes to file F. 
*/ ++ ++void ++ipa_struct_reorg::dump_newtypes (FILE *f) ++{ ++ unsigned i = 0; ++ srtype *type = NULL; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ if (type->has_escaped ()) ++ continue; ++ fprintf (f, "======= the %dth newtype: ======\n", i); ++ fprintf (f, "type : "); ++ print_generic_expr (f, type->newtype[0]); ++ fprintf (f, "(%d) ", TYPE_UID (type->newtype[0])); ++ fprintf (f, "{ "); ++ fprintf (f, "\nfields = {\n"); ++ ++ for (tree field = TYPE_FIELDS (TYPE_MAIN_VARIANT (type->newtype[0])); ++ field; field = DECL_CHAIN (field)) ++ { ++ fprintf (f, "field (%d) ", DECL_UID (field)); ++ fprintf (f, "{"); ++ fprintf (f, "type = "); ++ print_generic_expr (f, TREE_TYPE (field)); ++ fprintf (f, "}\n"); ++ } ++ fprintf (f, "}\n "); ++ fprintf (f, "\n"); + } +- fprintf (f, "\n"); + } + + /* Dump all of the recorded types to file F. */ +@@ -1803,6 +1994,8 @@ isarraytype (tree type) + static bool + isptrptr (tree type) + { ++ if (type == NULL) ++ return false; + bool firstptr = false; + while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) + { +@@ -1817,154 +2010,740 @@ isptrptr (tree type) + return false; + } + +-/* Return the escape type which corresponds to if +- this is an volatile type, an array type or a pointer +- to a pointer type. */ ++/* Adding node to map and stack. 
*/ + +-static escape_type +-escape_type_volatile_array_or_ptrptr (tree type) ++bool ++add_node (tree node, int layers, hash_map &map, ++ auto_vec &stack) + { +- if (isvolatile_type (type)) +- return escape_volatile; +- if (isarraytype (type)) +- return escape_array; +- if (isptrptr (type)) +- return escape_ptr_ptr; +- return does_not_escape; ++ if (TREE_CODE (node) != SSA_NAME) ++ return false; ++ if (map.get (node) == NULL) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, " "); ++ fprintf (dump_file, "add node: \t\t"); ++ print_generic_expr (dump_file, node); ++ fprintf (dump_file, ",\t\tptr layers: %d: \n", layers); ++ } ++ map.put (node, layers); ++ stack.safe_push (node); ++ } ++ else if (*map.get (node) != layers) ++ return false; ++ return true; + } + +-/* Record TYPE if not already recorded. */ ++/* Check the number of pointer layers of the gimple phi in definition. */ + +-srtype * +-ipa_struct_reorg::record_type (tree type) ++bool ++check_def_phi (tree def_node, hash_map &ptr_layers) + { +- unsigned typeuid; +- +- /* Get the main variant as we are going +- to record that type only. */ +- type = TYPE_MAIN_VARIANT (type); +- typeuid = TYPE_UID (type); ++ bool res = true; ++ gimple *def_stmt = SSA_NAME_DEF_STMT (def_node); ++ for (unsigned j = 0; j < gimple_phi_num_args (def_stmt); j++) ++ { ++ tree phi_node = gimple_phi_arg_def (def_stmt, j); ++ if (integer_zerop (phi_node)) ++ continue; ++ if (ptr_layers.get (phi_node) == NULL) ++ return false; ++ res &= *ptr_layers.get (def_node) == *ptr_layers.get (phi_node); ++ } ++ return res; ++} + +- srtype *type1; ++/* Check the number of pointer layers of the gimple assign in definition. 
*/ + +- type1 = find_type (type); +- if (type1) +- return type1; ++bool ++check_def_assign (tree def_node, hash_map &ptr_layers) ++{ ++ bool res = true; ++ gimple *def_stmt = SSA_NAME_DEF_STMT (def_node); ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (def_stmt); ++ tree_code rhs_code = gimple_assign_rhs_code (def_stmt); ++ tree rhs1 = gimple_assign_rhs1 (def_stmt); ++ tree rhs1_base = TREE_CODE (rhs1) == MEM_REF ? TREE_OPERAND (rhs1, 0) : rhs1; ++ if (ptr_layers.get (rhs1_base) == NULL) ++ return false; ++ if (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS) ++ { ++ if (TREE_CODE (rhs1) == SSA_NAME) ++ res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1); ++ else if (TREE_CODE (rhs1) == MEM_REF) ++ res = *ptr_layers.get (def_node) ++ == *ptr_layers.get (TREE_OPERAND (rhs1, 0)); ++ else ++ { ++ return false; ++ } ++ } ++ else if (rhs_class == GIMPLE_BINARY_RHS) ++ { ++ if (rhs_code == POINTER_PLUS_EXPR) ++ res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1); ++ else if (rhs_code == BIT_AND_EXPR) ++ res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1); ++ else ++ return false; ++ } ++ else ++ return false; ++ return res; ++} + +- /* If already done recording just return NULL. */ +- if (done_recording) +- return NULL; ++/* Check node definition. */ + ++bool ++check_node_def (hash_map &ptr_layers) ++{ ++ bool res = true; + if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "Recording new type: %u.\n", typeuid); +- +- type1 = new srtype (type); +- types.safe_push (type1); +- +- /* If the type has an user alignment set, +- that means the user most likely already setup the type. 
*/ +- if (TYPE_USER_ALIGN (type)) +- type1->mark_escape (escape_user_alignment, NULL); +- +- for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ fprintf (dump_file, "\n======== check node definition ========\n"); ++ for (unsigned i = 1; i < num_ssa_names; ++i) + { +- if (TREE_CODE (field) == FIELD_DECL) ++ tree name = ssa_name (i); ++ if (name && ptr_layers.get (name) != NULL) + { +- tree t = TREE_TYPE (field); +- process_union (t); +- if (TREE_CODE (inner_type (t)) == UNION_TYPE +- || TREE_CODE (inner_type (t)) == QUAL_UNION_TYPE) +- type1->mark_escape (escape_union, NULL); +- if (isvolatile_type (t)) +- type1->mark_escape (escape_volatile, NULL); +- escape_type e = escape_type_volatile_array_or_ptrptr (t); +- if (e != does_not_escape) +- type1->mark_escape (e, NULL); +- if (handled_type (t)) +- { +- srtype *t1 = record_type (inner_type (t)); +- srfield *f = type1->find_field (int_byte_position (field)); +- /* We might have an variable sized type which +- we don't set the handle. */ +- if (f) +- { +- f->type = t1; +- t1->add_field_site (f); +- } +- if (t1 == type1 && current_mode != COMPLETE_STRUCT_RELAYOUT) +- type1->mark_escape (escape_rescusive_type, NULL); +- } ++ gimple *def_stmt = SSA_NAME_DEF_STMT (name); ++ if (dump_file && (dump_flags & TDF_DETAILS) ++ && gimple_code (def_stmt) != GIMPLE_DEBUG) ++ print_gimple_stmt (dump_file, def_stmt, 0); ++ ++ if (gimple_code (def_stmt) == GIMPLE_PHI) ++ res = check_def_phi (name, ptr_layers); ++ else if (gimple_code (def_stmt) == GIMPLE_ASSIGN) ++ res = check_def_assign (name, ptr_layers); ++ else if (gimple_code (def_stmt) == GIMPLE_NOP) ++ continue; ++ else ++ return false; + } + } ++ return res; ++} + +- return type1; ++/* Check pointer usage. 
*/ ++ ++bool ++check_record_ptr_usage (gimple *use_stmt, tree &current_node, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack) ++{ ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ if (rhs_class != GIMPLE_SINGLE_RHS ++ || (TREE_CODE (rhs1) != COMPONENT_REF && TREE_CODE (rhs1) != SSA_NAME) ++ || (TREE_CODE (lhs) != MEM_REF && TREE_CODE (lhs) != SSA_NAME)) ++ return false; ++ ++ bool res = true; ++ /* MEM[(long int *)a_1] = _1; (record). ++ If lhs is ssa_name, lhs cannot be the current node. ++ _2 = _1->flow; (No record). */ ++ if (TREE_CODE (rhs1) == SSA_NAME) ++ { ++ tree tmp = (rhs1 != current_node) ? rhs1 : lhs; ++ if (TREE_CODE (tmp) == MEM_REF) ++ res = add_node (TREE_OPERAND (tmp, 0), ++ *ptr_layers.get (current_node) + 1, ++ ptr_layers, ssa_name_stack); ++ else ++ res = add_node (tmp, *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ } ++ else if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == COMPONENT_REF) ++ res = !(POINTER_TYPE_P (TREE_TYPE (rhs1))); ++ else ++ res = false; ++ return res; + } + +-/* Mark TYPE as escaping with ESCAPES as the reason. */ ++/* Check and record a single node.
*/ + -void +-ipa_struct_reorg::mark_type_as_escape (tree type, +- escape_type escapes, +- gimple *stmt) ++bool ++check_record_single_node (gimple *use_stmt, tree &current_node, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack) + { +- if (handled_type (type)) +- { +- srtype *stype = record_type (inner_type (type)); ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ gcc_assert (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS); + +- if (!stype) +- return; ++ if ((TREE_CODE (rhs1) != SSA_NAME && TREE_CODE (rhs1) != MEM_REF) ++ || (TREE_CODE (lhs) != SSA_NAME && TREE_CODE (lhs) != MEM_REF)) ++ return false; + +- stype->mark_escape (escapes, stmt); ++ bool res = true; ++ if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == MEM_REF) ++ /* Add such as: _2 = MEM[(struct arc_t * *)_1]. */ ++ res = add_node (lhs, *ptr_layers.get (current_node) - 1, ++ ptr_layers, ssa_name_stack); ++ else if (TREE_CODE (lhs) == MEM_REF && TREE_CODE (rhs1) == SSA_NAME) ++ { ++ /* Add such as: MEM[(long int *)a_1] = _1. */ ++ if (rhs1 == current_node) ++ res = add_node (TREE_OPERAND (lhs, 0), ++ *ptr_layers.get (current_node) + 1, ++ ptr_layers, ssa_name_stack); ++ else ++ res = add_node (rhs1, *ptr_layers.get (current_node) - 1, ++ ptr_layers, ssa_name_stack); + } ++ else if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == SSA_NAME) ++ res = add_node (lhs, *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ else ++ res = false; ++ ++ return res; + } + +-/* Maybe process the union of type TYPE, such that marking all of the fields' +- types as being escaping. */ ++/* Check and record multiple nodes.
*/ + -void +-ipa_struct_reorg::process_union (tree type) ++bool ++check_record_mult_node (gimple *use_stmt, tree &current_node, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack) + { +- static hash_set unions_recorded; ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ tree_code rhs_code = gimple_assign_rhs_code (use_stmt); ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ tree rhs2 = gimple_assign_rhs2 (use_stmt); ++ gcc_assert (rhs_class == GIMPLE_BINARY_RHS); ++ ++ if ((rhs_code != POINTER_PLUS_EXPR && rhs_code != POINTER_DIFF_EXPR ++ && rhs_code != BIT_AND_EXPR) ++ || (TREE_CODE (lhs) != SSA_NAME && TREE_CODE (rhs1) != SSA_NAME)) ++ return false; + +- type = inner_type (type); +- if (TREE_CODE (type) != UNION_TYPE +- && TREE_CODE (type) != QUAL_UNION_TYPE) +- return; ++ bool res = true; ++ if (rhs_code == POINTER_PLUS_EXPR) ++ res = add_node (lhs == current_node ? rhs1 : lhs, ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ else if (rhs_code == POINTER_DIFF_EXPR) ++ res = add_node (rhs1 != current_node ? rhs1 : rhs2, ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ else if (rhs_code == BIT_AND_EXPR) ++ { ++ if (TREE_CODE (rhs2) != INTEGER_CST) ++ return false; ++ res = add_node (lhs == current_node ? rhs1 : lhs, ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ } ++ return res; ++} + +- type = TYPE_MAIN_VARIANT (type); ++/* Check whether gimple assign is correctly used and record node. */ + +- /* We already processed this type.
*/ +- if (unions_recorded.add (type)) +- return; ++bool ++check_record_assign (tree &current_node, gimple *use_stmt, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack) ++{ ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ if (*ptr_layers.get (current_node) == 1) ++ return check_record_ptr_usage (use_stmt, current_node, ++ ptr_layers, ssa_name_stack); ++ else if (*ptr_layers.get (current_node) > 1) ++ { ++ if (rhs_class != GIMPLE_BINARY_RHS ++ && rhs_class != GIMPLE_UNARY_RHS ++ && rhs_class != GIMPLE_SINGLE_RHS) ++ return false; + +- for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ if (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS) ++ return check_record_single_node (use_stmt, current_node, ++ ptr_layers, ssa_name_stack); ++ else if (rhs_class == GIMPLE_BINARY_RHS) ++ return check_record_mult_node (use_stmt, current_node, ++ ptr_layers, ssa_name_stack); ++ } ++ else ++ return false; ++ ++ return true; ++} ++ ++/* Check whether gimple phi is correctly used and record node. */ ++ ++bool ++check_record_phi (tree &current_node, gimple *use_stmt, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack) ++{ ++ bool res = true; ++ res &= add_node (gimple_phi_result (use_stmt), *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ ++ for (unsigned i = 0; i < gimple_phi_num_args (use_stmt); i++) + { +- if (TREE_CODE (field) == FIELD_DECL) +- { +- mark_type_as_escape (TREE_TYPE (field), escape_union); +- process_union (TREE_TYPE (field)); +- } ++ if (integer_zerop (gimple_phi_arg_def (use_stmt, i))) ++ continue; ++ res &= add_node (gimple_phi_arg_def (use_stmt, i), ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); + } ++ return res; + } + +-/* Used by record_var function as a callback to walk_tree. +- Mark the type as escaping if it has expressions which +- cannot be converted for global initializations. */ ++/* Check the use of callee.
*/ + +-static tree +-record_init_types (tree *tp, int *walk_subtrees, void *data) ++bool ++check_callee (cgraph_node *node, gimple *stmt, ++ hash_map &ptr_layers, int input_layers) + { +- ipa_struct_reorg *c = (ipa_struct_reorg *)data; +- switch (TREE_CODE (*tp)) ++ /* caller main () ++ { spec_qsort.constprop (_649, _651); } ++ def spec_qsort.constprop (void * a, size_t n) ++ { spec_qsort.constprop (a_1, _139); } */ ++ /* In safe functions, only call itself is allowed. */ ++ if (node->get_edge (stmt)->callee != node) ++ return false; ++ tree input_node = gimple_call_arg (stmt, 0); ++ if (ptr_layers.get (input_node) == NULL ++ || *ptr_layers.get (input_node) != input_layers) ++ return false; ++ if (SSA_NAME_VAR (input_node) != DECL_ARGUMENTS (node->decl)) ++ return false; ++ ++ for (unsigned i = 1; i < gimple_call_num_args (stmt); i++) + { +- CASE_CONVERT: +- case COMPONENT_REF: +- case VIEW_CONVERT_EXPR: +- case ARRAY_REF: +- { +- tree typeouter = TREE_TYPE (*tp); +- tree typeinner = TREE_TYPE (TREE_OPERAND (*tp, 0)); +- c->mark_type_as_escape (typeouter, escape_via_global_init); ++ if (ptr_layers.get (gimple_call_arg (stmt, i)) != NULL) ++ return false; ++ } ++ return true; ++} ++ ++/* Check the usage of input nodes and related nodes. */ ++ ++bool ++check_node_use (cgraph_node *node, tree current_node, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack, ++ int input_layers) ++{ ++ imm_use_iterator imm_iter; ++ gimple *use_stmt = NULL; ++ bool res = true; ++ /* Use FOR_EACH_IMM_USE_STMT as an indirect edge ++ to search for possible related nodes and push to stack. */ ++ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, current_node) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS) ++ && gimple_code (use_stmt) != GIMPLE_DEBUG) ++ { ++ fprintf (dump_file, "%*s", 4, ""); ++ print_gimple_stmt (dump_file, use_stmt, 0); ++ } ++ /* For other types of gimple, do not record the node. 
*/ ++ if (res) ++ { ++ if (gimple_code (use_stmt) == GIMPLE_PHI) ++ res = check_record_phi (current_node, use_stmt, ++ ptr_layers, ssa_name_stack); ++ else if (gimple_code (use_stmt) == GIMPLE_ASSIGN) ++ res = check_record_assign (current_node, use_stmt, ++ ptr_layers, ssa_name_stack); ++ else if (gimple_code (use_stmt) == GIMPLE_CALL) ++ res = check_callee (node, use_stmt, ptr_layers, input_layers); ++ else if (gimple_code (use_stmt) == GIMPLE_RETURN) ++ res = false; ++ } ++ } ++ return res; ++} ++ ++/* Trace the pointer layers of void node. */ ++ ++bool ++get_void_node_ptr_layers (tree input, int &input_layers) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "input type is void* node\n"); ++ imm_use_iterator imm_iter; ++ gimple *use_stmt = NULL; ++ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, input) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ print_gimple_stmt (dump_file, use_stmt, 0); ++ if (gimple_code (use_stmt) == GIMPLE_ASSIGN ++ && gimple_assign_rhs_class (use_stmt) == GIMPLE_SINGLE_RHS) ++ { ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ if (TREE_CODE (lhs) == SSA_NAME && handled_type (TREE_TYPE (lhs))) ++ { ++ if (TREE_CODE (rhs1) == MEM_REF) ++ { ++ input_layers = get_ptr_layers (TREE_TYPE (lhs)) + 1; ++ return true; ++ } ++ } ++ } ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "end trace pointer layers of void* node\n"); ++ return false; ++} ++ ++/* Preparing the First Node for DFS. */ ++ ++bool ++set_init_node (cgraph_node *node, cgraph_edge *caller, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack, int &input_layers) ++{ ++ /* Set input_layer ++ caller spec_qsort.constprop (_649, _651) ++ |-- Obtains the actual ptr layer ++ from the input node. 
*/ ++ caller->caller->get_untransformed_body (); ++ if (caller->call_stmt == NULL ++ || gimple_call_num_args (caller->call_stmt) == 0) ++ return false; ++ tree input = gimple_call_arg (caller->call_stmt, 0); ++ if (!(POINTER_TYPE_P (TREE_TYPE (input)) ++ || TREE_CODE (TREE_TYPE (input)) == ARRAY_TYPE)) ++ return false; ++ if (handled_type (TREE_TYPE (input))) ++ input_layers = get_ptr_layers (TREE_TYPE (input)); ++ else ++ { ++ if (VOID_POINTER_P (TREE_TYPE (input))) ++ { ++ if (!get_void_node_ptr_layers (input, input_layers)) ++ return false; ++ } ++ } ++ ++ /* Set initial node ++ def spec_qsort.constprop (void * a, size_t n) ++ |-- Find the initial ssa_name ++ from the parameter node. */ ++ tree parm = DECL_ARGUMENTS (node->decl); ++ for (unsigned j = 1; j < num_ssa_names; ++j) ++ { ++ tree name = ssa_name (j); ++ if (!name || has_zero_uses (name) || virtual_operand_p (name)) ++ continue; ++ if (SSA_NAME_VAR (name) == parm ++ && gimple_code (SSA_NAME_DEF_STMT (name)) == GIMPLE_NOP) ++ { ++ if (!add_node (name, input_layers, ptr_layers, ssa_name_stack)) ++ return false; ++ } ++ } ++ return !ssa_name_stack.is_empty (); ++} ++ ++/* Check the usage of each call. 
*/ ++ ++bool ++check_each_call (cgraph_node *node, cgraph_edge *caller) ++{ ++ hash_map ptr_layers; ++ auto_vec ssa_name_stack; ++ int input_layers = 0; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "======== check each call : %s/%u ========\n", ++ node->name (), node->order); ++ if (!set_init_node (node, caller, ptr_layers, ssa_name_stack, input_layers)) ++ return false; ++ int i = 0; ++ while (!ssa_name_stack.is_empty ()) ++ { ++ tree current_node = ssa_name_stack.pop (); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\ncur node %d: \t", i++); ++ print_generic_expr (dump_file, current_node); ++ fprintf (dump_file, ",\t\tptr layers: %d: \n", ++ *ptr_layers.get (current_node)); ++ } ++ if (get_ptr_layers (TREE_TYPE (current_node)) ++ > *ptr_layers.get (current_node)) ++ return false; ++ if (!check_node_use (node, current_node, ptr_layers, ssa_name_stack, ++ input_layers)) ++ return false; ++ } ++ ++ if (!check_node_def (ptr_layers)) ++ return false; ++ return true; ++} ++ ++/* Filter out function: void func (void*, int n), ++ and the function has no static variable, no structure-related variable, ++ and no global variable is used. 
*/ ++ ++bool ++filter_func (cgraph_node *node) ++{ ++ tree parm = DECL_ARGUMENTS (node->decl); ++ if (!(parm && VOID_POINTER_P (TREE_TYPE (parm)) ++ && VOID_TYPE_P (TREE_TYPE (TREE_TYPE (node->decl))))) ++ return false; ++ ++ for (parm = DECL_CHAIN (parm); parm; parm = DECL_CHAIN (parm)) ++ { ++ if (TREE_CODE (TREE_TYPE (parm)) != INTEGER_TYPE) ++ return false; ++ } ++ ++ if (DECL_STRUCT_FUNCTION (node->decl)->static_chain_decl) ++ return false; ++ ++ tree var = NULL_TREE; ++ unsigned int i = 0; ++ bool res = true; ++ FOR_EACH_LOCAL_DECL (cfun, i, var) ++ { ++ if (TREE_CODE (var) == VAR_DECL && handled_type (TREE_TYPE (var))) ++ res = false; ++ } ++ if (!res) ++ return false; ++ ++ for (unsigned j = 1; j < num_ssa_names; ++j) ++ { ++ tree name = ssa_name (j); ++ if (!name || has_zero_uses (name) || virtual_operand_p (name)) ++ continue; ++ tree var = SSA_NAME_VAR (name); ++ if (var && TREE_CODE (var) == VAR_DECL && is_global_var (var)) ++ return false; ++ } ++ return true; ++} ++ ++/* Check whether the function with the void* parameter and uses the input node ++ safely. ++ In these functions only component_ref can be used to dereference the last ++ layer of the input structure pointer. The hack operation pointer offset ++ after type cast cannot be used. 
++*/ ++ ++bool ++is_safe_func_with_void_ptr_parm (cgraph_node *node) ++{ ++ if (!filter_func (node)) ++ return false; ++ ++ /* Distinguish Recursive Callers ++ normal_callers: main () ++ { spec_qsort.constprop (_649, _651); } ++ definition: spec_qsort.constprop (void * a, size_t n) ++ recursive_callers: { spec_qsort.constprop (a_1, _139); } */ ++ auto_vec callers = node->collect_callers (); ++ auto_vec normal_callers; ++ for (unsigned i = 0; i < callers.length (); i++) ++ { ++ if (callers[i]->caller != node) ++ normal_callers.safe_push (callers[i]); ++ } ++ if (normal_callers.length () == 0) ++ return false; ++ ++ for (unsigned i = 0; i < normal_callers.length (); i++) ++ { ++ if (!check_each_call (node, normal_callers[i])) ++ return false; ++ } ++ return true; ++} ++ ++/* Return the escape type which corresponds to if ++ this is an volatile type, an array type or a pointer ++ to a pointer type. */ ++ ++static escape_type ++escape_type_volatile_array_or_ptrptr (tree type) ++{ ++ if (isvolatile_type (type)) ++ return escape_volatile; ++ if (isarraytype (type)) ++ return escape_array; ++ if (isptrptr (type) && (current_mode != STRUCT_REORDER_FIELDS)) ++ return escape_ptr_ptr; ++ return does_not_escape; ++} ++ ++/* Record field type. */ ++ ++void ++ipa_struct_reorg::record_field_type (tree field, srtype *base_srtype) ++{ ++ tree field_type = TREE_TYPE (field); ++ /* The uid of the type in the structure is different ++ from that outside the structure. */ ++ srtype *field_srtype = record_type (inner_type (field_type)); ++ srfield *field_srfield = base_srtype->find_field (int_byte_position (field)); ++ /* We might have an variable sized type which we don't set the handle. 
*/ ++ if (field_srfield) ++ { ++ field_srfield->type = field_srtype; ++ field_srtype->add_field_site (field_srfield); ++ } ++ if (field_srtype == base_srtype && current_mode != COMPLETE_STRUCT_RELAYOUT ++ && current_mode != STRUCT_REORDER_FIELDS) ++ base_srtype->mark_escape (escape_rescusive_type, NULL); ++ /* Types of non-pointer field are difficult to track the correctness ++ of the rewrite when it used by the escaped type. */ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE (field_type) == RECORD_TYPE) ++ field_srtype->mark_escape (escape_instance_field, NULL); ++} ++ ++/* Record structure all field types. */ ++ ++void ++ipa_struct_reorg::record_struct_field_types (tree base_type, ++ srtype *base_srtype) ++{ ++ for (tree field = TYPE_FIELDS (base_type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ tree field_type = TREE_TYPE (field); ++ process_union (field_type); ++ if (TREE_CODE (inner_type (field_type)) == UNION_TYPE ++ || TREE_CODE (inner_type (field_type)) == QUAL_UNION_TYPE) ++ base_srtype->mark_escape (escape_union, NULL); ++ if (isvolatile_type (field_type)) ++ base_srtype->mark_escape (escape_volatile, NULL); ++ escape_type e = escape_type_volatile_array_or_ptrptr (field_type); ++ if (e != does_not_escape) ++ base_srtype->mark_escape (e, NULL); ++ /* Types of non-pointer field are difficult to track the correctness ++ of the rewrite when it used by the escaped type. */ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE (field_type) == RECORD_TYPE) ++ base_srtype->mark_escape (escape_instance_field, NULL); ++ if (handled_type (field_type)) ++ record_field_type (field, base_srtype); ++ } ++ } ++} ++ ++/* Record TYPE if not already recorded. */ ++ ++srtype * ++ipa_struct_reorg::record_type (tree type) ++{ ++ unsigned typeuid; ++ ++ /* Get the main variant as we are going ++ to record that type only. 
*/ ++ type = TYPE_MAIN_VARIANT (type); ++ typeuid = TYPE_UID (type); ++ ++ srtype *type1; ++ ++ type1 = find_type (type); ++ if (type1) ++ return type1; ++ ++ /* If already done recording just return NULL. */ ++ if (done_recording) ++ return NULL; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Recording new type: %u.\n", typeuid); ++ const char *type_name = get_type_name (type); ++ if (type_name == NULL) ++ fprintf (dump_file, "Recording new type NULL name\n"); ++ else ++ fprintf (dump_file, "Recording new type name: %s.\n", type_name); ++ } ++ ++ type1 = new srtype (type); ++ types.safe_push (type1); ++ ++ /* If the type has an user alignment set, ++ that means the user most likely already setup the type. */ ++ if (TYPE_USER_ALIGN (type)) ++ type1->mark_escape (escape_user_alignment, NULL); ++ ++ record_struct_field_types (type, type1); ++ ++ return type1; ++} ++ ++/* Mark TYPE as escaping with ESCAPES as the reason. */ ++ ++void ++ipa_struct_reorg::mark_type_as_escape (tree type, ++ escape_type escapes, ++ gimple *stmt) ++{ ++ if (handled_type (type)) ++ { ++ srtype *stype = record_type (inner_type (type)); ++ ++ if (!stype) ++ return; ++ ++ stype->mark_escape (escapes, stmt); ++ } ++} ++ ++/* Maybe process the union of type TYPE, such that marking all of the fields' ++ types as being escaping. */ ++ ++void ++ipa_struct_reorg::process_union (tree type) ++{ ++ static hash_set unions_recorded; ++ ++ type = inner_type (type); ++ if (TREE_CODE (type) != UNION_TYPE ++ && TREE_CODE (type) != QUAL_UNION_TYPE) ++ return; ++ ++ type = TYPE_MAIN_VARIANT (type); ++ ++ /* We already processed this type. 
*/ ++ if (unions_recorded.add (type)) ++ return; ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ mark_type_as_escape (TREE_TYPE (field), escape_union); ++ process_union (TREE_TYPE (field)); ++ } ++ } ++} ++ ++/* Used by record_var function as a callback to walk_tree. ++ Mark the type as escaping if it has expressions which ++ cannot be converted for global initializations. */ ++ ++static tree ++record_init_types (tree *tp, int *walk_subtrees, void *data) ++{ ++ ipa_struct_reorg *c = (ipa_struct_reorg *)data; ++ switch (TREE_CODE (*tp)) ++ { ++ CASE_CONVERT: ++ case COMPONENT_REF: ++ case VIEW_CONVERT_EXPR: ++ case ARRAY_REF: ++ { ++ tree typeouter = TREE_TYPE (*tp); ++ tree typeinner = TREE_TYPE (TREE_OPERAND (*tp, 0)); ++ c->mark_type_as_escape (typeouter, escape_via_global_init); + c->mark_type_as_escape (typeinner, escape_via_global_init); + break; + } +@@ -1996,6 +2775,8 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) + + process_union (TREE_TYPE (decl)); + ++ /* Only the structure type RECORD_TYPE is recorded. ++ Therefore, the void* type is filtered out. */ + if (handled_type (TREE_TYPE (decl))) + { + type = record_type (inner_type (TREE_TYPE (decl))); +@@ -2035,7 +2816,8 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) + + /* Separate instance is hard to trace in complete struct + relayout optimization. 
*/ +- if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if ((current_mode == COMPLETE_STRUCT_RELAYOUT ++ || current_mode == STRUCT_REORDER_FIELDS) + && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) + e = escape_separate_instance; + +@@ -2078,11 +2860,9 @@ ipa_struct_reorg::find_var (tree expr, gimple *stmt) + { + tree r = TREE_OPERAND (expr, 0); + tree orig_type = TREE_TYPE (expr); +- if (handled_component_p (r) +- || TREE_CODE (r) == MEM_REF) ++ if (handled_component_p (r) || TREE_CODE (r) == MEM_REF) + { +- while (handled_component_p (r) +- || TREE_CODE (r) == MEM_REF) ++ while (handled_component_p (r) || TREE_CODE (r) == MEM_REF) + { + if (TREE_CODE (r) == VIEW_CONVERT_EXPR) + { +@@ -2114,8 +2894,10 @@ ipa_struct_reorg::find_var (tree expr, gimple *stmt) + srtype *type; + srfield *field; + bool realpart, imagpart, address; ++ bool escape_from_base = false; ++ /* The should_create flag is true, the declaration can be recorded. */ + get_type_field (expr, base, indirect, type, field, +- realpart, imagpart, address, true, true); ++ realpart, imagpart, address, escape_from_base, true, true); + } + + void +@@ -2132,36 +2914,79 @@ ipa_struct_reorg::find_vars (gimple *stmt) + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + find_var (gimple_assign_lhs (stmt), stmt); ++ /* _2 = MEM[(struct arc_t * *)_1]; ++ records the right value _1 declaration. */ + find_var (gimple_assign_rhs1 (stmt), stmt); +- if (TREE_CODE (lhs) == SSA_NAME ++ ++ /* Add a safe func mechanism. 
*/ ++ bool l_find = true; ++ bool r_find = true; ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ l_find = !(current_function->is_safe_func ++ && TREE_CODE (lhs) == SSA_NAME ++ && is_from_void_ptr_parm (lhs)); ++ r_find = !(current_function->is_safe_func ++ && TREE_CODE (rhs) == SSA_NAME ++ && is_from_void_ptr_parm (rhs)); ++ } ++ ++ if ((TREE_CODE (lhs) == SSA_NAME) + && VOID_POINTER_P (TREE_TYPE (lhs)) +- && handled_type (TREE_TYPE (rhs))) ++ && handled_type (TREE_TYPE (rhs)) && l_find) + { + srtype *t = find_type (inner_type (TREE_TYPE (rhs))); + srdecl *d = find_decl (lhs); + if (!d && t) + { +- current_function->record_decl (t, lhs, -1); ++ current_function->record_decl (t, lhs, -1, ++ isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL); + tree var = SSA_NAME_VAR (lhs); + if (var && VOID_POINTER_P (TREE_TYPE (var))) +- current_function->record_decl (t, var, -1); ++ current_function->record_decl (t, var, -1, ++ isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL); + } + } ++ /* Find void ssa_name such as: ++ void * _1; struct arc * _2; ++ _2 = _1 + _3; _1 = calloc (100, 40). */ + if (TREE_CODE (rhs) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (rhs)) +- && handled_type (TREE_TYPE (lhs))) ++ && handled_type (TREE_TYPE (lhs)) && r_find) + { + srtype *t = find_type (inner_type (TREE_TYPE (lhs))); + srdecl *d = find_decl (rhs); + if (!d && t) + { +- current_function->record_decl (t, rhs, -1); ++ current_function->record_decl (t, rhs, -1, ++ isptrptr (TREE_TYPE (lhs)) ? TREE_TYPE (lhs) : NULL); + tree var = SSA_NAME_VAR (rhs); + if (var && VOID_POINTER_P (TREE_TYPE (var))) +- current_function->record_decl (t, var, -1); ++ current_function->record_decl (t, var, -1, ++ isptrptr (TREE_TYPE (lhs)) ? 
TREE_TYPE (lhs) : NULL); + } + } + } ++ else if ((current_mode == STRUCT_REORDER_FIELDS) ++ && (gimple_assign_rhs_code (stmt) == LE_EXPR ++ || gimple_assign_rhs_code (stmt) == LT_EXPR ++ || gimple_assign_rhs_code (stmt) == GE_EXPR ++ || gimple_assign_rhs_code (stmt) == GT_EXPR)) ++ { ++ find_var (gimple_assign_lhs (stmt), stmt); ++ find_var (gimple_assign_rhs1 (stmt), stmt); ++ find_var (gimple_assign_rhs2 (stmt), stmt); ++ } ++ /* Find void ssa_name from stmt such as: _2 = _1 - old_arcs_1. */ ++ else if ((current_mode == STRUCT_REORDER_FIELDS) ++ && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR ++ && types_compatible_p ( ++ TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs1 (stmt))), ++ TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs2 (stmt))))) ++ { ++ find_var (gimple_assign_rhs1 (stmt), stmt); ++ find_var (gimple_assign_rhs2 (stmt), stmt); ++ } + else + { + /* Because we won't handle these stmts in rewrite phase, +@@ -2232,27 +3057,134 @@ ipa_struct_reorg::find_vars (gimple *stmt) + } + } + +-/* Maybe record access of statement for further analaysis. */ ++/* Maybe record access of statement for further analaysis. */ ++ ++void ++ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ maybe_record_assign (node, as_a (stmt)); ++ break; ++ case GIMPLE_CALL: ++ maybe_record_call (node, as_a (stmt)); ++ break; ++ case GIMPLE_DEBUG: ++ break; ++ case GIMPLE_GOTO: ++ case GIMPLE_SWITCH: ++ break; ++ default: ++ break; ++ } ++} ++ ++/* Calculate the multiplier. 
*/ ++ ++static bool ++calculate_mult_num (tree arg, tree *num, tree struct_size) ++{ ++ gcc_assert (TREE_CODE (arg) == INTEGER_CST); ++ bool sign = false; ++ HOST_WIDE_INT size = TREE_INT_CST_LOW (arg); ++ if (size < 0) ++ { ++ size = -size; ++ sign = true; ++ } ++ tree arg2 = build_int_cst (TREE_TYPE (arg), size); ++ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size))) ++ { ++ tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size); ++ if (sign) ++ number = build_int_cst (TREE_TYPE (number), -tree_to_shwi (number)); ++ *num = number; ++ return true; ++ } ++ return false; ++} ++ ++/* Trace and calculate the multiplier of PLUS_EXPR. */ ++ ++static bool ++trace_calculate_plus (gimple *size_def_stmt, tree *num, tree struct_size) ++{ ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR); ++ ++ tree num1 = NULL_TREE; ++ tree num2 = NULL_TREE; ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ if (!is_result_of_mult (arg0, &num1, struct_size) || num1 == NULL_TREE) ++ return false; ++ if (!is_result_of_mult (arg1, &num2, struct_size) || num2 == NULL_TREE) ++ return false; ++ *num = size_binop (PLUS_EXPR, num1, num2); ++ return true; ++} ++ ++/* Trace and calculate the multiplier of MULT_EXPR. */ ++ ++static bool ++trace_calculate_mult (gimple *size_def_stmt, tree *num, tree struct_size) ++{ ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR); ++ ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ tree num1 = NULL_TREE; ++ ++ if (is_result_of_mult (arg0, &num1, struct_size) && num1 != NULL_TREE) ++ { ++ *num = size_binop (MULT_EXPR, arg1, num1); ++ return true; ++ } ++ if (is_result_of_mult (arg1, &num1, struct_size) && num1 != NULL_TREE) ++ { ++ *num = size_binop (MULT_EXPR, arg0, num1); ++ return true; ++ } ++ *num = NULL_TREE; ++ return false; ++} ++ ++/* Trace and calculate the multiplier of NEGATE_EXPR. 
*/ ++ ++static bool ++trace_calculate_negate (gimple *size_def_stmt, tree *num, tree struct_size) ++{ ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NEGATE_EXPR); ++ ++ /* Support NEGATE_EXPR trace: _3 = -_2; _2 = _1 * 72. */ ++ tree num1 = NULL_TREE; ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ if (!is_result_of_mult (arg0, &num1, struct_size) || num1 == NULL_TREE) ++ return false; ++ tree num0 = build_int_cst (TREE_TYPE (num1), -1); ++ *num = size_binop (MULT_EXPR, num0, num1); ++ return true; ++} ++ ++/* Trace and calculate the multiplier of POINTER_DIFF_EXPR. */ + +-void +-ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) ++static bool ++trace_calculate_diff (gimple *size_def_stmt, tree *num) + { +- switch (gimple_code (stmt)) ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NOP_EXPR); ++ ++ /* Support POINTER_DIFF_EXPR trace: ++ _3 = (long unsigned int) _2; _2 = _1 - old_arcs_1. */ ++ tree arg = gimple_assign_rhs1 (size_def_stmt); ++ size_def_stmt = SSA_NAME_DEF_STMT (arg); ++ if (size_def_stmt && is_gimple_assign (size_def_stmt) ++ && gimple_assign_rhs_code (size_def_stmt) == POINTER_DIFF_EXPR) + { +- case GIMPLE_ASSIGN: +- maybe_record_assign (node, as_a (stmt)); +- break; +- case GIMPLE_CALL: +- maybe_record_call (node, as_a (stmt)); +- break; +- case GIMPLE_DEBUG: +- break; +- case GIMPLE_GOTO: +- case GIMPLE_SWITCH: +- break; +- default: +- break; ++ *num = NULL_TREE; ++ return true; + } ++ *num = NULL_TREE; ++ return false; + } + + /* This function checks whether ARG is a result of multiplication +@@ -2269,26 +3201,8 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + + /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. 
*/ + if (TREE_CODE (arg) == INTEGER_CST) +- { +- bool sign = false; +- HOST_WIDE_INT size = TREE_INT_CST_LOW (arg); +- if (size < 0) +- { +- size = -size; +- sign = true; +- } +- tree arg2 = build_int_cst (TREE_TYPE (arg), size); +- if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size))) +- { +- tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size); +- if (sign) +- number = build_int_cst (TREE_TYPE (number), +- -tree_to_shwi (number)); +- *num = number; +- return true; +- } +- return false; +- } ++ return calculate_mult_num (arg, num, struct_size); ++ + gimple *size_def_stmt = SSA_NAME_DEF_STMT (arg); + + /* If the allocation statement was of the form +@@ -2304,43 +3218,20 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + return false; + + // FIXME: this should handle SHIFT also. +- if (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR) +- { +- tree num1, num2; +- tree arg0 = gimple_assign_rhs1 (size_def_stmt); +- tree arg1 = gimple_assign_rhs2 (size_def_stmt); +- if (!is_result_of_mult (arg0, &num1, struct_size)) +- return false; +- if (!is_result_of_mult (arg1, &num2, struct_size)) +- return false; +- *num = size_binop (PLUS_EXPR, num1, num2); +- return true; +- } +- else if (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR) +- { +- tree arg0 = gimple_assign_rhs1 (size_def_stmt); +- tree arg1 = gimple_assign_rhs2 (size_def_stmt); +- tree num1; +- +- if (is_result_of_mult (arg0, &num1, struct_size)) +- { +- *num = size_binop (MULT_EXPR, arg1, num1); +- return true; +- } +- if (is_result_of_mult (arg1, &num1, struct_size)) +- { +- *num = size_binop (MULT_EXPR, arg0, num1); +- return true; +- } +- +- *num = NULL_TREE; +- return false; +- } +- else if (gimple_assign_rhs_code (size_def_stmt) == SSA_NAME) ++ tree_code rhs_code = gimple_assign_rhs_code (size_def_stmt); ++ if (rhs_code == PLUS_EXPR) ++ return trace_calculate_plus (size_def_stmt, num, struct_size); ++ else if (rhs_code == MULT_EXPR) ++ return trace_calculate_mult 
(size_def_stmt, num, struct_size); ++ else if (rhs_code == SSA_NAME) + { + arg = gimple_assign_rhs1 (size_def_stmt); + size_def_stmt = SSA_NAME_DEF_STMT (arg); + } ++ else if (rhs_code == NEGATE_EXPR && current_mode == STRUCT_REORDER_FIELDS) ++ return trace_calculate_negate (size_def_stmt, num, struct_size); ++ else if (rhs_code == NOP_EXPR && current_mode == STRUCT_REORDER_FIELDS) ++ return trace_calculate_diff (size_def_stmt, num); + else + { + *num = NULL_TREE; +@@ -2357,18 +3248,22 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + bool + ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if ((current_mode == STRUCT_REORDER_FIELDS) ++ && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))) ++ return true; ++ if ((current_mode == COMPLETE_STRUCT_RELAYOUT) + && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + return true; +- +- if (current_mode != COMPLETE_STRUCT_RELAYOUT) +- if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) +- return true; ++ if ((current_mode == NORMAL) ++ && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN))) ++ return true; + return false; + } + +@@ -2376,7 +3271,7 @@ ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + elements in the array allocated. 
*/ + + tree +-ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) ++ipa_struct_reorg::allocate_size (srtype *type, srdecl *decl, gimple *stmt) + { + if (!stmt + || gimple_code (stmt) != GIMPLE_CALL +@@ -2396,6 +3291,10 @@ ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) + + tree struct_size = TYPE_SIZE_UNIT (type->type); + ++ /* Specify the correct size to relax multi-layer pointer. */ ++ if (TREE_CODE (decl->decl) == SSA_NAME && isptrptr (decl->orig_type)) ++ struct_size = TYPE_SIZE_UNIT (decl->orig_type); ++ + tree size = gimple_call_arg (stmt, 0); + + if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) +@@ -2409,8 +3308,10 @@ ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) + the size of structure. */ + if (operand_equal_p (arg1, struct_size, 0)) + return size; +- /* ??? Check that first argument is a constant equal to +- the size of structure. */ ++ /* ??? Check that first argument is a constant ++ equal to the size of structure. */ ++ /* If the allocated number is equal to the value of struct_size, ++ the value of arg1 is changed to the allocated number. */ + if (operand_equal_p (size, struct_size, 0)) + return arg1; + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -2453,10 +3354,16 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, + + if (!d) + { ++ /* MEM[(struct arc *)_1].head = _2; _2 = calloc (100, 104). */ + if (VOID_POINTER_P (TREE_TYPE (side)) + && TREE_CODE (side) == SSA_NAME) +- current_function->record_decl (type, side, -1); ++ { ++ /* The type is other, the declaration is side. */ ++ current_function->record_decl (type, side, -1, ++ isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL); ++ } + else ++ /* *_1 = &MEM[(void *)&x + 8B]. 
*/ + type->mark_escape (escape_cast_another_ptr, stmt); + } + else if (type != d->type) +@@ -2464,6 +3371,17 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, + type->mark_escape (escape_cast_another_ptr, stmt); + d->type->mark_escape (escape_cast_another_ptr, stmt); + } ++ /* x_1 = y.x_nodes; void *x; ++ Directly mark the structure pointer type assigned ++ to the void* variable as escape. */ ++ else if (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE (side) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (side)) ++ && SSA_NAME_VAR (side) ++ && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (side)))) ++ mark_type_as_escape (TREE_TYPE (other), escape_cast_void, stmt); ++ ++ check_ptr_layers (side, other, stmt); + } + + /* Record accesses in an assignment statement STMT. */ +@@ -2486,8 +3404,12 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt) + if (!handled_type (TREE_TYPE (lhs))) + return; + /* Check if rhs2 is a multiplication of the size of the type. */ ++ /* The size adjustment and judgment of multi-layer pointers ++ are added. */ + if (is_result_of_mult (rhs2, &num, +- TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))))) ++ isptrptr (TREE_TYPE (lhs)) ++ ? TYPE_SIZE_UNIT (TREE_TYPE (lhs)) ++ : TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))))) + { + record_stmt_expr (lhs, node, stmt); + record_stmt_expr (rhs1, node, stmt); +@@ -2525,9 +3447,8 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt) + } + + static bool +-check_mem_ref_offset (tree expr) ++check_mem_ref_offset (tree expr, tree *num) + { +- tree num = NULL; + bool ret = false; + + if (TREE_CODE (expr) != MEM_REF) +@@ -2538,15 +3459,18 @@ check_mem_ref_offset (tree expr) + tree tmp = TREE_OPERAND (expr, 0); + if (TREE_CODE (tmp) == ADDR_EXPR) + tmp = TREE_OPERAND (tmp, 0); +- tree size = TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp))); +- ret = is_result_of_mult (field_off, &num, size); ++ /* Specify the correct size for the multi-layer pointer. 
*/ ++ tree size = isptrptr (TREE_TYPE (tmp)) ++ ? TYPE_SIZE_UNIT (TREE_TYPE (tmp)) ++ : TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp))); ++ ret = is_result_of_mult (field_off, num, size); + return ret; + } + + static tree + get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, + bool &realpart, bool &imagpart, +- tree &accesstype) ++ tree &accesstype, tree *num) + { + offset = 0; + realpart = false; +@@ -2569,22 +3493,29 @@ get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, + { + case COMPONENT_REF: + { ++ /* x.a = _1; If expr is the lvalue of stmt, ++ then field type is FIELD_DECL - POINTER_TYPE - RECORD_TYPE. */ + tree field = TREE_OPERAND (expr, 1); + tree field_off = byte_position (field); + if (TREE_CODE (field_off) != INTEGER_CST) + return NULL; + offset += tree_to_shwi (field_off); ++ /* x.a = _1; If expr is the lvalue of stmt, ++ then expr type is VAR_DECL - RECORD_TYPE (fetch x) */ + expr = TREE_OPERAND (expr, 0); + accesstype = NULL; + break; + } + case MEM_REF: + { ++ /* _2 = MEM[(struct s * *)_1]; ++ If expr is the right value of stmt, then field_off type is ++ INTEGER_CST - POINTER_TYPE - POINTER_TYPE - RECORD_TYPE. */ + tree field_off = TREE_OPERAND (expr, 1); + gcc_assert (TREE_CODE (field_off) == INTEGER_CST); + /* So we can mark the types as escaping if different. 
*/ + accesstype = TREE_TYPE (field_off); +- if (!check_mem_ref_offset (expr)) ++ if (!check_mem_ref_offset (expr, num)) + offset += tree_to_uhwi (field_off); + return TREE_OPERAND (expr, 0); + } +@@ -2626,10 +3557,11 @@ ipa_struct_reorg::wholeaccess (tree expr, tree base, + bool + ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + srtype *&type, srfield *&field, +- bool &realpart, bool &imagpart, +- bool &address, bool should_create, ++ bool &realpart, bool &imagpart, bool &address, ++ bool &escape_from_base, bool should_create, + bool can_escape) + { ++ tree num = NULL_TREE; + HOST_WIDE_INT offset; + tree accesstype; + address = false; +@@ -2641,8 +3573,9 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + mark_as_bit_field = true; + } + ++ /* Ref is classified into two types: COMPONENT_REF or MER_REF. */ + base = get_ref_base_and_offset (expr, offset, realpart, imagpart, +- accesstype); ++ accesstype, &num); + + /* Variable access, unkown type. */ + if (base == NULL) +@@ -2680,6 +3613,8 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + if (!t) + return false; + } ++ /* If no such decl is finded ++ or orig_type is not added to this decl, then add it. */ + else if (!d && accesstype) + { + if (!should_create) +@@ -2691,15 +3626,52 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + t = record_type (inner_type (accesstype)); + if (!t || t->has_escaped ()) + return false; +- /* If base is not void* mark the type as escaping. */ +- if (!VOID_POINTER_P (TREE_TYPE (base))) ++ /* If base is not void* mark the type as escaping. ++ release INTEGER_TYPE cast to struct pointer. ++ (If t has escpaed above, then directly returns ++ and doesn't mark escape follow.). */ ++ /* _1 = MEM[(struct arc_t * *)a_1]. ++ then base a_1: ssa_name - pointer_type - integer_type. 
*/ ++ if (current_mode == STRUCT_REORDER_FIELDS) + { +- gcc_assert (can_escape); +- t->mark_escape (escape_cast_another_ptr, NULL); +- return false; ++ bool is_int_ptr = POINTER_TYPE_P (TREE_TYPE (base)) ++ && (TREE_CODE (inner_type (TREE_TYPE (base))) ++ == INTEGER_TYPE); ++ if (!(VOID_POINTER_P (TREE_TYPE (base)) ++ || (current_function->is_safe_func && is_int_ptr))) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_cast_another_ptr, NULL); ++ return false; ++ } ++ if (TREE_CODE (base) == SSA_NAME ++ && !(current_function->is_safe_func && is_int_ptr)) ++ { ++ /* Add a safe func mechanism. */ ++ if (!(current_function->is_safe_func ++ && is_from_void_ptr_parm (base))) ++ /* Add auxiliary information of the multi-layer pointer ++ type. */ ++ current_function->record_decl (t, base, -1, ++ isptrptr (accesstype) ? accesstype : NULL); ++ } ++ } ++ else ++ { ++ if (!(VOID_POINTER_P (TREE_TYPE (base)))) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_cast_another_ptr, NULL); ++ return false; ++ } ++ if (TREE_CODE (base) == SSA_NAME) ++ { ++ /* Add auxiliary information of the multi-layer pointer ++ type. */ ++ current_function->record_decl (t, base, -1, ++ isptrptr (accesstype) ? 
accesstype : NULL); ++ } + } +- if (TREE_CODE (base) == SSA_NAME) +- current_function->record_decl (t, base, -1); + } + else if (!d) + return false; +@@ -2707,7 +3679,10 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + t = d->type; + + if (t->has_escaped ()) ++ { ++ escape_from_base = true; + return false; ++ } + + if (mark_as_bit_field) + { +@@ -2716,6 +3691,17 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + return false; + } + ++ /* Escape the operation of fetching field with pointer offset such as: ++ *(&(t->right)) = malloc (0); -> MEM[(struct node * *)_1 + 8B] = malloc (0); ++ */ ++ if (current_mode != NORMAL ++ && (TREE_CODE (expr) == MEM_REF) && (offset != 0)) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_non_multiply_size, NULL); ++ return false; ++ } ++ + if (wholeaccess (expr, base, accesstype, t)) + { + field = NULL; +@@ -2733,7 +3719,6 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + print_generic_expr (dump_file, expr); + fprintf (dump_file, "\n"); + print_generic_expr (dump_file, base); +- fprintf (dump_file, "\n"); + } + gcc_assert (can_escape); + t->mark_escape (escape_unkown_field, NULL); +@@ -2747,9 +3732,8 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + print_generic_expr (dump_file, f->fieldtype); + fprintf (dump_file, "\naccess type = "); + print_generic_expr (dump_file, TREE_TYPE (expr)); +- fprintf (dump_file, "original expr = "); ++ fprintf (dump_file, "\noriginal expr = "); + print_generic_expr (dump_file, expr); +- fprintf (dump_file, "\n"); + } + gcc_assert (can_escape); + t->mark_escape (escape_unkown_field, NULL); +@@ -2772,8 +3756,9 @@ ipa_struct_reorg::mark_expr_escape (tree expr, escape_type escapes, + srtype *type; + srfield *field; + bool realpart, imagpart, address; ++ bool escape_from_base = false; + if (!get_type_field (expr, base, indirect, type, field, +- realpart, imagpart, address)) ++ realpart, 
imagpart, address, escape_from_base)) + return; + + type->mark_escape (escapes, stmt); +@@ -2846,10 +3831,23 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) + gimple_call_arg (stmt, i)); + if (d) + d->type->mark_escape (escapes, stmt); ++ ++ if (escapes == escape_external_function ++ && !gimple_call_builtin_p (stmt, BUILT_IN_MEMSET)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "escape_external_function: "); ++ print_gimple_stmt (dump_file, stmt, 0); ++ } ++ if (d) ++ ext_func_types.safe_push (d->type); ++ } + } + return; + } + ++ /* Get func param it's tree_list. */ + argtype = TYPE_ARG_TYPES (gimple_call_fntype (stmt)); + for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) + { +@@ -2857,9 +3855,14 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) + if (argtype) + { + tree argtypet = TREE_VALUE (argtype); +- if (!free_or_realloc ++ /* callee_func (_1, _2); ++ Check the callee func, instead of current func. 
*/ ++ if (!(free_or_realloc ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && safe_functions.contains ( ++ node->get_edge (stmt)->callee))) + && VOID_POINTER_P (argtypet)) +- mark_type_as_escape (TREE_TYPE (arg), escape_cast_void); ++ mark_type_as_escape (TREE_TYPE (arg), escape_cast_void, stmt); + else + record_stmt_expr (arg, node, stmt); + } +@@ -2878,12 +3881,22 @@ ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt) + srtype *type; + srfield *field; + bool realpart, imagpart, address; ++ bool escape_from_base = false; + if (!get_type_field (expr, base, indirect, type, field, +- realpart, imagpart, address)) ++ realpart, imagpart, address, escape_from_base)) + return; + +- if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) +- type->mark_escape (escape_non_optimize, stmt); ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ if (!opt_for_fn (current_function_decl, flag_ipa_reorder_fields)) ++ type->mark_escape (escape_non_optimize, stmt); ++ } ++ else ++ { ++ if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) ++ type->mark_escape (escape_non_optimize, stmt); ++ } ++ + + /* Record it. 
*/ + type->add_access (new sraccess (stmt, node, type, field)); +@@ -2901,10 +3914,10 @@ ipa_struct_reorg::find_function (cgraph_node *node) + } + + void +-ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, +- vec &worklist, +- gimple *stmt) ++ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, ++ vec &worklist, gimple *stmt) + { ++ srtype *type = decl->type; + if (integer_zerop (newdecl)) + return; + +@@ -2916,7 +3929,8 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, + type->mark_escape (escape_cast_another_ptr, stmt); + return; + } +- if (d->type == type) ++ if (d->type == type ++ && cmp_ptr_layers (TREE_TYPE (newdecl), TREE_TYPE (decl->decl))) + return; + + srtype *type1 = d->type; +@@ -2967,7 +3981,9 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, + /* Only add to the worklist if the decl is a SSA_NAME. */ + if (TREE_CODE (newdecl) == SSA_NAME) + worklist.safe_push (d); +- if (d->type == type) ++ tree a_decl = d->orig_type ? d->orig_type : TREE_TYPE (newdecl); ++ tree b_decl = decl->orig_type ? decl->orig_type : TREE_TYPE (decl->decl); ++ if (d->type == type && cmp_ptr_layers (a_decl, b_decl)) + return; + + srtype *type1 = d->type; +@@ -3000,6 +4016,96 @@ ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + } + } + ++/* Check the definition of gimple assign. */ ++ ++void ++ipa_struct_reorg::check_definition_assign (srdecl *decl, ++ vec &worklist) ++{ ++ tree ssa_name = decl->decl; ++ srtype *type = decl->type; ++ gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); ++ gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); ++ /* a) if the SSA_NAME is sourced from a pointer plus, record the pointer and ++ check to make sure the addition was a multiple of the size. ++ check the pointer type too. 
*/ ++ tree rhs = gimple_assign_rhs1 (stmt); ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree num = NULL_TREE; ++ /* Specify the correct size for the multi-layer pointer. */ ++ if (!is_result_of_mult (rhs2, &num, isptrptr (decl->orig_type) ++ ? TYPE_SIZE_UNIT (decl->orig_type) ++ : TYPE_SIZE_UNIT (type->type))) ++ type->mark_escape (escape_non_multiply_size, stmt); ++ ++ if (TREE_CODE (rhs) == SSA_NAME) ++ check_type_and_push (rhs, decl, worklist, stmt); ++ return; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == MAX_EXPR ++ || gimple_assign_rhs_code (stmt) == MIN_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_IOR_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_XOR_EXPR ++ || gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (rhs) == SSA_NAME) ++ check_type_and_push (rhs, decl, worklist, stmt); ++ if (TREE_CODE (rhs2) == SSA_NAME) ++ check_type_and_push (rhs2, decl, worklist, stmt); ++ return; ++ } ++ ++ /* Casts between pointers and integer are escaping. */ ++ if (gimple_assign_cast_p (stmt)) ++ { ++ type->mark_escape (escape_cast_int, stmt); ++ return; ++ } ++ ++ /* d) if the name is from a cast/assignment, make sure it is used as ++ that type or void* ++ i) If void* then push the ssa_name into worklist. */ ++ gcc_assert (gimple_assign_single_p (stmt)); ++ check_other_side (decl, rhs, stmt, worklist); ++ check_ptr_layers (decl->decl, rhs, stmt); ++} ++ ++/* Check the definition of gimple call. */ ++ ++void ++ipa_struct_reorg::check_definition_call (srdecl *decl, vec &worklist) ++{ ++ tree ssa_name = decl->decl; ++ srtype *type = decl->type; ++ gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); ++ gcc_assert (gimple_code (stmt) == GIMPLE_CALL); ++ ++ /* For realloc, check the type of the argument. 
*/ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ check_type_and_push (gimple_call_arg (stmt, 0), decl, worklist, stmt); ++ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ if (!handled_allocation_stmt (stmt)) ++ type->mark_escape (escape_return, stmt); ++ if (!allocate_size (type, decl, stmt)) ++ type->mark_escape (escape_non_multiply_size, stmt); ++ } ++ else ++ { ++ if (!handled_allocation_stmt (stmt) ++ || !allocate_size (type, decl, stmt)) ++ type->mark_escape (escape_return, stmt); ++ } ++ ++ check_alloc_num (stmt, type); ++ return; ++} ++ + /* + 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) + a) if the SSA_NAME is sourced from a pointer plus, record the pointer and +@@ -3029,9 +4135,12 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) + if (var + && TREE_CODE (var) == PARM_DECL + && VOID_POINTER_P (TREE_TYPE (ssa_name))) +- type->mark_escape (escape_cast_void, NULL); ++ type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); + return; + } ++ if (current_mode == STRUCT_REORDER_FIELDS && SSA_NAME_VAR (ssa_name) ++ && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (ssa_name)))) ++ type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); + gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); + + /* +@@ -3039,17 +4148,7 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) + i) Add SSA_NAME (void*) to the worklist if allocated from realloc + */ + if (gimple_code (stmt) == GIMPLE_CALL) +- { +- /* For realloc, check the type of the argument. */ +- if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) +- check_type_and_push (gimple_call_arg (stmt, 0), type, worklist, stmt); +- +- if (!handled_allocation_stmt (stmt) +- || !allocate_size (type, stmt)) +- type->mark_escape (escape_return, stmt); +- check_alloc_num (stmt, type); +- return; +- } ++ check_definition_call (decl, worklist); + /* If the SSA_NAME is sourced from an inline-asm, + just mark the type as escaping. 
*/ + if (gimple_code (stmt) == GIMPLE_ASM) +@@ -3065,58 +4164,11 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) + { + for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) + check_type_and_push (gimple_phi_arg_def (stmt, i), +- type, worklist, stmt); +- return; +- } +- +- gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); +- /* +- a) if the SSA_NAME is sourced from a pointer plus, record the pointer and +- check to make sure the addition was a multiple of the size. +- check the pointer type too. +- */ +- +- tree rhs = gimple_assign_rhs1 (stmt); +- if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) +- { +- tree rhs2 = gimple_assign_rhs2 (stmt); +- tree num; +- if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) +- type->mark_escape (escape_non_multiply_size, stmt); +- +- if (TREE_CODE (rhs) == SSA_NAME) +- check_type_and_push (rhs, type, worklist, stmt); +- return; +- } +- +- if (gimple_assign_rhs_code (stmt) == MAX_EXPR +- || gimple_assign_rhs_code (stmt) == MIN_EXPR +- || gimple_assign_rhs_code (stmt) == BIT_IOR_EXPR +- || gimple_assign_rhs_code (stmt) == BIT_XOR_EXPR +- || gimple_assign_rhs_code (stmt) == BIT_AND_EXPR) +- { +- tree rhs2 = gimple_assign_rhs2 (stmt); +- if (TREE_CODE (rhs) == SSA_NAME) +- check_type_and_push (rhs, type, worklist, stmt); +- if (TREE_CODE (rhs2) == SSA_NAME) +- check_type_and_push (rhs2, type, worklist, stmt); +- return; +- } +- +- /* Casts between pointers and integer are escaping. 
*/ +- if (gimple_assign_cast_p (stmt)) +- { +- type->mark_escape (escape_cast_int, stmt); ++ decl, worklist, stmt); + return; + } +- +- /* +- d) if the name is from a cast/assignment, make sure it is used as that +- type or void* +- i) If void* then push the ssa_name into worklist +- */ +- gcc_assert (gimple_assign_single_p (stmt)); +- check_other_side (decl, rhs, stmt, worklist); ++ if (gimple_code (stmt) == GIMPLE_ASSIGN) ++ check_definition_assign (decl, worklist); + } + + /* Mark the types used by the inline-asm as escaping. +@@ -3149,45 +4201,121 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, + { + srtype *type = decl->type; + +- if (TREE_CODE (other) == SSA_NAME +- || DECL_P (other) ++ if (TREE_CODE (other) == SSA_NAME || DECL_P (other) + || TREE_CODE (other) == INTEGER_CST) + { +- check_type_and_push (other, type, worklist, stmt); ++ check_type_and_push (other, decl, worklist, stmt); ++ return; ++ } ++ ++ tree t = TREE_TYPE (other); ++ if (!handled_type (t)) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ ++ srtype *t1 = find_type (inner_type (t)); ++ if (t1 == type) ++ { ++ /* In Complete Struct Relayout, if lhs type is the same ++ as rhs type, we could return without any harm. */ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT) ++ return; ++ ++ tree base; ++ bool indirect; ++ srtype *type1; ++ srfield *field; ++ bool realpart, imagpart, address; ++ bool escape_from_base = false; ++ if (!get_type_field (other, base, indirect, type1, field, ++ realpart, imagpart, address, escape_from_base)) ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ /* Release INTEGER_TYPE cast to struct pointer. */ ++ bool cast_from_int_ptr = current_function->is_safe_func && base ++ && find_decl (base) == NULL && POINTER_TYPE_P (TREE_TYPE (base)) ++ && (TREE_CODE (inner_type (TREE_TYPE (base))) == INTEGER_TYPE); ++ ++ /* Add a safe func mechanism. 
*/ ++ bool from_void_ptr_parm = current_function->is_safe_func ++ && TREE_CODE (base) == SSA_NAME && is_from_void_ptr_parm (base); ++ ++ /* Release type is used by a type which escapes. */ ++ if (escape_from_base || cast_from_int_ptr || from_void_ptr_parm) ++ return; ++ } ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ } ++ + return; + } + +- tree t = TREE_TYPE (other); +- if (!handled_type (t)) ++ if (t1) ++ t1->mark_escape (escape_cast_another_ptr, stmt); ++ ++ type->mark_escape (escape_cast_another_ptr, stmt); ++} ++ ++ ++/* Get the expr base. */ ++ ++void ++get_base (tree &base, tree expr) ++{ ++ if (TREE_CODE (expr) == MEM_REF) ++ base = TREE_OPERAND (expr, 0); ++ else if (TREE_CODE (expr) == COMPONENT_REF) ++ { ++ base = TREE_OPERAND (expr, 0); ++ base = (TREE_CODE (base) == MEM_REF) ? TREE_OPERAND (base, 0) : base; ++ } ++ else if (TREE_CODE (expr) == ADDR_EXPR) ++ base = TREE_OPERAND (expr, 0); ++} ++ ++/* Check whether the number of pointer layers of exprs is equal, ++ marking unequals as escape. 
*/ ++ ++void ++ipa_struct_reorg::check_ptr_layers (tree a_expr, tree b_expr, gimple *stmt) ++{ ++ if (current_mode != STRUCT_REORDER_FIELDS || current_function->is_safe_func ++ || !(POINTER_TYPE_P (TREE_TYPE (a_expr))) ++ || !(POINTER_TYPE_P (TREE_TYPE (b_expr))) ++ || !handled_type (TREE_TYPE (a_expr)) ++ || !handled_type (TREE_TYPE (b_expr))) ++ return; ++ ++ tree a_base = a_expr; ++ tree b_base = b_expr; ++ get_base (a_base, a_expr); ++ get_base (b_base, b_expr); ++ ++ srdecl *a = find_decl (a_base); ++ srdecl *b = find_decl (b_base); ++ if (a && b == NULL && TREE_CODE (b_expr) != INTEGER_CST) + { +- type->mark_escape (escape_cast_another_ptr, stmt); ++ a->type->mark_escape (escape_cast_another_ptr, stmt); + return; + } +- +- srtype *t1 = find_type (inner_type (t)); +- if (t1 == type) ++ else if (b && a == NULL && TREE_CODE (a_expr) != INTEGER_CST) + { +- /* In Complete Struct Relayout, if lhs type is the same +- as rhs type, we could return without any harm. */ +- if (current_mode == COMPLETE_STRUCT_RELAYOUT) +- return; +- +- tree base; +- bool indirect; +- srtype *type1; +- srfield *field; +- bool realpart, imagpart, address; +- if (!get_type_field (other, base, indirect, type1, field, +- realpart, imagpart, address)) +- type->mark_escape (escape_cast_another_ptr, stmt); +- ++ b->type->mark_escape (escape_cast_another_ptr, stmt); + return; + } ++ else if (a == NULL && b == NULL) ++ return; + +- if (t1) +- t1->mark_escape (escape_cast_another_ptr, stmt); ++ if (cmp_ptr_layers (TREE_TYPE (a_expr), TREE_TYPE (b_expr))) ++ return; + +- type->mark_escape (escape_cast_another_ptr, stmt); ++ if (a) ++ a->type->mark_escape (escape_cast_another_ptr, stmt); ++ if (b) ++ b->type->mark_escape (escape_cast_another_ptr, stmt); + } + + void +@@ -3205,7 +4333,7 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + check to make sure they are used correctly. 
*/ + if (gimple_code (stmt) == GIMPLE_PHI) + { +- check_type_and_push (gimple_phi_result (stmt), type, worklist, stmt); ++ check_type_and_push (gimple_phi_result (stmt), decl, worklist, stmt); + return; + } + +@@ -3221,10 +4349,15 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree rhs2 = gimple_cond_rhs (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_cond_code (stmt); +- if (code != EQ_EXPR && code != NE_EXPR +- && (current_mode != COMPLETE_STRUCT_RELAYOUT +- || (code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR))) ++ if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR)) ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -3235,7 +4368,7 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + return; + if (TREE_CODE (orhs) != SSA_NAME) + mark_expr_escape (rhs1, escape_non_eq, stmt); +- check_type_and_push (orhs, type, worklist, stmt); ++ check_type_and_push (orhs, decl, worklist, stmt); + return; + } + +@@ -3254,9 +4387,14 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree rhs2 = gimple_assign_rhs2 (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_assign_rhs_code (stmt); +- if (code != EQ_EXPR && code != NE_EXPR +- && (current_mode != COMPLETE_STRUCT_RELAYOUT +- || (code != LT_EXPR && code != LE_EXPR ++ if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR)) ++ || (current_mode == 
STRUCT_REORDER_FIELDS ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR + && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); +@@ -3268,7 +4406,7 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + return; + if (TREE_CODE (orhs) != SSA_NAME) + mark_expr_escape (rhs1, escape_non_eq, stmt); +- check_type_and_push (orhs, type, worklist, stmt); ++ check_type_and_push (orhs, decl, worklist, stmt); + return; + } + +@@ -3282,6 +4420,7 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + check_other_side (decl, lhs, stmt, worklist); + return; + } ++ check_ptr_layers (lhs, rhs, stmt); + } + + if (is_gimple_assign (stmt) +@@ -3291,9 +4430,26 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree lhs = gimple_assign_lhs (stmt); + tree num; + check_other_side (decl, lhs, stmt, worklist); +- if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) ++ check_ptr_layers (lhs, decl->decl, stmt); ++ /* Specify the correct size for the multi-layer pointer. */ ++ if (!is_result_of_mult (rhs2, &num, isptrptr (decl->orig_type) ++ ? TYPE_SIZE_UNIT (decl->orig_type) ++ : TYPE_SIZE_UNIT (type->type))) + type->mark_escape (escape_non_multiply_size, stmt); + } ++ ++ if (is_gimple_assign (stmt) ++ && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree other = rhs1 == decl->decl ? 
rhs2 : rhs1; ++ ++ check_other_side (decl, other, stmt, worklist); ++ check_ptr_layers (decl->decl, other, stmt); ++ return; ++ } ++ + } + + /* +@@ -3360,17 +4516,43 @@ ipa_struct_reorg::record_function (cgraph_node *node) + if (DECL_PRESERVE_P (node->decl)) + escapes = escape_marked_as_used; + else if (!node->local) +- escapes = escape_visible_function; ++ { ++ if (current_mode != STRUCT_REORDER_FIELDS) ++ escapes = escape_visible_function; ++ if (current_mode == STRUCT_REORDER_FIELDS && node->externally_visible) ++ escapes = escape_visible_function; ++ } + else if (!node->can_change_signature) + escapes = escape_cannot_change_signature; + else if (!tree_versionable_function_p (node->decl)) + escapes = escape_noclonable_function; +- else if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) +- escapes = escape_non_optimize; ++ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ if (!opt_for_fn (node->decl, flag_ipa_reorder_fields)) ++ escapes = escape_non_optimize; ++ } ++ else if (current_mode == NORMAL || current_mode == COMPLETE_STRUCT_RELAYOUT) ++ { ++ if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) ++ escapes = escape_non_optimize; ++ } + + basic_block bb; + gimple_stmt_iterator si; + ++ /* Add a safe func mechanism. */ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ current_function->is_safe_func = safe_functions.contains (node); ++ if (dump_file) ++ { ++ fprintf (dump_file, "\nfunction %s/%u: is_safe_func = %d\n", ++ node->name (), node->order, ++ current_function->is_safe_func); ++ } ++ } ++ + /* Record the static chain decl. */ + if (fn->static_chain_decl) + { +@@ -3503,6 +4685,42 @@ ipa_struct_reorg::record_function (cgraph_node *node) + return sfn; + } + ++ ++/* For a function that contains the void* parameter and passes the structure ++ pointer, check whether the function uses the input node safely. 
++ For these functions, the void* parameter and related ssa_name are not ++ recorded in record_function (), and the input structure type is not escaped. ++*/ ++ ++void ++ipa_struct_reorg::record_safe_func_with_void_ptr_parm () ++{ ++ cgraph_node *node = NULL; ++ FOR_EACH_FUNCTION (node) ++ { ++ if (!node->real_symbol_p ()) ++ continue; ++ if (node->definition) ++ { ++ if (!node->has_gimple_body_p () || node->inlined_to) ++ continue; ++ node->get_body (); ++ function *fn = DECL_STRUCT_FUNCTION (node->decl); ++ if (!fn) ++ continue; ++ push_cfun (fn); ++ if (is_safe_func_with_void_ptr_parm (node)) ++ { ++ safe_functions.add (node); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nfunction %s/%u is safe function.\n", ++ node->name (), node->order); ++ } ++ pop_cfun (); ++ } ++ } ++} ++ + /* Record all accesses for all types including global variables. */ + + void +@@ -3534,6 +4752,10 @@ ipa_struct_reorg::record_accesses (void) + record_var (var->decl, escapes); + } + ++ /* Add a safe func mechanism. */ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ record_safe_func_with_void_ptr_parm (); ++ + FOR_EACH_FUNCTION (cnode) + { + if (!cnode->real_symbol_p ()) +@@ -3552,11 +4774,14 @@ ipa_struct_reorg::record_accesses (void) + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "all types (before pruning):\n"); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "==============================================\n\n"); ++ fprintf (dump_file, "======== all types (before pruning): ========\n\n"); + dump_types (dump_file); +- fprintf (dump_file, "all functions (before pruning):\n"); ++ fprintf (dump_file, "======= all functions (before pruning): =======\n"); + dump_functions (dump_file); + } ++ /* If record_var () is called later, new types will not be recorded. 
*/ + done_recording = true; + } + +@@ -3580,6 +4805,7 @@ ipa_struct_reorg::walk_field_for_cycles (srtype *type) + { + if (!field->type) + ; ++ /* If there are two members of the same structure pointer type? */ + else if (field->type->visited + || walk_field_for_cycles (field->type)) + { +@@ -3658,22 +4884,113 @@ ipa_struct_reorg::propagate_escape (void) + } while (changed); + } + ++/* If the original type (with members) has escaped, corresponding to the ++ struct pointer type (empty member) in the structure fields ++ should also marked as escape. */ ++ ++void ++ipa_struct_reorg::propagate_escape_via_original (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ for (unsigned j = 0; j < types.length (); j++) ++ { ++ const char *type1 = get_type_name (types[i]->type); ++ const char *type2 = get_type_name (types[j]->type); ++ if (type1 == NULL || type2 == NULL) ++ continue; ++ if (type1 == type2 && types[j]->has_escaped ()) ++ { ++ if (!types[i]->has_escaped ()) ++ types[i]->mark_escape (escape_via_orig_escape, NULL); ++ break; ++ } ++ } ++ } ++} ++ ++/* Marks the fileds as empty and does not have the original structure type ++ is escape. */ ++ ++void ++ipa_struct_reorg::propagate_escape_via_empty_with_no_original (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ if (types[i]->fields.length () == 0) ++ { ++ for (unsigned j = 0; j < types.length (); j++) ++ { ++ if (i != j && types[j]->fields.length ()) ++ { ++ const char *type1 = get_type_name (types[i]->type); ++ const char *type2 = get_type_name (types[j]->type); ++ if (type1 != NULL && type2 != NULL && type1 == type2) ++ break; ++ } ++ if (j == types.length () - 1) ++ types[i]->mark_escape (escape_via_empty_no_orig, NULL); ++ } ++ } ++ } ++} ++ ++/* Escape propagation is performed on types that escape through external ++ functions. 
*/ ++ ++void ++ipa_struct_reorg::propagate_escape_via_ext_func_types (void) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\n propagate_escape_via_ext_func_types: \n\n"); ++ unsigned i = 0; ++ hash_set visited_types; ++ while (i < ext_func_types.length ()) ++ { ++ visited_types.add (ext_func_types[i]); ++ unsigned j = 0; ++ srfield * field; ++ FOR_EACH_VEC_ELT (ext_func_types[i]->fields, j, field) ++ { ++ if (field->type) ++ { ++ if (!field->type->has_escaped ()) ++ field->type->mark_escape (escape_dependent_type_escapes, NULL); ++ if (!visited_types.contains (field->type)) ++ ext_func_types.safe_push (field->type); ++ } ++ } ++ i++; ++ } ++} ++ + /* Prune the escaped types and their decls from what was recorded. */ + + void + ipa_struct_reorg::prune_escaped_types (void) + { +- if (current_mode != COMPLETE_STRUCT_RELAYOUT) ++ if (current_mode != COMPLETE_STRUCT_RELAYOUT ++ && current_mode != STRUCT_REORDER_FIELDS) + { ++ /* Detect recusive types and mark them as escaping. */ + detect_cycles (); ++ /* If contains or is contained by the escape type, ++ mark them as escaping. */ + propagate_escape (); + } ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ propagate_escape_via_original (); ++ propagate_escape_via_empty_with_no_original (); ++ propagate_escape_via_ext_func_types (); ++ } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "all types (after prop but before pruning):\n"); ++ fprintf (dump_file, "==============================================\n\n"); ++ fprintf (dump_file, "all types (after prop but before pruning): \n\n"); + dump_types (dump_file); +- fprintf (dump_file, "all functions (after prop but before pruning):\n"); ++ fprintf (dump_file, "all functions (after prop but before pruning): \n"); + dump_functions (dump_file); + } + +@@ -3721,7 +5038,8 @@ ipa_struct_reorg::prune_escaped_types (void) + /* Prune functions which don't refer to any variables any more. 
*/ + if (function->args.is_empty () + && function->decls.is_empty () +- && function->globals.is_empty ()) ++ && function->globals.is_empty () ++ && current_mode != STRUCT_REORDER_FIELDS) + { + delete function; + functions.ordered_remove (i); +@@ -3746,24 +5064,31 @@ ipa_struct_reorg::prune_escaped_types (void) + + /* Prune types that escape, all references to those types + will have been removed in the above loops. */ +- for (unsigned i = 0; i < types.length ();) ++ /* The escape type is not deleted in STRUCT_REORDER_FIELDS, ++ Then the type that contains the escaped type fields ++ can find complete information. */ ++ if (current_mode != STRUCT_REORDER_FIELDS) + { +- srtype *type = types[i]; +- if (type->has_escaped ()) ++ for (unsigned i = 0; i < types.length ();) + { +- /* All references to this type should have been removed now. */ +- delete type; +- types.ordered_remove (i); ++ srtype *type = types[i]; ++ if (type->has_escaped ()) ++ { ++ /* All references to this type should have been removed now. 
*/ ++ delete type; ++ types.ordered_remove (i); ++ } ++ else ++ i++; + } +- else +- i++; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "all types (after pruning):\n"); ++ fprintf (dump_file, "==============================================\n\n"); ++ fprintf (dump_file, "========= all types (after pruning): =========\n\n"); + dump_types (dump_file); +- fprintf (dump_file, "all functions (after pruning):\n"); ++ fprintf (dump_file, "======== all functions (after pruning): ========\n"); + dump_functions (dump_file); + } + } +@@ -3790,6 +5115,26 @@ ipa_struct_reorg::create_new_types (void) + for (unsigned i = 0; i < types.length (); i++) + newtypes += types[i]->create_new_type (); + ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ auto_vec *fields = fields_to_finish.get (types[i]->type); ++ if (fields) ++ { ++ for (unsigned j = 0; j < fields->length (); j++) ++ { ++ tree field = (*fields)[j]; ++ TREE_TYPE (field) ++ = reconstruct_complex_type (TREE_TYPE (field), ++ types[i]->newtype[0]); ++ } ++ } ++ } ++ for (unsigned i = 0; i < types.length (); i++) ++ layout_type (types[i]->newtype[0]); ++ } ++ + if (dump_file) + { + if (newtypes) +@@ -3894,7 +5239,8 @@ ipa_struct_reorg::create_new_args (cgraph_node *new_node) + char *name = NULL; + if (tname) + { +- name = concat (tname, ".reorg.0", NULL); ++ name = concat (tname, current_mode == STRUCT_REORDER_FIELDS ++ ? ".reorder.0" : ".reorg.0", NULL); + new_name = get_identifier (name); + free (name); + } +@@ -3980,9 +5326,10 @@ ipa_struct_reorg::create_new_functions (void) + fprintf (dump_file, "\n"); + } + statistics_counter_event (NULL, "Create new function", 1); +- new_node = node->create_version_clone_with_body (vNULL, NULL, +- NULL, NULL, NULL, +- "struct_reorg"); ++ new_node = node->create_version_clone_with_body ( ++ vNULL, NULL, NULL, NULL, NULL, ++ current_mode == STRUCT_REORDER_FIELDS ++ ? 
"struct_reorder" : "struct_reorg"); + new_node->can_change_signature = node->can_change_signature; + new_node->make_local (); + f->newnode = new_node; +@@ -4026,6 +5373,7 @@ ipa_struct_reorg::rewrite_expr (tree expr, + srfield *f; + bool realpart, imagpart; + bool address; ++ bool escape_from_base = false; + + tree newbase[max_split]; + memset (newexpr, 0, sizeof (tree[max_split])); +@@ -4043,8 +5391,8 @@ ipa_struct_reorg::rewrite_expr (tree expr, + return true; + } + +- if (!get_type_field (expr, base, indirect, t, f, +- realpart, imagpart, address)) ++ if (!get_type_field (expr, base, indirect, t, f, realpart, imagpart, ++ address, escape_from_base)) + return false; + + /* If the type is not changed, then just return false. */ +@@ -4107,7 +5455,38 @@ ipa_struct_reorg::rewrite_expr (tree expr, + if (address) + newbase1 = build_fold_addr_expr (newbase1); + if (indirect) +- newbase1 = build_simple_mem_ref (newbase1); ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ /* Supports the MEM_REF offset. ++ _1 = MEM[(struct arc *)ap_1 + 72B].flow; ++ Old rewrite: _1 = ap.reorder.0_8->flow; ++ New rewrite: _1 ++ = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow; ++ */ ++ HOST_WIDE_INT offset_tmp = 0; ++ HOST_WIDE_INT mem_offset = 0; ++ bool realpart_tmp = false; ++ bool imagpart_tmp = false; ++ tree accesstype_tmp = NULL_TREE; ++ tree num = NULL_TREE; ++ get_ref_base_and_offset (expr, offset_tmp, ++ realpart_tmp, imagpart_tmp, ++ accesstype_tmp, &num); ++ ++ tree ptype = TREE_TYPE (newbase1); ++ /* Specify the correct size for the multi-layer pointer. */ ++ tree size = isptrptr (ptype) ? TYPE_SIZE_UNIT (ptype) : ++ TYPE_SIZE_UNIT (inner_type (ptype)); ++ mem_offset = (num != NULL) ++ ? 
TREE_INT_CST_LOW (num) * tree_to_shwi (size) ++ : 0; ++ newbase1 = build2 (MEM_REF, TREE_TYPE (ptype), newbase1, ++ build_int_cst (ptype, mem_offset)); ++ } ++ else ++ newbase1 = build_simple_mem_ref (newbase1); ++ } + newexpr[i] = build3 (COMPONENT_REF, TREE_TYPE (f->newfield[i]), + newbase1, f->newfield[i], NULL_TREE); + if (imagpart) +@@ -4151,8 +5530,12 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + return remove; + } + +- if (gimple_assign_rhs_code (stmt) == EQ_EXPR +- || gimple_assign_rhs_code (stmt) == NE_EXPR) ++ if ((current_mode != STRUCT_REORDER_FIELDS ++ && (gimple_assign_rhs_code (stmt) == EQ_EXPR ++ || gimple_assign_rhs_code (stmt) == NE_EXPR)) ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) ++ == tcc_comparison))) + { + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); +@@ -4160,6 +5543,10 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + tree newrhs2[max_split]; + tree_code rhs_code = gimple_assign_rhs_code (stmt); + tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && rhs_code != EQ_EXPR && rhs_code != NE_EXPR) ++ code = rhs_code; ++ + if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) + return false; + tree newexpr = NULL_TREE; +@@ -4201,20 +5588,78 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + internal_error ( + "The rhs of pointer is not a multiplicate and it slips through"); + +- num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); ++ /* Add the judgment of num, support for POINTER_DIFF_EXPR. ++ _6 = _4 + _5; ++ _5 = (long unsigned int) _3; ++ _3 = _1 - old_2. 
*/ ++ if (current_mode != STRUCT_REORDER_FIELDS ++ || (current_mode == STRUCT_REORDER_FIELDS && (num != NULL))) ++ num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); + for (unsigned i = 0; i < max_split && newlhs[i]; i++) + { + gimple *new_stmt; + +- tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhs[i]))); +- newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); +- new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, +- newrhs[i], newsize); ++ if (num != NULL) ++ { ++ tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhs[i]))); ++ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, ++ newsize); ++ new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, ++ newrhs[i], newsize); ++ } ++ else ++ new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, ++ newrhs[i], rhs2); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + remove = true; + } + return remove; + } ++ ++ /* Support POINTER_DIFF_EXPR rewriting. */ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree newrhs1[max_split]; ++ tree newrhs2[max_split]; ++ ++ bool r1 = rewrite_expr (rhs1, newrhs1); ++ bool r2 = rewrite_expr (rhs2, newrhs2); ++ ++ if (r1 != r2) ++ { ++ /* Handle NULL pointer specially. */ ++ if (r1 && !r2 && integer_zerop (rhs2)) ++ { ++ r2 = true; ++ for (unsigned i = 0; i < max_split && newrhs1[i]; i++) ++ newrhs2[i] = fold_convert (TREE_TYPE (newrhs1[i]), rhs2); ++ } ++ else if (r2 && !r1 && integer_zerop (rhs1)) ++ { ++ r1 = true; ++ for (unsigned i = 0; i < max_split && newrhs2[i]; i++) ++ newrhs1[i] = fold_convert (TREE_TYPE (newrhs2[i]), rhs1); ++ } ++ else ++ return false; ++ } ++ else if (!r1 && !r2) ++ return false; ++ ++ /* The two operands always have pointer/reference type. 
*/ ++ for (unsigned i = 0; i < max_split && newrhs1[i] && newrhs2[i]; i++) ++ { ++ gimple_assign_set_rhs1 (stmt, newrhs1[i]); ++ gimple_assign_set_rhs2 (stmt, newrhs2[i]); ++ update_stmt (stmt); ++ } ++ remove = false; ++ return remove; ++ } ++ + if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) + { + tree lhs = gimple_assign_lhs (stmt); +@@ -4222,9 +5667,8 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "rewriting statement:\n"); ++ fprintf (dump_file, "\nrewriting stamtenet:\n"); + print_gimple_stmt (dump_file, stmt, 0); +- fprintf (dump_file, "\n"); + } + tree newlhs[max_split]; + tree newrhs[max_split]; +@@ -4271,7 +5715,7 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + if (!decl || !decl->type) + return false; + srtype *type = decl->type; +- tree num = allocate_size (type, stmt); ++ tree num = allocate_size (type, decl, stmt); + gcc_assert (num); + memset (newrhs1, 0, sizeof (newrhs1)); + +@@ -4291,7 +5735,10 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + /* Go through each new lhs. */ + for (unsigned i = 0; i < max_split && decl->newdecl[i]; i++) + { +- tree newsize = TYPE_SIZE_UNIT (type->type); ++ /* Specify the correct size for the multi-layer pointer. */ ++ tree newsize = isptrptr (decl->orig_type) ++ ? TYPE_SIZE_UNIT (decl->orig_type) ++ : TYPE_SIZE_UNIT (type->newtype[i]); + gimple *g; + /* Every allocation except for calloc needs + the size multiplied out. */ +@@ -4352,6 +5799,23 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + gcc_assert (node); + srfunction *f = find_function (node); + ++ /* Add a safe func mechanism. 
*/ ++ if (current_mode == STRUCT_REORDER_FIELDS && f && f->is_safe_func) ++ { ++ tree expr = gimple_call_arg (stmt, 0); ++ tree newexpr[max_split]; ++ if (!rewrite_expr (expr, newexpr)) ++ return false; ++ ++ if (newexpr[1] == NULL) ++ { ++ gimple_call_set_arg (stmt, 0, newexpr[0]); ++ update_stmt (stmt); ++ return false; ++ } ++ return false; ++ } ++ + /* Did not find the function or had not cloned it return saying don't + change the function call. */ + if (!f || !f->newf) +@@ -4437,7 +5901,7 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) + SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; + +- gsi_replace (gsi, new_stmt, false); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + + /* We need to defer cleaning EH info on the new statement to + fixup-cfg. We may not have dominator information at this point +@@ -4450,7 +5914,7 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + add_stmt_to_eh_lp (new_stmt, lp_nr); + } + +- return false; ++ return true; + } + + /* Rewrite the conditional statement STMT. Return TRUE if the +@@ -4462,50 +5926,52 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) + tree_code rhs_code = gimple_cond_code (stmt); + + /* Handle only equals or not equals conditionals. 
*/ +- if (rhs_code != EQ_EXPR +- && rhs_code != NE_EXPR) ++ if ((current_mode != STRUCT_REORDER_FIELDS ++ && (rhs_code != EQ_EXPR && rhs_code != NE_EXPR)) ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE_CLASS (rhs_code) != tcc_comparison)) + return false; +- tree rhs1 = gimple_cond_lhs (stmt); +- tree rhs2 = gimple_cond_rhs (stmt); ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "COND: Rewriting\n"); ++ fprintf (dump_file, "\nCOND: Rewriting\n"); + print_gimple_stmt (dump_file, stmt, 0); ++ print_generic_expr (dump_file, lhs); + fprintf (dump_file, "\n"); +- print_generic_expr (dump_file, rhs1); +- fprintf (dump_file, "\n"); +- print_generic_expr (dump_file, rhs2); ++ print_generic_expr (dump_file, rhs); + fprintf (dump_file, "\n"); + } + +- tree newrhs1[max_split]; +- tree newrhs2[max_split]; +- tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; +- if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) ++ tree newlhs[max_split] = {}; ++ tree newrhs[max_split] = {}; ++ if (!rewrite_lhs_rhs (lhs, rhs, newlhs, newrhs)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "\nDid nothing to statement.\n"); ++ fprintf (dump_file, "Did nothing to statement.\n"); + return false; + } + +- tree newexpr = NULL_TREE; +- for (unsigned i = 0; i < max_split && newrhs1[i]; i++) ++ /* Old rewrite: if (x_1 != 0B) ++ -> _1 = x.reorder.0_1 != 0B; if (_1 != 1) ++ The logic is incorrect. 
++ New rewrite: if (x_1 != 0B) ++ -> if (x.reorder.0_1 != 0B); */ ++ for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++) + { +- tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, +- newrhs1[i], newrhs2[i]); +- if (!newexpr) +- newexpr = expr; +- else +- newexpr = gimplify_build2 (gsi, code, boolean_type_node, +- newexpr, expr); +- } +- +- if (newexpr) +- { +- gimple_cond_set_lhs (stmt, newexpr); +- gimple_cond_set_rhs (stmt, boolean_true_node); ++ if (newlhs[i]) ++ gimple_cond_set_lhs (stmt, newlhs[i]); ++ if (newrhs[i]) ++ gimple_cond_set_rhs (stmt, newrhs[i]); + update_stmt (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "replaced with:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } + } + return false; + } +@@ -4516,6 +5982,9 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) + bool + ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *) + { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ /* Delete debug gimple now. */ ++ return true; + bool remove = false; + if (gimple_debug_bind_p (stmt)) + { +@@ -4568,7 +6037,7 @@ ipa_struct_reorg::rewrite_phi (gphi *phi) + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "\nrewriting PHI:"); ++ fprintf (dump_file, "\nrewriting PHI:\n"); + print_gimple_stmt (dump_file, phi, 0); + } + +@@ -4579,7 +6048,15 @@ ipa_struct_reorg::rewrite_phi (gphi *phi) + { + tree newrhs[max_split]; + phi_arg_d rhs = *gimple_phi_arg (phi, i); +- rewrite_expr (rhs.def, newrhs); ++ ++ /* Handling the NULL phi Node. 
*/ ++ bool r = rewrite_expr (rhs.def, newrhs); ++ if (!r && integer_zerop (rhs.def)) ++ { ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ newrhs[i] = fold_convert (TREE_TYPE (newlhs[i]), rhs.def); ++ } ++ + for (unsigned j = 0; j < max_split && newlhs[j]; j++) + { + SET_PHI_ARG_DEF (newphi[j], i, newrhs[j]); +@@ -4590,7 +6067,7 @@ ipa_struct_reorg::rewrite_phi (gphi *phi) + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "\ninto\n:"); ++ fprintf (dump_file, "into:\n"); + for (unsigned i = 0; i < max_split && newlhs[i]; i++) + { + print_gimple_stmt (dump_file, newphi[i], 0); +@@ -4663,12 +6140,59 @@ ipa_struct_reorg::rewrite_functions (void) + /* Create new types, if we did not create any new types, + then don't rewrite any accesses. */ + if (!create_new_types ()) +- return 0; ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functions[i]; ++ cgraph_node *node = f->node; ++ push_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nNo rewrite:\n"); ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ } ++ pop_cfun (); ++ } ++ } ++ return 0; ++ } ++ ++ if (current_mode == STRUCT_REORDER_FIELDS && dump_file) ++ { ++ fprintf (dump_file, "=========== all created newtypes: ===========\n\n"); ++ dump_newtypes (dump_file); ++ } + + if (functions.length ()) + { + retval = TODO_remove_functions; + create_new_functions (); ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ prune_escaped_types (); ++ } ++ } ++ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functions[i]; ++ cgraph_node *node = f->node; ++ push_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "==== Before create decls: %dth_%s ====\n\n", 
++ i, f->node->name ()); ++ if (current_function_decl) ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ } ++ pop_cfun (); ++ } + } + + create_new_decls (); +@@ -4691,9 +6215,12 @@ ipa_struct_reorg::rewrite_functions (void) + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "\nBefore rewrite:\n"); ++ fprintf (dump_file, "\nBefore rewrite: %dth_%s\n", ++ i, f->node->name ()); + dump_function_to_file (current_function_decl, dump_file, + dump_flags | TDF_VOPS); ++ fprintf (dump_file, "\n======== Start to rewrite: %dth_%s ========\n", ++ i, f->node->name ()); + } + FOR_EACH_BB_FN (bb, cfun) + { +@@ -4761,9 +6288,10 @@ ipa_struct_reorg::rewrite_functions (void) + + free_dominance_info (CDI_DOMINATORS); + +- if (dump_file && (dump_flags & TDF_DETAILS)) ++ if (dump_file) + { +- fprintf (dump_file, "\nAfter rewrite:\n"); ++ fprintf (dump_file, "\nAfter rewrite: %dth_%s\n", ++ i, f->node->name ()); + dump_function_to_file (current_function_decl, dump_file, + dump_flags | TDF_VOPS); + } +@@ -4809,16 +6337,21 @@ ipa_struct_reorg::execute (enum srmode mode) + { + unsigned int ret = 0; + +- if (mode == NORMAL) ++ if (dump_file) ++ fprintf (dump_file, "\n\n====== ipa_struct_reorg level %d ======\n\n", ++ mode); ++ ++ if (mode == NORMAL || mode == STRUCT_REORDER_FIELDS) + { +- current_mode = NORMAL; +- /* FIXME: If there is a top-level inline-asm, ++ current_mode = mode; ++ /* If there is a top-level inline-asm, + the pass immediately returns. */ + if (symtab->first_asm_symbol ()) + return 0; + record_accesses (); + prune_escaped_types (); +- analyze_types (); ++ if (current_mode == NORMAL) ++ analyze_types (); + + ret = rewrite_functions (); + } +@@ -4881,7 +6414,55 @@ pass_ipa_struct_reorg::gate (function *) + && flag_lto_partition == LTO_PARTITION_ONE + /* Only enable struct optimizations in C since other + languages' grammar forbid. 
*/ +- && lang_c_p ()); ++ && lang_c_p () ++ /* Only enable struct optimizations in lto or whole_program. */ ++ && (in_lto_p || flag_whole_program)); ++} ++ ++const pass_data pass_data_ipa_reorder_fields = ++{ ++ SIMPLE_IPA_PASS, // type ++ "reorder_fields", // name ++ OPTGROUP_NONE, // optinfo_flags ++ TV_IPA_REORDER_FIELDS, // tv_id ++ 0, // properties_required ++ 0, // properties_provided ++ 0, // properties_destroyed ++ 0, // todo_flags_start ++ 0, // todo_flags_finish ++}; ++ ++class pass_ipa_reorder_fields : public simple_ipa_opt_pass ++{ ++public: ++ pass_ipa_reorder_fields (gcc::context *ctxt) ++ : simple_ipa_opt_pass (pass_data_ipa_reorder_fields, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *) ++ { ++ unsigned int ret = 0; ++ ret = ipa_struct_reorg ().execute (STRUCT_REORDER_FIELDS); ++ return ret; ++ } ++ ++}; // class pass_ipa_reorder_fields ++ ++bool ++pass_ipa_reorder_fields::gate (function *) ++{ ++ return (optimize >= 3 ++ && flag_ipa_reorder_fields ++ /* Don't bother doing anything if the program has errors. */ ++ && !seen_error () ++ && flag_lto_partition == LTO_PARTITION_ONE ++ /* Only enable struct optimizations in C since other ++ languages' grammar forbid. */ ++ && lang_c_p () ++ /* Only enable struct optimizations in lto or whole_program. 
*/ ++ && (in_lto_p || flag_whole_program)); + } + + } // anon namespace +@@ -4891,4 +6472,10 @@ simple_ipa_opt_pass * + make_pass_ipa_struct_reorg (gcc::context *ctxt) + { + return new pass_ipa_struct_reorg (ctxt); +-} +\ No newline at end of file ++} ++ ++simple_ipa_opt_pass * ++make_pass_ipa_reorder_fields (gcc::context *ctxt) ++{ ++ return new pass_ipa_reorder_fields (ctxt); ++} +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +index ef7f4c780..6f85adeb4 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -68,12 +68,14 @@ struct srfunction + auto_vec args; + auto_vec globals; + auto_vec_del decls; +- srdecl *record_decl (srtype *, tree, int arg); ++ srdecl *record_decl (srtype *, tree, int arg, tree orig_type = NULL); + + srfunction *old; + cgraph_node *newnode; + srfunction *newf; + ++ bool is_safe_func; ++ + // Constructors + srfunction (cgraph_node *n); + +@@ -184,6 +186,11 @@ struct srfield + void create_new_fields (tree newtype[max_split], + tree newfields[max_split], + tree newlast[max_split]); ++ void reorder_fields (tree newfields[max_split], tree newlast[max_split], ++ tree &field); ++ void create_new_reorder_fields (tree newtype[max_split], ++ tree newfields[max_split], ++ tree newlast[max_split]); + }; + + struct sraccess +@@ -221,8 +228,11 @@ struct srdecl + + tree newdecl[max_split]; + ++ /* Auxiliary record complete original type information of the void* type. */ ++ tree orig_type; ++ + // Constructors +- srdecl (srtype *type, tree decl, int argumentnum = -1); ++ srdecl (srtype *type, tree decl, int argumentnum = -1, tree orgtype = NULL); + + // Methods + void dump (FILE *file); +diff --git a/gcc/passes.def b/gcc/passes.def +index 9692066e4..bdc835b87 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -178,6 +178,7 @@ along with GCC; see the file COPYING3. If not see + compiled unit. 
*/ + INSERT_PASSES_AFTER (all_late_ipa_passes) + NEXT_PASS (pass_ipa_pta); ++ NEXT_PASS (pass_ipa_reorder_fields); + /* FIXME: this should be a normal IP pass. */ + NEXT_PASS (pass_ipa_struct_reorg); + NEXT_PASS (pass_omp_simd_clone); +diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h +index 3fe64047c..6fa529eee 100644 +--- a/gcc/symbol-summary.h ++++ b/gcc/symbol-summary.h +@@ -105,7 +105,7 @@ protected: + { + /* In structure optimizatons, we call new to ensure that + the allocated memory is initialized to 0. */ +- if (flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) + return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T () + : new T (); + +@@ -122,7 +122,7 @@ protected: + ggc_delete (item); + else + { +- if (flag_ipa_struct_reorg) ++ if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) + delete item; + else + m_allocator.remove (item); +diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +new file mode 100644 +index 000000000..b95be2dab +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +@@ -0,0 +1,75 @@ ++// escape_instance_field, "Type escapes via a field of instance". 
++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++ node_t node; ++} network_t; ++ ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++ node_t node; ++}; ++ ++ ++const int MAX = 100; ++ ++/* let it escape_array, "Type is used in an array [not handled yet]". */ ++network_t* net[2]; ++ ++int ++main () ++{ ++ net[0] = (network_t*) calloc (1, sizeof(network_t)); ++ net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ++ /* Contains an escape type and has structure instance field. */ ++ net[0]->arcs->node = net[0]->node; ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +new file mode 100644 +index 000000000..3d243313b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +@@ -0,0 +1,94 @@ ++// Verify in escape_dependent_type_escapes, ++// the multi-layer dereference is rewriting correctly,and the memory access ++// is correct. 
++ ++// release ++// escape_dependent_type_escapes, ++// "Type uses a type which escapes or is used by a type which escapes" ++// avoid escape_cast_another_ptr, "Type escapes a cast to a different pointer" ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++}; ++ ++ ++const int MAX = 100; ++ ++/* let it escape_array, "Type is used in an array [not handled yet]". 
*/ ++network_t* net[2]; ++arc_p stop_arcs = NULL; ++ ++int ++main () ++{ ++ net[0] = (network_t*) calloc (1, sizeof(network_t)); ++ net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ++ net[0]->arcs->id = 100; ++ ++ for (unsigned i = 0; i < 3; i++) ++ { ++ net[0]->arcs->id = net[0]->arcs->id + 2; ++ stop_arcs->cost = net[0]->arcs->id / 2; ++ stop_arcs->net_add = net[0]; ++ printf("stop_arcs->cost = %ld\n", stop_arcs->cost); ++ net[0]->arcs++; ++ stop_arcs++; ++ } ++ ++ if( net[1] != 0 && stop_arcs != 0) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c +new file mode 100644 +index 000000000..faaf1e3a5 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c +@@ -0,0 +1,24 @@ ++/* check_ptr_layers bugfix.*/ ++/* { dg-do compile } */ ++struct { ++ char a; ++} **b = 0, *e = 0; ++long c; ++char d = 9; ++int f; ++ ++void g() ++{ ++ for (; f;) ++ if (c) ++ (*e).a++; ++ if (!d) ++ for (;;) ++ b &&c; ++} ++int ++main() ++{ ++ g(); ++} ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +new file mode 100644 +index 000000000..886706ae9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +@@ -0,0 +1,82 @@ ++// bugfix: ++// Common members do not need to reconstruct. ++// Otherwise, eg:int* -> int** and void* -> void**. 
++/* { dg-do compile } */ ++ ++#include ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t* cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t** org_cost; ++}; ++ ++struct a ++{ ++ int t; ++ int t1; ++}; ++ ++__attribute__((noinline)) int ++f(int i, int j) ++{ ++ struct a *t = NULL; ++ struct a t1 = {i, j}; ++ t = &t1; ++ auto int g(void) __attribute__((noinline)); ++ int g(void) ++ { ++ return t->t + t->t1; ++ } ++ return g(); ++} ++ ++arc_t **ap = NULL; ++const int MAX = 100; ++ ++int ++main() ++{ ++ if (f(1, 2) != 3) ++ { ++ abort (); ++ } ++ ap = (arc_t**) malloc(MAX * sizeof(arc_t*)); ++ (*ap)[0].id = 300; ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +new file mode 100644 +index 000000000..f3785f392 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +@@ -0,0 +1,56 @@ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++#define MallocOrDie(x) sre_malloc((x)) ++ ++struct gki_elem { ++ char *key; ++ int idx; ++ struct gki_elem *nxt; ++}; ++ ++typedef struct { ++ struct gki_elem **table; ++ ++ int primelevel; ++ int nhash; ++ int nkeys; ++} GKI; ++ ++void ++Die(char *format, ...) 
++{ ++ exit(1); ++} ++ ++void * ++sre_malloc(size_t size) ++{ ++ void *ptr; ++ ++ if ((ptr = malloc (size)) == NULL) ++ { ++ Die("malloc of %ld bytes failed", size); ++ } ++ return ptr; ++} ++ ++ ++__attribute__((noinline)) int ++GKIStoreKey(GKI *hash, char *key) ++{ ++ hash->table[0] = MallocOrDie(sizeof(struct gki_elem)); ++} ++ ++int ++main () ++{ ++ GKI *hash; ++ char *key; ++ GKIStoreKey(hash, key); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +new file mode 100644 +index 000000000..1415d759a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +@@ -0,0 +1,60 @@ ++// verify newarc[cmp-1].flow ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_p ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap[0].id); ++ for (int i = 1; i < MAX; i++) ++ { ++ ap[i-1].id = 500; ++ } ++ printf("%d\n", ap[0].id); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c +new file mode 100644 +index 
000000000..003da0b57 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c +@@ -0,0 +1,83 @@ ++// release type is used by a type which escapes. ++// avoid escape_cast_another_ptr, "Type escapes a cast to a different pointer" ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++network_t* net = NULL; ++arc_p stop_arcs = NULL; ++int cnt = 0; ++ ++int ++main () ++{ ++ net = (network_t*) calloc (1, 20); ++ net->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ if(!(net->arcs)) ++ { ++ return -1; ++ } ++ ++ for( int i = 0; i < MAX; i++, net->arcs = stop_arcs) ++ { ++ cnt++; ++ } ++ ++ net = (network_t*) calloc (1, 20); ++ if( !(net->arcs) ) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_external_func_types.c b/gcc/testsuite/gcc.dg/struct/rf_external_func_types.c +new file mode 100644 +index 000000000..84a34f241 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_external_func_types.c +@@ -0,0 +1,69 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-shared" } */ ++ ++#include ++#include ++ ++typedef struct 
node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ int x; ++ arc_p arcs, sorted_arcs; ++ node_p nodes, stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++extern int bcf_sr_add_reader (network_t *); ++extern int bcf_hdr_dup (arc_p); ++ ++int ++test () ++{ ++ network_t *net = (network_t *) calloc (1, 20); ++ ++ if (!bcf_sr_add_reader(net)) ++ printf("error"); ++ arc_p arc = net->nodes->basic_arc; ++ if(!bcf_hdr_dup(arc)) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +new file mode 100644 +index 000000000..10dcf098c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +@@ -0,0 +1,72 @@ ++// release escape_cast_another_ptr, "Type escapes a cast to a different pointer" ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++typedef int cmp_t(const void *, const void *); ++ ++__attribute__((noinline)) void ++spec_qsort(void *a, cmp_t *cmp) ++{ ++ char *pb = NULL; ++ while (cmp(pb, a)) ++ { ++ pb += 1; ++ } ++} ++ ++static int arc_compare( arc_t **a1, int a2 ) ++{ ++ if( (*a1)->id < a2 ) ++ { ++ return -1; ++ } ++ return 1; ++} ++ ++int ++main() ++{ ++ spec_qsort(NULL, (int (*)(const void *, const void *))arc_compare); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +new file mode 100644 +index 000000000..8d1a9a114 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +@@ -0,0 +1,58 @@ ++/* Supports the MEM_REF offset. 
++ _1 = MEM[(struct arc *)ap_4 + 72B].flow; ++ Old rewrite:_1 = ap.reorder.0_8->flow; ++ New rewrite:_1 = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow. */ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ const int MAX = 100; ++ /* A similar scenario can be reproduced only by using local variables. */ ++ arc_p ap = NULL; ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap[1].flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c +new file mode 100644 +index 000000000..23765fc56 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct T_HASH_ENTRY ++{ ++ unsigned int hash; ++ unsigned int klen; ++ char *key; ++} iHashEntry; ++ ++typedef struct T_HASH ++{ ++ unsigned int size; ++ unsigned int fill; ++ unsigned int keys; ++ ++ iHashEntry **array; ++} uHash; ++ ++uHash *retval; ++ ++int ++main() { ++ retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of 
file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +new file mode 100644 +index 000000000..54e737ee8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +@@ -0,0 +1,109 @@ ++// For testing: ++/* ++Compile options: gcc -O3 -g ++-flto -flto-partition=one -fipa-reorder-fields -fipa-struct-reorg ++-v -save-temps -fdump-ipa-all-details test.c -o test ++ ++in COMPLETE_STRUCT_RELAYOUT pass: ++N type: struct node.reorder.0 new = "Type escapes a cast to a different pointer" ++copy$head_26 = test_arc.reorder.0_49->head; ++ ++type : struct arc.reorder.0(1599) { ++fields = { ++field (5382) {type = cost_t} ++field (5383) {type = struct node.reorder.0 *} // but node has escaped. ++field (5384) {type = struct node.reorder.0 *} ++field (5386) {type = struct arc.reorder.0 *} ++field (5387) {type = struct arc.reorder.0 *} ++field (5388) {type = flow_t} ++field (5389) {type = cost_t} ++field (5381) {type = int} ++field (5385) {type = short int} ++} ++ ++// The types of the two types are inconsistent after the rewriting. 
++newarc_2(D)->tail = tail_1(D); ++vs ++struct_reorder.0_61(D)->tail = tail_1(D); ++*/ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++__attribute__((noinline)) void ++replace_weaker_arc( arc_t *newarc, node_t *tail, node_t *head) ++{ ++ printf("test"); ++} ++ ++__attribute__((noinline)) int64_t ++switch_arcs(arc_t** deleted_arcs, arc_t* arcnew) ++{ ++ int64_t count = 0; ++ arc_t *test_arc, copy; ++ ++ if (!test_arc->ident) ++ { ++ copy = *test_arc; ++ count++; ++ *test_arc = arcnew[0]; ++ replace_weaker_arc(arcnew, NULL, NULL); ++ } ++ return count; ++} ++ ++int ++main () ++{ ++ switch_arcs(NULL, NULL); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c +new file mode 100644 +index 000000000..2ae46fb31 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c +@@ -0,0 +1,87 @@ ++// escape_cast_void, "Type escapes a cast to/from void*" ++// stop_393 = net.stop_nodes; void *stop; ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc 
*arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs, sorted_arcs; ++ int x; ++ node_p nodes, stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++network_t* net = NULL; ++int cnt = 0; ++ ++__attribute__((noinline)) int ++primal_feasible (network_t *net) ++{ ++ void* stop; ++ node_t *node; ++ ++ node = net->nodes; ++ stop = (void *)net->stop_nodes; ++ for( node++; node < (node_t *)stop; node++ ) ++ { ++ printf( "PRIMAL NETWORK SIMPLEX: " ); ++ } ++ return 0; ++} ++ ++int ++main () ++{ ++ net = (network_t*) calloc (1, 20); ++ net->nodes = calloc (MAX, sizeof (node_t)); ++ net->stop_nodes = calloc (MAX, sizeof (node_t)); ++ cnt = primal_feasible( net ); ++ ++ net = (network_t*) calloc (1, 20); ++ if( !(net->arcs) ) ++ { ++ return -1; ++ } ++ return cnt; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c +new file mode 100644 +index 000000000..3a3c10b70 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c +@@ -0,0 +1,71 @@ ++// support POINTER_DIFF_EXPR & NOP_EXPR to avoid ++// escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt" ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ arc_t *old_arcs; ++ node_t *node; ++ node_t *stop; ++ size_t off; ++ network_t* net; ++ ++ for( ; node->number < stop->number; node++ ) ++ { ++ off = node->basic_arc - old_arcs; ++ node->basic_arc = (arc_t *)(net->arcs + off); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c +new file mode 100644 +index 000000000..7b7d110df +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c +@@ -0,0 +1,55 @@ ++// support NEGATE_EXPR rewriting ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef 
struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ int64_t susp = 0; ++ const int MAX = 100; ++ arc_p ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ ap -= susp; ++ printf("%d\n", ap[1].flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c +new file mode 100644 +index 000000000..317aafa5f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++struct node ++{ ++ struct node *left, *right; ++ double a, b, c, d, e, f; ++} ++*a; ++int b, c; ++void ++CreateNode (struct node **p1) ++{ ++ *p1 = calloc (10, sizeof (struct node)); ++} ++ ++int ++main () ++{ ++ a->left = 0; ++ struct node *t = a; ++ CreateNode (&t->right); ++ ++ struct node p = *a; ++ b = 1; ++ if (p.left) ++ b = 0; ++ if (b) ++ printf (" Tree.\n"); ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." 
"reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c +new file mode 100644 +index 000000000..01a33f669 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c +@@ -0,0 +1,55 @@ ++// release escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]"; ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_t **ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_t**) malloc(MAX * sizeof(arc_t*)); ++ (*ap)[0].id = 300; ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c +new file mode 100644 +index 000000000..a38556533 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c +@@ -0,0 +1,58 @@ ++// release escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]" ++ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p 
arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_p **ap; ++ ++ ++int ++main () ++{ ++ ap = (arc_p**) calloc(MAX, sizeof(arc_p*)); ++ (**ap)[0].id = 500; ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c +new file mode 100644 +index 000000000..5c17ee528 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c +@@ -0,0 +1,57 @@ ++// release escape_rescusive_type, "Recusive type" ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_p ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ap[0].id = 100; ++ ap[0].head = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c +new file mode 100644 +index 000000000..710517ee9 +--- 
/dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c +@@ -0,0 +1,65 @@ ++// support more gimple assign rhs code ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++__attribute__((noinline)) int ++compare(arc_p p1, arc_p p2) ++{ ++ return p1 < p2; ++} ++ ++int n = 0; ++int m = 0; ++ ++int ++main () ++{ ++ scanf ("%d %d", &n, &m); ++ arc_p p = calloc (10, sizeof (struct arc)); ++ if (compare (&p[n], &p[m])) ++ { ++ printf ("ss!"); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c +new file mode 100644 +index 000000000..6ed0a5d2d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c +@@ -0,0 +1,72 @@ ++// rewrite_cond bugfix; ++/* ++if (iterator_600 != 0B) ++old rewrite: _1369 = iterator.reorder.0_1249 != 0B; if (_1369 != 1) ++new rewrite: if (iterator.reorder.0_1249 != 0B) ++*/ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct list_elem ++{ ++ arc_t* arc; ++ struct list_elem* next; ++}list_elem; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p
sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int i = 0; ++ ++int ++main () ++{ ++ register list_elem *first_list_elem; ++ register list_elem* iterator; ++ iterator = first_list_elem->next; ++ while (iterator) ++ { ++ iterator = iterator->next; ++ i++; ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c +new file mode 100644 +index 000000000..5a2dd964f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c +@@ -0,0 +1,58 @@ ++// support if (_150 >= _154) ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main() ++{ ++ arc_p **ap = (arc_p**) malloc(1 * sizeof(arc_p*)); ++ arc_p **arcs_pointer_sorted = (arc_p**) malloc(1 * sizeof(arc_p*)); ++ arcs_pointer_sorted[0] = (arc_p*) calloc (1, sizeof(arc_p)); ++ ++ if (arcs_pointer_sorted >= ap) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to 
transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c +new file mode 100644 +index 000000000..faa90b42d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c +@@ -0,0 +1,81 @@ ++/* ++Exclude the rewriting error caused by ++first_list_elem = (list_elem *)NULL; ++rewriting PHI:first_list_elem_700 = PHI <0B(144), 0B(146)> ++into: ++first_list_elem.reorder.0_55 = PHI <(144), (146)> ++*/ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct list_elem ++{ ++ arc_t* arc; ++ struct list_elem* next; ++}list_elem; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout, firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail, head; ++ short ident; ++ arc_p nextout, nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++ ++list_elem* new_list_elem; ++list_elem* first_list_elem; ++ ++int ++main () ++{ ++ int i = 0; ++ list_elem *first_list_elem; ++ list_elem *new_list_elem; ++ arc_t *arcout; ++ for( ; i < MAX && arcout->ident == -1; i++); ++ ++ first_list_elem = (list_elem *)NULL; ++ for( ; i < MAX; i++) ++ { ++ new_list_elem = (list_elem*) calloc(1, sizeof(list_elem)); ++ new_list_elem->next = first_list_elem; ++ first_list_elem = new_list_elem; ++ } ++ if (first_list_elem != 0) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_shwi.c b/gcc/testsuite/gcc.dg/struct/rf_shwi.c +new file mode 
100644 +index 000000000..2bb326ff2 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_shwi.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++ ++struct foo {int dx; long dy; int dz; }; ++struct goo {long offset; struct foo* pfoo; }; ++ ++void* func (long); ++ ++__attribute__((used)) static void ++test(struct goo* g) ++{ ++ void* pvoid; ++ struct foo* f; ++ ++ for (f = g->pfoo; f->dx; f++) ++ { ++ if (f->dy) ++ break; ++ } ++ f--; ++ ++ pvoid = func(f->dz + g->offset); ++ return; ++} +diff --git a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c +new file mode 100644 +index 000000000..8f2da99cc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c +@@ -0,0 +1,92 @@ ++// release escape_visible_function, "Type escapes via expternally visible function call" ++// compile options: gcc -O3 -fno-inline -fwhole-program ++// -flto-partition=one -fipa-struct-reorg arc_compare.c -fdump-ipa-all -S -v ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++__attribute__((noinline)) static int ++arc_compare( arc_t **a1, arc_t **a2 ) ++{ ++ if( (*a1)->flow > (*a2)->flow ) ++ { ++ return 1; ++ } ++ if( (*a1)->flow < (*a2)->flow ) ++ { ++ return -1; ++ } ++ if( (*a1)->id < (*a2)->id ) ++ { ++ return -1; ++ } ++ ++ return 1; ++} ++ ++__attribute__((noinline)) void ++spec_qsort(void *array, int nitems, int size, ++ int (*cmp)(const void*,const void*)) ++{ ++ 
for (int i = 0; i < nitems - 1; i++) ++ { ++ if (cmp (array , array)) ++ { ++ printf ("CMP 1\n"); ++ } ++ else ++ { ++ printf ("CMP 2\n"); ++ } ++ } ++} ++ ++typedef int cmp_t(const void *, const void *); ++ ++int ++main () ++{ ++ void *p = calloc (100, sizeof (arc_t **)); ++ spec_qsort (p, 100, 0, (int (*)(const void *, const void *))arc_compare); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c +new file mode 100644 +index 000000000..723142c59 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c +@@ -0,0 +1,54 @@ ++// Add a safe func mechanism. ++// avoid escape_unkown_field, "Type escapes via an unkown field accessed" ++// avoid escape_cast_void, "Type escapes a cast to/from void*" eg: GIMPLE_NOP ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++void ++__attribute__((noinline)) spec_qsort (void *a, size_t es) ++{ ++ char *pa; ++ char *pb; ++ int cmp_result; ++ ++ while ((*(arc_t **)a)->id < *((int *)a)) ++ { ++ if (cmp_result == 0) ++ { ++ spec_qsort (a, es); ++ pa = (char *)a - es; ++ a += es; ++ *(long *)pb = *(long *)pa; ++ } ++ else ++ { ++ a -= pa - pb; ++ } ++ } ++} ++ ++int ++main() ++{ ++ arc_p **arcs_pointer_sorted; ++ spec_qsort (arcs_pointer_sorted[0], sizeof (arc_p)); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +index 43913104e..5a476e8f9 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ++++ 
b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -27,8 +27,21 @@ set STRUCT_REORG_TORTURE_OPTIONS [list \ + + set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}} + +-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ ++# -fipa-struct-reorg ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_*.c]] \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_*.c]] \ + "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/struct_reorg*.cpp]] \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/sr_*.c]] \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" ++ ++# -fipa-reorder-fields ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf_*.c]] \ ++ "" "-fipa-reorder-fields -fdump-ipa-all -flto-partition=one -fwhole-program" + + # All done. + torture-finish +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +index 6565fe8dd..23444fe8b 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +@@ -1,5 +1,5 @@ + // { dg-do compile } +-// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } ++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all -fwhole-program" } + + struct a + { +@@ -21,4 +21,10 @@ int g(void) + return b->t; + } + ++int main() ++{ ++ f (); ++ return g (); ++} ++ + /* { dg-final { scan-ipa-dump "No structures to transform." 
"struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +index 5864ad46f..2d1f95c99 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +@@ -1,5 +1,5 @@ + // { dg-do compile } +-// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } ++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all -fwhole-program" } + + #include + typedef struct { +@@ -10,7 +10,7 @@ typedef struct { + compile_stack_elt_t *stack; + unsigned size; + } compile_stack_type; +-void f (const char *p, const char *pend, int c) ++__attribute__((noinline)) void f (const char *p, const char *pend, int c) + { + compile_stack_type compile_stack; + while (p != pend) +@@ -20,4 +20,9 @@ void f (const char *p, const char *pend, int c) + * sizeof (compile_stack_elt_t)); + } + ++int main() ++{ ++ f (NULL, NULL, 1); ++} ++ + /* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 98a5a490f..2b27c858a 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -80,6 +80,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") ++DEFTIMEVAR (TV_IPA_REORDER_FIELDS , "ipa struct reorder fields optimization") + DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") + DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 56898e019..a9ec8ed21 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -527,6 +527,7 @@ extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); + extern ipa_opt_pass_d 
*make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); ++extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); +-- +2.33.0 + diff --git a/0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch b/0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch new file mode 100644 index 0000000..2822078 --- /dev/null +++ b/0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch @@ -0,0 +1,1753 @@ +From 9d03b0a7741915e3a0172d60b9c21bf5abbda89e Mon Sep 17 00:00:00 2001 +From: Mingchuan Wu +Date: Mon, 28 Aug 2023 18:11:02 +0800 +Subject: [PATCH 22/22] [DFE] Add Dead Field Elimination in Struct-Reorg. + +We can transform gimple to eliminate fields that are never read +and replace the dead fields in stmt by creating a new ssa. 
+--- + gcc/common.opt | 4 + + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 240 +++++++++++++++++- + gcc/ipa-struct-reorg/ipa-struct-reorg.h | 8 + + gcc/opts.cc | 17 ++ + gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c | 86 +++++++ + .../gcc.dg/struct/dfe_ele_minus_verify.c | 60 +++++ + .../gcc.dg/struct/dfe_extr_board_init.c | 77 ++++++ + gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c | 84 ++++++ + gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c | 56 ++++ + gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c | 162 ++++++++++++ + gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c | 126 +++++++++ + .../gcc.dg/struct/dfe_extr_mv_udc_core.c | 82 ++++++ + .../gcc.dg/struct/dfe_extr_tcp_usrreq.c | 58 +++++ + .../gcc.dg/struct/dfe_extr_ui_main.c | 61 +++++ + .../gcc.dg/struct/dfe_mem_ref_offset.c | 58 +++++ + .../struct/dfe_mul_layer_ptr_record_bug.c | 30 +++ + gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c | 71 ++++++ + .../gcc.dg/struct/dfe_ptr_negate_expr.c | 55 ++++ + gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c | 55 ++++ + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 4 + + .../struct/wo_prof_escape_replace_type.c | 49 ++++ + 21 files changed, 1436 insertions(+), 7 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c + 
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c + create mode 100644 gcc/testsuite/gcc.dg/struct/wo_prof_escape_replace_type.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index 14633c821..8bb735551 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1988,6 +1988,10 @@ fipa-struct-reorg + Common Var(flag_ipa_struct_reorg) Init(0) Optimization + Perform structure layout optimizations. + ++fipa-struct-reorg= ++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 3) ++-fipa-struct-reorg=[0,1,2,3] adding none, struct-reorg, reorder-fields, dfe optimizations. ++ + fipa-vrp + Common Var(flag_ipa_vrp) Optimization + Perform IPA Value Range Propagation. +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +index 3e5f9538b..eac5fac7e 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -87,6 +87,7 @@ along with GCC; see the file COPYING3. If not see + #include "tree-pretty-print.h" + #include "gimple-pretty-print.h" + #include "gimple-iterator.h" ++#include "gimple-walk.h" + #include "cfg.h" + #include "ssa.h" + #include "tree-dfa.h" +@@ -268,10 +269,43 @@ enum srmode + STRUCT_REORDER_FIELDS + }; + ++/* Struct layout optimization levels; the values must stay in sync ++ with the argument of the option -fipa-struct-reorg=. */ ++ ++enum struct_layout_opt_level ++{ ++ NONE = 0, ++ STRUCT_REORG, ++ STRUCT_REORDER_FIELDS_SLO, ++ DEAD_FIELD_ELIMINATION ++}; ++ + static bool is_result_of_mult (tree arg, tree *num, tree struct_size); + static bool isptrptr (tree type); ++void get_base (tree &base, tree expr); + + srmode current_mode; ++hash_map replace_type_map; ++ ++/* Return true if one of these types is created by struct-reorg.
*/ ++ ++static bool ++is_replace_type (tree type1, tree type2) ++{ ++ if (replace_type_map.is_empty ()) ++ return false; ++ if (type1 == NULL_TREE || type2 == NULL_TREE) ++ return false; ++ tree *type_value = replace_type_map.get (type1); ++ if (type_value) ++ if (types_compatible_p (*type_value, type2)) ++ return true; ++ type_value = replace_type_map.get (type2); ++ if (type_value) ++ if (types_compatible_p (*type_value, type1)) ++ return true; ++ return false; ++} + + } // anon namespace + +@@ -353,7 +387,8 @@ srfield::srfield (tree field, srtype *base) + fielddecl (field), + base (base), + type (NULL), +- clusternum (0) ++ clusternum (0), ++ field_access (EMPTY_FIELD) + { + for (int i = 0; i < max_split; i++) + newfield[i] = NULL_TREE; +@@ -392,6 +427,25 @@ srtype::srtype (tree type) + } + } + ++/* Return true if any field in the RECORD_TYPE is never read. */ ++ ++bool ++srtype::has_dead_field (void) ++{ ++ bool may_dfe = false; ++ srfield *this_field; ++ unsigned i; ++ FOR_EACH_VEC_ELT (fields, i, this_field) ++ { ++ if (!(this_field->field_access & READ_FIELD)) ++ { ++ may_dfe = true; ++ break; ++ } ++ } ++ return may_dfe; ++} ++ + /* Mark the type as escaping type E at statement STMT. */ + + void +@@ -595,7 +649,17 @@ srtype::analyze (void) + into 2 different structures. In future we intend to add profile + info and/or static heuristics to differentiate splitting process. */ + if (fields.length () == 2) +- fields[1]->clusternum = 1; ++ { ++ /* Currently, when the replacement structure type exists, ++ we only split the replacement structure. */ ++ for (hash_map::iterator it = replace_type_map.begin (); ++ it != replace_type_map.end (); ++it) ++ { ++ if (types_compatible_p ((*it).second, this->type)) ++ return; ++ } ++ fields[1]->clusternum = 1; ++ } + + /* Otherwise we do nothing.
*/ + if (fields.length () >= 3) +@@ -838,6 +902,10 @@ srtype::create_new_type (void) + for (unsigned i = 0; i < fields.length (); i++) + { + srfield *f = fields[i]; ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ && !(f->field_access & READ_FIELD)) ++ continue; + f->create_new_fields (newtype, newfields, newlast); + } + +@@ -856,6 +924,16 @@ srtype::create_new_type (void) + + warn_padded = save_warn_padded; + ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && replace_type_map.get (this->newtype[0]) == NULL) ++ replace_type_map.put (this->newtype[0], this->type); ++ if (dump_file) ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ && has_dead_field ()) ++ fprintf (dump_file, "Dead field elimination.\n"); ++ } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Created %d types:\n", maxclusters); +@@ -1269,6 +1347,7 @@ public: + void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); + + unsigned execute_struct_relayout (void); ++ bool remove_dead_field_stmt (tree lhs); + }; + + struct ipa_struct_relayout +@@ -3057,6 +3136,119 @@ ipa_struct_reorg::find_vars (gimple *stmt) + } + } + ++static HOST_WIDE_INT ++get_offset (tree op, HOST_WIDE_INT offset) ++{ ++ switch (TREE_CODE (op)) ++ { ++ case COMPONENT_REF: ++ { ++ return int_byte_position (TREE_OPERAND (op, 1)); ++ } ++ case MEM_REF: ++ { ++ return tree_to_uhwi (TREE_OPERAND (op, 1)); ++ } ++ default: ++ return offset; ++ } ++ return offset; ++} ++ ++/* Record field access. 
*/ ++static void ++record_field_access (tree type, HOST_WIDE_INT offset, ++ unsigned access, void *data) ++{ ++ srtype *this_srtype = ((ipa_struct_reorg *)data)->find_type (type); ++ if (this_srtype == NULL) ++ return; ++ srfield *this_srfield = this_srtype->find_field (offset); ++ if (this_srfield == NULL) ++ return; ++ ++ this_srfield->field_access |= access; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "record field access %d:", access); ++ print_generic_expr (dump_file, type); ++ fprintf (dump_file, " field:"); ++ print_generic_expr (dump_file, this_srfield->fielddecl); ++ fprintf (dump_file, "\n"); ++ } ++ return; ++ ++} ++ ++/* Update field_access in srfield. */ ++ ++static void ++update_field_access (tree node, tree op, unsigned access, void *data) ++{ ++ HOST_WIDE_INT offset = 0; ++ offset = get_offset (op, offset); ++ tree node_type = inner_type (TREE_TYPE (node)); ++ record_field_access (node_type, offset, access, data); ++ tree base = node; ++ get_base (base, node); ++ tree base_type = inner_type (TREE_TYPE (base)); ++ if (!types_compatible_p (base_type, node_type)) ++ { ++ record_field_access (base_type, get_offset (node, offset), ++ access, data); ++ } ++ return; ++} ++ ++/* A callback for walk_stmt_load_store_ops to visit store. */ ++ ++static bool ++find_field_p_store (gimple *stmt ATTRIBUTE_UNUSED, ++ tree node, tree op, void *data) ++{ ++ update_field_access (node, op, WRITE_FIELD, data); ++ ++ return false; ++} ++ ++/* A callback for walk_stmt_load_store_ops to visit load. */ ++ ++static bool ++find_field_p_load (gimple *stmt ATTRIBUTE_UNUSED, ++ tree node, tree op, void *data) ++{ ++ update_field_access (node, op, READ_FIELD, data); ++ ++ return false; ++} ++ ++/* Determine whether the stmt should be deleted. 
*/ ++ ++bool ++ipa_struct_reorg::remove_dead_field_stmt (tree lhs) ++{ ++ tree base = NULL_TREE; ++ bool indirect = false; ++ srtype *t = NULL; ++ srfield *f = NULL; ++ bool realpart = false; ++ bool imagpart = false; ++ bool address = false; ++ bool escape_from_base = false; ++ if (!get_type_field (lhs, base, indirect, t, f, realpart, imagpart, ++ address, escape_from_base)) ++ return false; ++ if (t ==NULL) ++ return false; ++ if (t->newtype[0] == t->type) ++ return false; ++ if (f == NULL) ++ return false; ++ if (f->newfield[0] == NULL) ++ return true; ++ return false; ++} ++ + /* Maybe record access of statement for further analaysis. */ + + void +@@ -3078,6 +3270,13 @@ ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) + default: + break; + } ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION) ++ { ++ /* Look for loads and stores. */ ++ walk_stmt_load_store_ops (stmt, this, find_field_p_load, ++ find_field_p_store); ++ } + } + + /* Calculate the multiplier. */ +@@ -3368,8 +3567,11 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, + } + else if (type != d->type) + { +- type->mark_escape (escape_cast_another_ptr, stmt); +- d->type->mark_escape (escape_cast_another_ptr, stmt); ++ if (!is_replace_type (d->type->type, type->type)) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ d->type->mark_escape (escape_cast_another_ptr, stmt); ++ } + } + /* x_1 = y.x_nodes; void *x; + Directly mark the structure pointer type assigned +@@ -3949,8 +4151,9 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, + } + /* If we have a non void* or a decl (which is hard to track), + then mark the type as escaping. 
*/ +- if (!VOID_POINTER_P (TREE_TYPE (newdecl)) +- || DECL_P (newdecl)) ++ if (replace_type_map.get (type->type) == NULL ++ && (!VOID_POINTER_P (TREE_TYPE (newdecl)) ++ || DECL_P (newdecl))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { +@@ -4216,7 +4419,9 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, + } + + srtype *t1 = find_type (inner_type (t)); +- if (t1 == type) ++ /* In the other side check, escape mark is added ++ when the replacement struct type exists. */ ++ if (t1 == type || is_replace_type (inner_type (t), type->type)) + { + /* In Complete Struct Relayout, if lhs type is the same + as rhs type, we could return without any harm. */ +@@ -5513,6 +5718,27 @@ bool + ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + { + bool remove = false; ++ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ && remove_dead_field_stmt (gimple_assign_lhs (stmt))) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\n rewriting statement (remove): \n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ } ++ /* Replace the dead field in stmt by creating a dummy ssa. 
*/ ++ tree dummy_ssa = make_ssa_name (TREE_TYPE (gimple_assign_lhs (stmt))); ++ gimple_assign_set_lhs (stmt, dummy_ssa); ++ update_stmt (stmt); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "To: \n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ } ++ } ++ + if (gimple_clobber_p (stmt)) + { + tree lhs = gimple_assign_lhs (stmt); +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +index 6f85adeb4..719f7b308 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -143,6 +143,7 @@ public: + + bool create_new_type (void); + void analyze (void); ++ bool has_dead_field (void); + void mark_escape (escape_type, gimple *stmt); + bool has_escaped (void) + { +@@ -164,6 +165,12 @@ public: + } + }; + ++/* Bitflags used for determining if a field ++ is never accessed, read or written. */ ++const unsigned EMPTY_FIELD = 0x0u; ++const unsigned READ_FIELD = 0x01u; ++const unsigned WRITE_FIELD = 0x02u; ++ + struct srfield + { + unsigned HOST_WIDE_INT offset; +@@ -175,6 +182,7 @@ struct srfield + unsigned clusternum; + + tree newfield[max_split]; ++ unsigned field_access; /* FIELD_DECL -> bitflag (use for dfe). */ + + // Constructors + srfield (tree field, srtype *base); +diff --git a/gcc/opts.cc b/gcc/opts.cc +index c3cc2c169..b868d189e 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -2957,6 +2957,23 @@ common_handle_option (struct gcc_options *opts, + SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value); + break; + ++ case OPT_fipa_struct_reorg_: ++ /* No break here - do -fipa-struct-reorg processing. */ ++ /* FALLTHRU. */ ++ case OPT_fipa_struct_reorg: ++ opts->x_flag_ipa_struct_reorg = value; ++ if (value && !opts->x_struct_layout_optimize_level) ++ { ++ /* Using the -fipa-struct-reorg option is equivalent to using ++ -fipa-struct-reorg=1. 
*/ ++ opts->x_struct_layout_optimize_level = 1; ++ } ++ break; ++ ++ case OPT_fipa_reorder_fields: ++ SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_struct_reorg, value); ++ break; ++ + case OPT_fprofile_generate_: + opts->x_profile_data_prefix = xstrdup (arg); + value = true; +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +new file mode 100644 +index 000000000..0c9e384c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +@@ -0,0 +1,86 @@ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++}; ++ ++ ++const int MAX = 100; ++ ++/* let it escape_array, "Type is used in an array [not handled yet]". 
*/ ++network_t* net[2]; ++arc_p stop_arcs = NULL; ++ ++int ++main () ++{ ++ net[0] = (network_t*) calloc (1, sizeof(network_t)); ++ net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ++ net[0]->arcs->id = 100; ++ ++ for (unsigned i = 0; i < 3; i++) ++ { ++ net[0]->arcs->id = net[0]->arcs->id + 2; ++ stop_arcs->cost = net[0]->arcs->id / 2; ++ stop_arcs->net_add = net[0]; ++ printf("stop_arcs->cost = %ld\n", stop_arcs->cost); ++ net[0]->arcs++; ++ stop_arcs++; ++ } ++ ++ if( net[1] != 0 && stop_arcs != 0) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +new file mode 100644 +index 000000000..717fcc386 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +@@ -0,0 +1,60 @@ ++// verify newarc[cmp-1].flow ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_p ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap[0].id); ++ for (int i = 1; i < MAX; i++) ++ { ++ ap[i-1].id = 500; ++ } ++ printf("%d\n", ap[0].id); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git 
a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c +new file mode 100644 +index 000000000..7723c240b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c +@@ -0,0 +1,77 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_5__ TYPE_2__; ++typedef struct TYPE_4__ TYPE_1__; ++ ++struct TYPE_4__ ++{ ++ int Pin; ++ int Pull; ++ int Mode; ++ int Speed; ++}; ++ ++struct TYPE_5__ ++{ ++ int MEMRMP; ++}; ++typedef TYPE_1__ GPIO_InitTypeDef; ++ ++int BT_RST_PIN; ++int BT_RST_PORT; ++int CONN_POS10_PIN; ++int CONN_POS10_PORT; ++int GPIO_HIGH (int, int); ++int GPIO_MODE_INPUT; ++int GPIO_MODE_OUTPUT_PP; ++int GPIO_NOPULL; ++int GPIO_PULLUP; ++int GPIO_SPEED_FREQ_LOW; ++int HAL_GPIO_Init (int, TYPE_1__ *); ++scalar_t__ IS_GPIO_RESET (int, int); ++TYPE_2__ *SYSCFG; ++int __HAL_RCC_GPIOB_CLK_ENABLE (); ++int __HAL_RCC_GPIOC_CLK_ENABLE (); ++ ++__attribute__((used)) static void ++LBF_DFU_If_Needed (void) ++{ ++ GPIO_InitTypeDef GPIO_InitStruct; ++ __HAL_RCC_GPIOC_CLK_ENABLE (); ++ GPIO_InitStruct.Mode = GPIO_MODE_OUTPUT_PP; ++ GPIO_InitStruct.Pull = GPIO_NOPULL; ++ GPIO_InitStruct.Speed = GPIO_SPEED_FREQ_LOW; ++ GPIO_InitStruct.Pin = BT_RST_PIN; ++ HAL_GPIO_Init (BT_RST_PORT, &GPIO_InitStruct); ++ ++ GPIO_HIGH (BT_RST_PORT, BT_RST_PIN); ++ __HAL_RCC_GPIOB_CLK_ENABLE (); ++ GPIO_InitStruct.Mode = GPIO_MODE_INPUT; ++ GPIO_InitStruct.Pull = GPIO_PULLUP; ++ GPIO_InitStruct.Pin = CONN_POS10_PIN; ++ HAL_GPIO_Init (CONN_POS10_PORT, &GPIO_InitStruct); ++ ++ if (IS_GPIO_RESET (CONN_POS10_PORT, CONN_POS10_PIN)) ++ { ++ SYSCFG->MEMRMP = 0x00000001; ++ asm ( ++ "LDR R0, =0x000000\n\t" ++ "LDR SP, [R0, #0]\n\t" ++ ); ++ asm ( ++ "LDR R0, [R0, #0]\n\t" ++ "BX R0\n\t" ++ ); ++ } ++} ++ ++/* { dg-final { scan-ipa-dump-times 
"Dead field elimination" 0 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c +new file mode 100644 +index 000000000..a1feac966 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c +@@ -0,0 +1,84 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_2__ TYPE_1__; ++ ++struct net_device ++{ ++ struct claw_privbk* ml_priv; ++}; ++struct clawctl ++{ ++ int linkid; ++}; ++struct claw_privbk ++{ ++ int system_validate_comp; ++ TYPE_1__* p_env; ++ int ctl_bk; ++}; ++typedef int __u8; ++struct TYPE_2__ ++{ ++ scalar_t__ packing; ++ int api_type; ++}; ++ ++int CLAW_DBF_TEXT (int, int, char*); ++int CONNECTION_REQUEST; ++int HOST_APPL_NAME; ++scalar_t__ PACKING_ASK; ++scalar_t__ PACK_SEND; ++int WS_APPL_NAME_IP_NAME; ++int WS_APPL_NAME_PACKED; ++int claw_send_control (struct net_device*, int, int, int, int, int, int); ++int setup; ++ ++__attribute__((noinline)) int ++claw_send_control (struct net_device* net, int a, int b, int c, int d, int e, ++ int f) ++{ ++ return net->ml_priv->system_validate_comp + a + b + c + d + f; ++} ++ ++__attribute__((used)) static int ++claw_snd_conn_req (struct net_device *dev, __u8 link) ++{ ++ int rc; ++ struct claw_privbk *privptr = dev->ml_priv; ++ struct clawctl *p_ctl; ++ CLAW_DBF_TEXT (2, setup, "snd_conn"); ++ rc = 1; ++ p_ctl = (struct clawctl *)&privptr->ctl_bk; ++ p_ctl->linkid = link; ++ if (privptr->system_validate_comp == 0x00) ++ { ++ return rc; ++ } ++ if (privptr->p_env->packing == PACKING_ASK) ++ { ++ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0, ++ WS_APPL_NAME_PACKED, WS_APPL_NAME_PACKED); ++ } ++ if (privptr->p_env->packing == PACK_SEND) ++ { ++ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0, ++ 
WS_APPL_NAME_IP_NAME, WS_APPL_NAME_IP_NAME); ++ } ++ if (privptr->p_env->packing == 0) ++ { ++ rc = claw_send_control (dev, CONNECTION_REQUEST, 0, 0, 0, ++ HOST_APPL_NAME, privptr->p_env->api_type); ++ } ++ return rc; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +new file mode 100644 +index 000000000..fd1e936ca +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +@@ -0,0 +1,56 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_4__ TYPE_2__; ++typedef struct TYPE_3__ TYPE_1__; ++ ++typedef int uint8_t; ++typedef int uint16_t; ++ ++struct TYPE_4__ ++{ ++ size_t cpu_id; ++}; ++ ++struct TYPE_3__ ++{ ++ int cpuc_dtrace_flags; ++}; ++ ++TYPE_2__ *CPU; ++volatile int CPU_DTRACE_FAULT; ++TYPE_1__ *cpu_core; ++scalar_t__ dtrace_load8 (uintptr_t); ++ ++__attribute__((used)) static int ++dtrace_bcmp (const void *s1, const void *s2, size_t len) ++{ ++ volatile uint16_t *flags; ++ flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; ++ if (s1 == s2) ++ return (0); ++ if (s1 == NULL || s2 == NULL) ++ return (1); ++ if (s1 != s2 && len != 0) ++ { ++ const uint8_t *ps1 = s1; ++ const uint8_t *ps2 = s2; ++ do ++ { ++ if (dtrace_load8 ((uintptr_t)ps1++) != *ps2++) ++ return (1); ++ } ++ while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); ++ } ++ return (0); ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +new file mode 100644 +index 000000000..b13d785a9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +@@ -0,0 +1,162 @@ ++/* { 
dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++struct mrb_context ++{ ++ size_t stack; ++ size_t stbase; ++ size_t stend; ++ size_t eidx; ++ int *ci; ++ int *cibase; ++ int status; ++}; ++ ++struct RObject ++{ ++ int dummy; ++}; ++ ++struct RHash ++{ ++ int dummy; ++}; ++ ++struct RFiber ++{ ++ struct mrb_context *cxt; ++}; ++ ++struct RClass ++{ ++ int dummy; ++}; ++ ++struct RBasic ++{ ++ int tt; ++}; ++ ++struct RArray ++{ ++ int dummy; ++}; ++ ++typedef int mrb_state; ++typedef int mrb_gc; ++typedef int mrb_callinfo; ++size_t ARY_LEN (struct RArray *); ++size_t MRB_ENV_STACK_LEN (struct RBasic *); ++int MRB_FIBER_TERMINATED; ++ ++#define MRB_TT_ARRAY 140 ++#define MRB_TT_CLASS 139 ++#define MRB_TT_DATA 138 ++#define MRB_TT_ENV 137 ++#define MRB_TT_EXCEPTION 136 ++#define MRB_TT_FIBER 135 ++#define MRB_TT_HASH 134 ++#define MRB_TT_ICLASS 133 ++#define MRB_TT_MODULE 132 ++#define MRB_TT_OBJECT 131 ++#define MRB_TT_PROC 130 ++#define MRB_TT_RANGE 129 ++#define MRB_TT_SCLASS 128 ++ ++size_t ci_nregs (int *); ++int gc_mark_children (int *, int *, struct RBasic *); ++size_t mrb_gc_mark_hash_size (int *, struct RHash *); ++size_t mrb_gc_mark_iv_size (int *, struct RObject *); ++size_t mrb_gc_mark_mt_size (int *, struct RClass *); ++ ++__attribute__((used)) static size_t ++gc_gray_mark (mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) ++{ ++ size_t children = 0; ++ gc_mark_children (mrb, gc, obj); ++ switch (obj->tt) ++ { ++ case MRB_TT_ICLASS: ++ children++; ++ break; ++ ++ case MRB_TT_CLASS: ++ case MRB_TT_SCLASS: ++ case MRB_TT_MODULE: ++ { ++ struct RClass *c = (struct RClass *)obj; ++ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj); ++ children += mrb_gc_mark_mt_size (mrb, c); ++ children ++; ++ } ++ break; ++ ++ case MRB_TT_OBJECT: ++ case MRB_TT_DATA: ++ case 
MRB_TT_EXCEPTION: ++ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj); ++ break; ++ ++ case MRB_TT_ENV: ++ children += MRB_ENV_STACK_LEN (obj); ++ break; ++ ++ case MRB_TT_FIBER: ++ { ++ struct mrb_context *c = ((struct RFiber *)obj)->cxt; ++ size_t i; ++ mrb_callinfo *ci; ++ if (!c || c->status == MRB_FIBER_TERMINATED) ++ break; ++ ++ i = c->stack - c->stbase; ++ if (c->ci) ++ { ++ i += ci_nregs (c->ci); ++ } ++ if (c->stbase + i > c->stend) ++ i = c->stend - c->stbase; ++ ++ children += i; ++ children += c->eidx; ++ if (c->cibase) ++ { ++ for (i = 0, ci = c->cibase; ci <= c->ci; i++, ci++) ++ ; ++ } ++ children += i; ++ } ++ break; ++ ++ case MRB_TT_ARRAY: ++ { ++ struct RArray *a = (struct RArray *)obj; ++ children += ARY_LEN (a); ++ } ++ break; ++ ++ case MRB_TT_HASH: ++ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj); ++ children += mrb_gc_mark_hash_size (mrb, (struct RHash *)obj); ++ break; ++ ++ case MRB_TT_PROC: ++ case MRB_TT_RANGE: ++ children += 2; ++ break; ++ default: ++ break; ++ } ++ ++ return children; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +new file mode 100644 +index 000000000..bc28a658a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +@@ -0,0 +1,126 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_6__ TYPE_3__; ++typedef struct TYPE_5__ TYPE_2__; ++typedef struct TYPE_4__ TYPE_1__; ++ ++struct io_accel2_cmd ++{ ++ int dummy; ++}; ++ ++struct hpsa_tmf_struct ++{ ++ int it_nexus; ++}; ++ ++struct hpsa_scsi_dev_t ++{ ++ int nphysical_disks; ++ int ioaccel_handle; ++ struct hpsa_scsi_dev_t **phys_disk; ++}; ++ ++struct ctlr_info ++{ ++ TYPE_3__ 
*pdev; ++ struct io_accel2_cmd *ioaccel2_cmd_pool; ++}; ++struct TYPE_4__ ++{ ++ int LunAddrBytes; ++}; ++ ++struct TYPE_5__ ++{ ++ TYPE_1__ LUN; ++}; ++ ++struct CommandList ++{ ++ size_t cmdindex; ++ int cmd_type; ++ struct hpsa_scsi_dev_t *phys_disk; ++ TYPE_2__ Header; ++}; ++ ++struct TYPE_6__ ++{ ++ int dev; ++}; ++ ++int BUG (); ++#define CMD_IOACCEL1 132 ++#define CMD_IOACCEL2 131 ++#define CMD_IOCTL_PEND 130 ++#define CMD_SCSI 129 ++#define IOACCEL2_TMF 128 ++int dev_err (int *, char *, int); ++scalar_t__ hpsa_is_cmd_idle (struct CommandList *); ++int le32_to_cpu (int); ++int test_memcmp (unsigned char *, int *, int); ++ ++__attribute__((used)) static bool ++hpsa_cmd_dev_match (struct ctlr_info *h, struct CommandList *c, ++ struct hpsa_scsi_dev_t *dev, unsigned char *scsi3addr) ++{ ++ int i; ++ bool match = false; ++ struct io_accel2_cmd * c2 = &h->ioaccel2_cmd_pool[c->cmdindex]; ++ struct hpsa_tmf_struct *ac = (struct hpsa_tmf_struct *)c2; ++ ++ if (hpsa_is_cmd_idle (c)) ++ return false; ++ ++ switch (c->cmd_type) ++ { ++ case CMD_SCSI: ++ case CMD_IOCTL_PEND: ++ match = !test_memcmp (scsi3addr, &c->Header.LUN.LunAddrBytes, ++ sizeof (c->Header.LUN.LunAddrBytes)); ++ break; ++ ++ case CMD_IOACCEL1: ++ case CMD_IOACCEL2: ++ if (c->phys_disk == dev) ++ { ++ match = true; ++ } ++ else ++ { ++ for (i = 0; i < dev->nphysical_disks && !match; i++) ++ { ++ match = dev->phys_disk[i] == c->phys_disk; ++ } ++ } ++ break; ++ ++ case IOACCEL2_TMF: ++ for (i = 0; i < dev->nphysical_disks && !match; i++) ++ { ++ match = dev->phys_disk[i]->ioaccel_handle == ++ le32_to_cpu (ac->it_nexus); ++ } ++ break; ++ ++ case 0: ++ match = false; ++ break; ++ default: ++ dev_err (&h->pdev->dev, "unexpected cmd_type: %d\n", c->cmd_type); ++ BUG (); ++ } ++ ++ return match; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c 
+new file mode 100644 +index 000000000..0a585ac3d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c +@@ -0,0 +1,82 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_4__ TYPE_2__; ++typedef struct TYPE_3__ TYPE_1__; ++typedef int u32; ++ ++struct mv_udc ++{ ++ TYPE_2__ *op_regs; ++ TYPE_1__ *ep_dqh; ++ struct mv_ep *eps; ++}; ++ ++struct mv_ep ++{ ++ TYPE_1__ *dqh; ++ struct mv_udc *udc; ++}; ++ ++struct TYPE_4__ ++{ ++ int *epctrlx; ++}; ++ ++struct TYPE_3__ ++{ ++ int max_packet_length; ++ int next_dtd_ptr; ++}; ++ ++int EP0_MAX_PKT_SIZE; ++int EPCTRL_RX_ENABLE; ++int EPCTRL_RX_EP_TYPE_SHIFT; ++int EPCTRL_TX_ENABLE; ++int EPCTRL_TX_EP_TYPE_SHIFT; ++int EP_QUEUE_HEAD_IOS; ++int EP_QUEUE_HEAD_MAX_PKT_LEN_POS; ++int EP_QUEUE_HEAD_NEXT_TERMINATE; ++int USB_ENDPOINT_XFER_CONTROL; ++int readl (int *); ++int writel (int, int *); ++ ++__attribute__((used)) static void ++ep0_reset (struct mv_udc *udc) ++{ ++ struct mv_ep *ep; ++ u32 epctrlx; ++ int i = 0; ++ for (i = 0; i < 2; i++) ++ { ++ ep = &udc->eps[i]; ++ ep->udc = udc; ++ ep->dqh = &udc->ep_dqh[i]; ++ ep->dqh->max_packet_length = ++ (EP0_MAX_PKT_SIZE << EP_QUEUE_HEAD_MAX_PKT_LEN_POS) ++ | EP_QUEUE_HEAD_IOS; ++ ep->dqh->next_dtd_ptr = EP_QUEUE_HEAD_NEXT_TERMINATE; ++ epctrlx = readl (&udc->op_regs->epctrlx[0]); ++ if (i) ++ { ++ epctrlx |= EPCTRL_TX_ENABLE ++ | (USB_ENDPOINT_XFER_CONTROL << EPCTRL_TX_EP_TYPE_SHIFT); ++ } ++ else ++ { ++ epctrlx |= EPCTRL_RX_ENABLE ++ | (USB_ENDPOINT_XFER_CONTROL << EPCTRL_RX_EP_TYPE_SHIFT); ++ } ++ writel (epctrlx, &udc->op_regs->epctrlx[0]); ++ } ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c 
+new file mode 100644 +index 000000000..bddd862fe +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++struct tcpcb ++{ ++ int t_state; ++}; ++ ++struct socket ++{ ++ int dummy; ++}; ++ ++struct proc ++{ ++ int dummy; ++}; ++ ++struct inpcb ++{ ++ scalar_t__ inp_lport; ++}; ++ ++int COMMON_END (int); ++int COMMON_START (); ++int PRU_LISTEN; ++int TCPS_LISTEN; ++int in_pcbbind (struct inpcb *, int *, struct proc *); ++struct inpcb* sotoinpcb (struct socket *); ++ ++__attribute__((used)) static void ++tcp_usr_listen (struct socket *so, struct proc *p) ++{ ++ int error = 0; ++ struct inpcb *inp = sotoinpcb (so); ++ struct tcpcb *tp; ++ ++ COMMON_START (); ++ if (inp->inp_lport == 0) ++ { ++ error = in_pcbbind (inp, NULL, p); ++ } ++ if (error == 0) ++ { ++ tp->t_state = TCPS_LISTEN; ++ } ++ COMMON_END (PRU_LISTEN); ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +new file mode 100644 +index 000000000..1a06f5eec +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_4__ TYPE_2__; ++typedef struct TYPE_3__ TYPE_1__; ++ ++struct TYPE_4__ ++{ ++ size_t modCount; ++ TYPE_1__ *modList; ++}; ++ ++struct TYPE_3__ ++{ ++ void *modDescr; ++ void *modName; ++}; ++ ++size_t MAX_MODS; ++void *String_Alloc (char *); ++int test_strlen (char *); ++int trap_FD_GetFileList (char *, char 
*, char *, int); ++TYPE_2__ uiInfo; ++ ++__attribute__((used)) static void ++UI_LoadMods () ++{ ++ int numdirs; ++ char dirlist[2048]; ++ char *dirptr; ++ char *descptr; ++ int i; ++ int dirlen; ++ ++ uiInfo.modCount = 0; ++ numdirs = trap_FD_GetFileList ("$modelist", "", dirlist, sizeof (dirlist)); ++ dirptr = dirlist; ++ for (i = 0; i < numdirs; i++) ++ { ++ dirlen = test_strlen (dirptr) + 1; ++ descptr = dirptr + dirlen; ++ uiInfo.modList[uiInfo.modCount].modName = String_Alloc (dirptr); ++ uiInfo.modList[uiInfo.modCount].modDescr = String_Alloc (descptr); ++ dirptr += dirlen + test_strlen (descptr) + 1; ++ uiInfo.modCount++; ++ if (uiInfo.modCount >= MAX_MODS) ++ { ++ break; ++ } ++ } ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +new file mode 100644 +index 000000000..94eb88d5c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +@@ -0,0 +1,58 @@ ++/* Supports the MEM_REF offset. ++ _1 = MEM[(struct arc *)ap_4 + 72B].flow; ++ Old rewrite:_1 = ap.reorder.0_8->flow; ++ New rewrite:_1 = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow. */ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ const int MAX = 100; ++ /* A similar scenario can be reproduced only by using local variables. 
*/ ++ arc_p ap = NULL; ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap[1].flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +new file mode 100644 +index 000000000..bbf9420d0 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct T_HASH_ENTRY ++{ ++ unsigned int hash; ++ unsigned int klen; ++ char *key; ++} iHashEntry; ++ ++typedef struct T_HASH ++{ ++ unsigned int size; ++ unsigned int fill; ++ unsigned int keys; ++ ++ iHashEntry **array; ++} uHash; ++ ++uHash *retval; ++ ++int ++main() { ++ retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +new file mode 100644 +index 000000000..f706db968 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +@@ -0,0 +1,71 @@ ++// support POINTER_DIFF_EXPR & NOP_EXPR to avoid ++// escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt" ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ 
int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ arc_t *old_arcs; ++ node_t *node; ++ node_t *stop; ++ size_t off; ++ network_t* net; ++ ++ for( ; node->number < stop->number; node++ ) ++ { ++ off = node->basic_arc - old_arcs; ++ node->basic_arc = (arc_t *)(net->arcs + off); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +new file mode 100644 +index 000000000..963295cb4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +@@ -0,0 +1,55 @@ ++// support NEGATE_EXPR rewriting ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ int64_t susp = 0; ++ const int MAX = 100; ++ arc_p ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ ap -= susp; ++ printf("%d\n", ap[1].flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +new file mode 100644 +index 000000000..aa10506a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +@@ -0,0 +1,55 @@ ++// release escape_ptr_ptr, "Type is used in a pointer to a 
pointer [not handled yet]"; ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_t **ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_t**) malloc(MAX * sizeof(arc_t*)); ++ (*ap)[0].id = 300; ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +index 5a476e8f9..6ccb753b5 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -43,6 +43,10 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \ + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf_*.c]] \ + "" "-fipa-reorder-fields -fdump-ipa-all -flto-partition=one -fwhole-program" + ++# -fipa-struct-reorg=3 ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \ ++ "" "-fipa-reorder-fields -fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program" ++ + # All done. 
+ torture-finish + dg-finish +diff --git a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_replace_type.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_replace_type.c +new file mode 100644 +index 000000000..fa8c66b9e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_replace_type.c +@@ -0,0 +1,49 @@ ++/* { dg-do compile } */ ++ ++#include ++ ++struct AngleDef ++{ ++ double K; ++ double th0; ++}; ++typedef struct AngleDef angldef; ++ ++struct bndangdihe ++{ ++ int nbond; ++ int nangl; ++ int ndihe; ++}; ++typedef struct bndangdihe bah; ++ ++struct ambprmtop ++{ ++ double *AnglK; ++ double *AnglEq; ++ bah nBAH; ++ angldef *AParam; ++ char source[512]; ++ char eprulesource[512]; ++}; ++typedef struct ambprmtop prmtop; ++ ++static void OrderBondParameters (prmtop *tp) ++{ ++ int i; ++ tp->AParam = (angldef *)malloc (tp->nBAH.nangl * sizeof (angldef)); ++ for (i = 0; i < tp->nBAH.nangl; i++) ++ { ++ tp->AParam[i].K = tp->AnglK[i]; ++ tp->AParam[i].th0 = tp->AnglEq[i]; ++ } ++} ++ ++void main () ++{ ++ prmtop *tp = (prmtop *)malloc (100 * sizeof (prmtop)); ++ OrderBondParameters (tp); ++} ++ ++/*---------------------------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ +-- +2.33.0 + diff --git a/gcc.spec b/gcc.spec index aae0ee3..6589a9c 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. 
-%global gcc_release 7 +%global gcc_release 8 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -136,12 +136,27 @@ Provides: bundled(libbacktrace) Provides: bundled(libffi) Provides: gcc(major) = %{gcc_major} -Patch0: 0000-Version-Set-version-to-12.3.1.patch -Patch1: 0001-CONFIG-Regenerate-configure-file.patch -Patch2: 0002-libquadmath-Enable-libquadmath-on-kunpeng.patch -Patch3: 0003-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch -Patch4: 0004-Enable-small-loop-unrolling-for-O2.patch -Patch5: 0005-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch +Patch1: 0001-Version-Set-version-to-12.3.1.patch +Patch2: 0002-RISCV-Backport-inline-subword-atomic-patches.patch +Patch3: 0003-CONFIG-Regenerate-configure-file.patch +Patch4: 0004-libquadmath-Enable-libquadmath-on-kunpeng.patch +Patch6: 0006-MULL64-1-3-Add-A-B-op-CST-B-match-and-simplify-optim.patch +Patch7: 0007-MULL64-2-3-Fold-series-of-instructions-into-mul.patch +Patch8: 0008-MULL64-3-3-Fold-series-of-instructions-into-umulh.patch +Patch9: 0009-MULL64-Disable-mull64-transformation-by-default.patch +Patch10: 0010-Version-Clear-DATESTAMP_s.patch +Patch11: 0011-Add-attribute-hot-judgement-for-INLINE_HINT_known_ho.patch +Patch12: 0012-Enable-small-loop-unrolling-for-O2.patch +Patch13: 0013-i386-Only-enable-small-loop-unrolling-in-backend-PR-.patch +Patch14: 0014-Array-widen-compare-Add-a-new-optimization-for-array.patch +Patch15: 0015-Backport-Structure-reorganization-optimization.patch +Patch16: 0016-CompleteStructRelayout-Complete-Structure-Relayout.patch +Patch17: 0017-StructReorg-Some-bugfix-for-structure-reorganization.patch +Patch18: 0018-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch +Patch19: 0019-fp-model-Enable-fp-model-on-kunpeng.patch +Patch20: 0020-simdmath-Enable-simdmath-on-kunpeng.patch +Patch21: 0021-StructReorderFields-Structure-reorder-fields.patch +Patch22: 0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch # On ARM EABI systems, we do 
want -gnueabi to be part of the # target triple. @@ -609,12 +624,27 @@ not stable, so plugins must be rebuilt any time GCC is updated. %prep %setup -q -n gcc-12.3.0 -%patch0 -p1 %patch1 -p1 %patch2 -p1 %patch3 -p1 %patch4 -p1 -%patch5 -p1 +%patch6 -p1 +%patch7 -p1 +%patch8 -p1 +%patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 +%patch21 -p1 +%patch22 -p1 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE @@ -2718,6 +2748,10 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Aug 29 2023 huangxiaoquan 12.3.1-8 +- Type: Sync +- DESC: Sync patch from openeuler/gcc + * Fri Aug 11 2023 Hongyu Wang 12.3.1-7 - Type:Sync - i386: Only enable small loop unrolling in backend [PR 107692]. -- Gitee From 609cef8ac6ca4de64bed245700975082853ad49a Mon Sep 17 00:00:00 2001 From: huang-xiaoquan Date: Tue, 29 Aug 2023 22:08:04 +0800 Subject: [PATCH 5/8] [Sync] Sync patch from openeuler/gcc Sync patch from openeuler/gcc - 20230829 part 2 --- ...kernel-pgo-option-to-support-PGO-ker.patch | 44 + ...actoring-and-merge-reorder-fields-in.patch | 1470 +++++++++++++++++ gcc.spec | 10 +- 3 files changed, 1523 insertions(+), 1 deletion(-) create mode 100644 0023-PGO-kernel-Add-fkernel-pgo-option-to-support-PGO-ker.patch create mode 100644 0024-Struct-Reorg-Refactoring-and-merge-reorder-fields-in.patch diff --git a/0023-PGO-kernel-Add-fkernel-pgo-option-to-support-PGO-ker.patch b/0023-PGO-kernel-Add-fkernel-pgo-option-to-support-PGO-ker.patch new file mode 100644 index 0000000..7577a01 --- /dev/null +++ b/0023-PGO-kernel-Add-fkernel-pgo-option-to-support-PGO-ker.patch @@ -0,0 +1,44 @@ +From 9dc6d315ba350c9113f486ec897217a82838fb73 Mon Sep 17 00:00:00 2001 +From: Xiong Zhou +Date: Mon, 7 Aug 2023 14:44:56 +0800 +Subject: [PATCH 1/2] [PGO kernel] Add fkernel-pgo option to support PGO kernel + compilation. 
+ +--- + gcc/common.opt | 4 ++++ + gcc/tree-profile.cc | 4 +++- + 2 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/gcc/common.opt b/gcc/common.opt +index e365a48bc..bd3b7dcb1 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2363,6 +2363,10 @@ fprofile-generate= + Common Joined RejectNegative + Enable common options for generating profile info for profile feedback directed optimizations, and set -fprofile-dir=. + ++fkernel-pgo ++Common Var(flag_kernel_pgo) Optimization Init(0) ++Disable TLS setting of instrumentation variables to support PGO kernel compilation in -fprofile-generate, as kernel does not support TLS. ++ + fprofile-info-section + Common RejectNegative + Register the profile information in the .gcov_info section instead of using a constructor/destructor. +diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc +index 6d40401f8..e7646f1a1 100644 +--- a/gcc/tree-profile.cc ++++ b/gcc/tree-profile.cc +@@ -108,7 +108,9 @@ init_ic_make_global_vars (void) + DECL_ARTIFICIAL (ic_tuple_var) = 1; + DECL_INITIAL (ic_tuple_var) = NULL; + DECL_EXTERNAL (ic_tuple_var) = 1; +- if (targetm.have_tls) ++ /* Disable TLS setting when compiling kernel in -fprofile-generate, ++ as kernel does not support TLS. */ ++ if (targetm.have_tls && !flag_kernel_pgo) + set_decl_tls_model (ic_tuple_var, decl_default_tls_model (ic_tuple_var)); + } + +-- +2.33.0 + diff --git a/0024-Struct-Reorg-Refactoring-and-merge-reorder-fields-in.patch b/0024-Struct-Reorg-Refactoring-and-merge-reorder-fields-in.patch new file mode 100644 index 0000000..ddea235 --- /dev/null +++ b/0024-Struct-Reorg-Refactoring-and-merge-reorder-fields-in.patch @@ -0,0 +1,1470 @@ +From 535d00d764c38e70c563ac59e702a20e3b744a95 Mon Sep 17 00:00:00 2001 +From: liyancheng <412998149@qq.com> +Date: Tue, 29 Aug 2023 19:18:21 +0800 +Subject: [PATCH] [Struct Reorg] Refactoring and merge reorder fields into + struct reorg optimization + +Merge reorder_fields pass into struct_reorg pass. 
Using flag -fipa-struct-reorg=[0,1,2,3] +to enable none, struct reorg, reorder fields and dfe optimizations. +--- + gcc/gimple-ssa-warn-access.cc | 2 +- + gcc/ipa-free-lang-data.cc | 9 +- + gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 353 ++++++++---------- + gcc/passes.def | 1 - + gcc/symbol-summary.h | 4 +- + gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c | 2 +- + .../gcc.dg/struct/dfe_ele_minus_verify.c | 2 +- + .../gcc.dg/struct/dfe_extr_board_init.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c | 2 +- + .../gcc.dg/struct/dfe_extr_mv_udc_core.c | 2 +- + .../gcc.dg/struct/dfe_extr_tcp_usrreq.c | 2 +- + .../gcc.dg/struct/dfe_extr_ui_main.c | 2 +- + .../gcc.dg/struct/dfe_mem_ref_offset.c | 2 +- + .../struct/dfe_mul_layer_ptr_record_bug.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c | 2 +- + .../gcc.dg/struct/dfe_ptr_negate_expr.c | 2 +- + gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c | 2 +- + .../struct/rf_DTE_struct_instance_field.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c | 2 +- + .../gcc.dg/struct/rf_check_ptr_layers_bug.c | 2 +- + .../gcc.dg/struct/rf_create_fields_bug.c | 2 +- + .../gcc.dg/struct/rf_create_new_func_bug.c | 2 +- + .../gcc.dg/struct/rf_ele_minus_verify.c | 2 +- + .../gcc.dg/struct/rf_escape_by_base.c | 2 +- + .../gcc.dg/struct/rf_external_func_types.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c | 2 +- + .../gcc.dg/struct/rf_mem_ref_offset.c | 2 +- + .../struct/rf_mul_layer_ptr_record_bug.c | 2 +- + .../gcc.dg/struct/rf_pass_conflict.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c | 2 +- + .../gcc.dg/struct/rf_ptr_negate_expr.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c | 2 +- + 
.../gcc.dg/struct/rf_rescusive_type.c | 2 +- + .../struct/rf_rewrite_assign_more_cmp.c | 2 +- + .../gcc.dg/struct/rf_rewrite_cond_bug.c | 2 +- + .../gcc.dg/struct/rf_rewrite_cond_more_cmp.c | 2 +- + .../gcc.dg/struct/rf_rewrite_phi_bug.c | 2 +- + gcc/testsuite/gcc.dg/struct/rf_visible_func.c | 2 +- + .../gcc.dg/struct/rf_void_ptr_param_func.c | 2 +- + gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 2 +- + gcc/timevar.def | 1 - + gcc/tree-pass.h | 1 - + 48 files changed, 201 insertions(+), 252 deletions(-) + +diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc +index 7f5c92c96..a24645783 100644 +--- a/gcc/gimple-ssa-warn-access.cc ++++ b/gcc/gimple-ssa-warn-access.cc +@@ -2198,7 +2198,7 @@ pass_waccess::gate (function *) + In pass waccess, it will traverse all SSA and cause ICE + when handling these unused SSA. So temporarily disable + pass waccess when enable structure optimizations. */ +- if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) ++ if (flag_ipa_struct_reorg) + return false; + + return (warn_free_nonheap_object +diff --git a/gcc/ipa-free-lang-data.cc b/gcc/ipa-free-lang-data.cc +index a88381ddb..801e95cea 100644 +--- a/gcc/ipa-free-lang-data.cc ++++ b/gcc/ipa-free-lang-data.cc +@@ -49,6 +49,9 @@ + #include "except.h" + #include "ipa-utils.h" + ++/* Check whether in C language or LTO with only C language. */ ++extern bool lang_c_p (void); ++ + namespace { + + /* Data used when collecting DECLs and TYPEs for language data removal. */ +@@ -105,7 +108,8 @@ fld_simplified_type_name (tree type) + /* Simplify type will cause that struct A and struct A within + struct B are different type pointers, so skip it in structure + optimizations. 
*/ +- if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) ++ if (flag_ipa_struct_reorg && lang_c_p () ++ && flag_lto_partition == LTO_PARTITION_ONE) + return TYPE_NAME (type); + + if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL) +@@ -349,7 +353,8 @@ fld_simplified_type (tree t, class free_lang_data_d *fld) + /* Simplify type will cause that struct A and struct A within + struct B are different type pointers, so skip it in structure + optimizations. */ +- if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) ++ if (flag_ipa_struct_reorg && lang_c_p () ++ && flag_lto_partition == LTO_PARTITION_ONE) + return t; + if (POINTER_TYPE_P (t)) + return fld_incomplete_type_of (t, fld); +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +index eac5fac7e..dcc6df496 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc +@@ -108,6 +108,37 @@ along with GCC; see the file COPYING3. If not see + #include "cfgloop.h" + #include "langhooks.h" + ++/* Check whether in C language or LTO with only C language. */ ++ ++bool ++lang_c_p (void) ++{ ++ const char *language_string = lang_hooks.name; ++ ++ if (!language_string) ++ return false; ++ ++ if (lang_GNU_C ()) ++ return true; ++ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check ++ { ++ unsigned i = 0; ++ tree t = NULL_TREE; ++ ++ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t) ++ { ++ language_string = TRANSLATION_UNIT_LANGUAGE (t); ++ if (language_string == NULL ++ || strncmp (language_string, "GNU C", 5) ++ || (language_string[5] != '\0' ++ && !(ISDIGIT (language_string[5])))) ++ return false; ++ } ++ return true; ++ } ++ return false; ++} ++ + namespace { + + using namespace struct_reorg; +@@ -198,37 +229,6 @@ gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type, + GSI_SAME_STMT); + } + +-/* Check whether in C language or LTO with only C language. 
*/ +- +-static bool +-lang_c_p (void) +-{ +- const char *language_string = lang_hooks.name; +- +- if (!language_string) +- return false; +- +- if (lang_GNU_C ()) +- return true; +- else if (strcmp (language_string, "GNU GIMPLE") == 0) // For LTO check +- { +- unsigned i = 0; +- tree t = NULL_TREE; +- +- FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t) +- { +- language_string = TRANSLATION_UNIT_LANGUAGE (t); +- if (language_string == NULL +- || strncmp (language_string, "GNU C", 5) +- || (language_string[5] != '\0' +- && !(ISDIGIT (language_string[5])))) +- return false; +- } +- return true; +- } +- return false; +-} +- + /* Get the number of pointer layers. */ + + int +@@ -262,29 +262,23 @@ is_from_void_ptr_parm (tree ssa_name) + && VOID_POINTER_P (TREE_TYPE (ssa_name))); + } + +-enum srmode +-{ +- NORMAL = 0, +- COMPLETE_STRUCT_RELAYOUT, +- STRUCT_REORDER_FIELDS +-}; +- + /* Enum the struct layout optimize level, + which should be the same as the option -fstruct-reorg=. */ + + enum struct_layout_opt_level + { + NONE = 0, +- STRUCT_REORG, +- STRUCT_REORDER_FIELDS_SLO, +- DEAD_FIELD_ELIMINATION ++ STRUCT_SPLIT = 1 << 0, ++ COMPLETE_STRUCT_RELAYOUT = 1 << 1, ++ STRUCT_REORDER_FIELDS = 1 << 2, ++ DEAD_FIELD_ELIMINATION = 1 << 3 + }; + + static bool is_result_of_mult (tree arg, tree *num, tree struct_size); + static bool isptrptr (tree type); + void get_base (tree &base, tree expr); + +-srmode current_mode; ++static unsigned int current_layout_opt_level; + hash_map replace_type_map; + + /* Return true if one of these types is created by struct-reorg. 
*/ +@@ -626,7 +620,7 @@ void + srtype::simple_dump (FILE *f) + { + print_generic_expr (f, type); +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + fprintf (f, "(%d)", TYPE_UID (type)); + } + +@@ -673,7 +667,7 @@ srfield::create_new_fields (tree newtype[max_split], + tree newfields[max_split], + tree newlast[max_split]) + { +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + create_new_reorder_fields (newtype, newfields, newlast); + return; +@@ -861,7 +855,7 @@ srtype::create_new_type (void) + we are not splitting the struct into two clusters, + then just return false and don't change the type. */ + if (!createnewtype && maxclusters == 0 +- && current_mode != STRUCT_REORDER_FIELDS) ++ && current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + newtype[0] = type; + return false; +@@ -889,8 +883,7 @@ srtype::create_new_type (void) + sprintf (id, "%d", i); + if (tname) + { +- name = concat (tname, current_mode == STRUCT_REORDER_FIELDS +- ? ".reorder." 
: ".reorg.", id, NULL); ++ name = concat (tname, ".reorg.", id, NULL); + TYPE_NAME (newtype[i]) = build_decl (UNKNOWN_LOCATION, + TYPE_DECL, + get_identifier (name), +@@ -902,8 +895,7 @@ srtype::create_new_type (void) + for (unsigned i = 0; i < fields.length (); i++) + { + srfield *f = fields[i]; +- if (current_mode == STRUCT_REORDER_FIELDS +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION + && !(f->field_access & READ_FIELD)) + continue; + f->create_new_fields (newtype, newfields, newlast); +@@ -924,13 +916,12 @@ srtype::create_new_type (void) + + warn_padded = save_warn_padded; + +- if (current_mode == STRUCT_REORDER_FIELDS ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && replace_type_map.get (this->newtype[0]) == NULL) + replace_type_map.put (this->newtype[0], this->type); + if (dump_file) + { +- if (current_mode == STRUCT_REORDER_FIELDS +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION + && has_dead_field ()) + fprintf (dump_file, "Dead field elimination.\n"); + } +@@ -1052,8 +1043,7 @@ srfunction::create_new_decls (void) + sprintf (id, "%d", j); + if (tname) + { +- name = concat (tname, current_mode == STRUCT_REORDER_FIELDS +- ? ".reorder." 
: ".reorg.", id, NULL); ++ name = concat (tname, ".reorg.", id, NULL); + new_name = get_identifier (name); + free (name); + } +@@ -1264,7 +1254,7 @@ public: + bool done_recording; + + // Methods +- unsigned execute (enum srmode mode); ++ unsigned execute (unsigned int opt); + void mark_type_as_escape (tree type, escape_type escapes, + gimple *stmt = NULL); + +@@ -2651,7 +2641,7 @@ escape_type_volatile_array_or_ptrptr (tree type) + return escape_volatile; + if (isarraytype (type)) + return escape_array; +- if (isptrptr (type) && (current_mode != STRUCT_REORDER_FIELDS)) ++ if (isptrptr (type) && (current_layout_opt_level < STRUCT_REORDER_FIELDS)) + return escape_ptr_ptr; + return does_not_escape; + } +@@ -2672,12 +2662,11 @@ ipa_struct_reorg::record_field_type (tree field, srtype *base_srtype) + field_srfield->type = field_srtype; + field_srtype->add_field_site (field_srfield); + } +- if (field_srtype == base_srtype && current_mode != COMPLETE_STRUCT_RELAYOUT +- && current_mode != STRUCT_REORDER_FIELDS) ++ if (field_srtype == base_srtype && current_layout_opt_level == STRUCT_SPLIT) + base_srtype->mark_escape (escape_rescusive_type, NULL); + /* Types of non-pointer field are difficult to track the correctness + of the rewrite when it used by the escaped type. */ +- if (current_mode == STRUCT_REORDER_FIELDS ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (field_type) == RECORD_TYPE) + field_srtype->mark_escape (escape_instance_field, NULL); + } +@@ -2704,7 +2693,7 @@ ipa_struct_reorg::record_struct_field_types (tree base_type, + base_srtype->mark_escape (e, NULL); + /* Types of non-pointer field are difficult to track the correctness + of the rewrite when it used by the escaped type. 
*/ +- if (current_mode == STRUCT_REORDER_FIELDS ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (field_type) == RECORD_TYPE) + base_srtype->mark_escape (escape_instance_field, NULL); + if (handled_type (field_type)) +@@ -2895,8 +2884,7 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) + + /* Separate instance is hard to trace in complete struct + relayout optimization. */ +- if ((current_mode == COMPLETE_STRUCT_RELAYOUT +- || current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) + e = escape_separate_instance; + +@@ -3000,7 +2988,7 @@ ipa_struct_reorg::find_vars (gimple *stmt) + /* Add a safe func mechanism. */ + bool l_find = true; + bool r_find = true; +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + l_find = !(current_function->is_safe_func + && TREE_CODE (lhs) == SSA_NAME +@@ -3046,7 +3034,7 @@ ipa_struct_reorg::find_vars (gimple *stmt) + } + } + } +- else if ((current_mode == STRUCT_REORDER_FIELDS) ++ else if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) + && (gimple_assign_rhs_code (stmt) == LE_EXPR + || gimple_assign_rhs_code (stmt) == LT_EXPR + || gimple_assign_rhs_code (stmt) == GE_EXPR +@@ -3057,7 +3045,7 @@ ipa_struct_reorg::find_vars (gimple *stmt) + find_var (gimple_assign_rhs2 (stmt), stmt); + } + /* Find void ssa_name from stmt such as: _2 = _1 - old_arcs_1. 
*/ +- else if ((current_mode == STRUCT_REORDER_FIELDS) ++ else if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) + && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR + && types_compatible_p ( + TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs1 (stmt))), +@@ -3270,8 +3258,7 @@ ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) + default: + break; + } +- if (current_mode == STRUCT_REORDER_FIELDS +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION) ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION) + { + /* Look for loads and stores. */ + walk_stmt_load_store_ops (stmt, this, find_field_p_load, +@@ -3427,9 +3414,11 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + arg = gimple_assign_rhs1 (size_def_stmt); + size_def_stmt = SSA_NAME_DEF_STMT (arg); + } +- else if (rhs_code == NEGATE_EXPR && current_mode == STRUCT_REORDER_FIELDS) ++ else if (rhs_code == NEGATE_EXPR ++ && current_layout_opt_level >= STRUCT_REORDER_FIELDS) + return trace_calculate_negate (size_def_stmt, num, struct_size); +- else if (rhs_code == NOP_EXPR && current_mode == STRUCT_REORDER_FIELDS) ++ else if (rhs_code == NOP_EXPR ++ && current_layout_opt_level >= STRUCT_REORDER_FIELDS) + return trace_calculate_diff (size_def_stmt, num); + else + { +@@ -3447,15 +3436,15 @@ is_result_of_mult (tree arg, tree *num, tree struct_size) + bool + ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if ((current_mode == STRUCT_REORDER_FIELDS) ++ if ((current_layout_opt_level >= STRUCT_REORDER_FIELDS) + && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))) + return true; +- if ((current_mode == COMPLETE_STRUCT_RELAYOUT) ++ if ((current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT) + && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + return true; +- if ((current_mode == NORMAL) ++ if ((current_layout_opt_level == STRUCT_SPLIT) + && 
(gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) +@@ -3576,7 +3565,7 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, + /* x_1 = y.x_nodes; void *x; + Directly mark the structure pointer type assigned + to the void* variable as escape. */ +- else if (current_mode == STRUCT_REORDER_FIELDS ++ else if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE (side) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (side)) + && SSA_NAME_VAR (side) +@@ -3834,7 +3823,7 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + and doesn't mark escape follow.). */ + /* _1 = MEM[(struct arc_t * *)a_1]. + then base a_1: ssa_name - pointer_type - integer_type. */ +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + bool is_int_ptr = POINTER_TYPE_P (TREE_TYPE (base)) + && (TREE_CODE (inner_type (TREE_TYPE (base))) +@@ -3896,7 +3885,7 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, + /* Escape the operation of fetching field with pointer offset such as: + *(&(t->right)) = malloc (0); -> MEM[(struct node * *)_1 + 8B] = malloc (0); + */ +- if (current_mode != NORMAL ++ if (current_layout_opt_level > STRUCT_SPLIT + && (TREE_CODE (expr) == MEM_REF) && (offset != 0)) + { + gcc_assert (can_escape); +@@ -4060,7 +4049,7 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) + /* callee_func (_1, _2); + Check the callee func, instead of current func. 
*/ + if (!(free_or_realloc +- || (current_mode == STRUCT_REORDER_FIELDS ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && safe_functions.contains ( + node->get_edge (stmt)->callee))) + && VOID_POINTER_P (argtypet)) +@@ -4088,12 +4077,7 @@ ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt) + realpart, imagpart, address, escape_from_base)) + return; + +- if (current_mode == STRUCT_REORDER_FIELDS) +- { +- if (!opt_for_fn (current_function_decl, flag_ipa_reorder_fields)) +- type->mark_escape (escape_non_optimize, stmt); +- } +- else ++ if (current_layout_opt_level > NONE) + { + if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) + type->mark_escape (escape_non_optimize, stmt); +@@ -4197,7 +4181,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, + void + ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) + { +- if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if (current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT + && handled_allocation_stmt (stmt)) + { + tree arg0 = gimple_call_arg (stmt, 0); +@@ -4291,7 +4275,7 @@ ipa_struct_reorg::check_definition_call (srdecl *decl, vec &worklist) + if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) + check_type_and_push (gimple_call_arg (stmt, 0), decl, worklist, stmt); + +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + if (!handled_allocation_stmt (stmt)) + type->mark_escape (escape_return, stmt); +@@ -4341,7 +4325,8 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) + type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); + return; + } +- if (current_mode == STRUCT_REORDER_FIELDS && SSA_NAME_VAR (ssa_name) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS ++ && SSA_NAME_VAR (ssa_name) + && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (ssa_name)))) + type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); + gimple *stmt = SSA_NAME_DEF_STMT 
(ssa_name); +@@ -4425,7 +4410,7 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, + { + /* In Complete Struct Relayout, if lhs type is the same + as rhs type, we could return without any harm. */ +- if (current_mode == COMPLETE_STRUCT_RELAYOUT) ++ if (current_layout_opt_level == COMPLETE_STRUCT_RELAYOUT) + return; + + tree base; +@@ -4437,7 +4422,7 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, + if (!get_type_field (other, base, indirect, type1, field, + realpart, imagpart, address, escape_from_base)) + { +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + /* Release INTEGER_TYPE cast to struct pointer. */ + bool cast_from_int_ptr = current_function->is_safe_func && base +@@ -4487,7 +4472,8 @@ get_base (tree &base, tree expr) + void + ipa_struct_reorg::check_ptr_layers (tree a_expr, tree b_expr, gimple *stmt) + { +- if (current_mode != STRUCT_REORDER_FIELDS || current_function->is_safe_func ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS ++ || current_function->is_safe_func + || !(POINTER_TYPE_P (TREE_TYPE (a_expr))) + || !(POINTER_TYPE_P (TREE_TYPE (b_expr))) + || !handled_type (TREE_TYPE (a_expr)) +@@ -4554,12 +4540,9 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree rhs2 = gimple_cond_rhs (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_cond_code (stmt); +- if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) +- || (current_mode == COMPLETE_STRUCT_RELAYOUT +- && (code != EQ_EXPR && code != NE_EXPR +- && code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR)) +- || (current_mode == STRUCT_REORDER_FIELDS ++ if ((current_layout_opt_level == STRUCT_SPLIT ++ && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && (code != EQ_EXPR && code != NE_EXPR + && code != LT_EXPR && code != LE_EXPR + && code != GT_EXPR && code != 
GE_EXPR))) +@@ -4592,15 +4575,12 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, + tree rhs2 = gimple_assign_rhs2 (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_assign_rhs_code (stmt); +- if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) +- || (current_mode == COMPLETE_STRUCT_RELAYOUT +- && (code != EQ_EXPR && code != NE_EXPR +- && code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR)) +- || (current_mode == STRUCT_REORDER_FIELDS ++ if ((current_layout_opt_level == STRUCT_SPLIT ++ && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT + && (code != EQ_EXPR && code != NE_EXPR + && code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR))) ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -4722,9 +4702,9 @@ ipa_struct_reorg::record_function (cgraph_node *node) + escapes = escape_marked_as_used; + else if (!node->local) + { +- if (current_mode != STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS) + escapes = escape_visible_function; +- if (current_mode == STRUCT_REORDER_FIELDS && node->externally_visible) ++ else if (node->externally_visible) + escapes = escape_visible_function; + } + else if (!node->can_change_signature) +@@ -4732,12 +4712,7 @@ ipa_struct_reorg::record_function (cgraph_node *node) + else if (!tree_versionable_function_p (node->decl)) + escapes = escape_noclonable_function; + +- if (current_mode == STRUCT_REORDER_FIELDS) +- { +- if (!opt_for_fn (node->decl, flag_ipa_reorder_fields)) +- escapes = escape_non_optimize; +- } +- else if (current_mode == NORMAL || current_mode == COMPLETE_STRUCT_RELAYOUT) ++ if (current_layout_opt_level > NONE) + { + if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) + escapes = escape_non_optimize; +@@ -4747,10 +4722,10 @@ ipa_struct_reorg::record_function 
(cgraph_node *node) + gimple_stmt_iterator si; + + /* Add a safe func mechanism. */ +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + current_function->is_safe_func = safe_functions.contains (node); +- if (dump_file) ++ if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nfunction %s/%u: is_safe_func = %d\n", + node->name (), node->order, +@@ -4958,7 +4933,7 @@ ipa_struct_reorg::record_accesses (void) + } + + /* Add a safe func mechanism. */ +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + record_safe_func_with_void_ptr_parm (); + + FOR_EACH_FUNCTION (cnode) +@@ -5174,8 +5149,7 @@ ipa_struct_reorg::propagate_escape_via_ext_func_types (void) + void + ipa_struct_reorg::prune_escaped_types (void) + { +- if (current_mode != COMPLETE_STRUCT_RELAYOUT +- && current_mode != STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level == STRUCT_SPLIT) + { + /* Detect recusive types and mark them as escaping. */ + detect_cycles (); +@@ -5183,7 +5157,7 @@ ipa_struct_reorg::prune_escaped_types (void) + mark them as escaping. */ + propagate_escape (); + } +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + propagate_escape_via_original (); + propagate_escape_via_empty_with_no_original (); +@@ -5244,7 +5218,7 @@ ipa_struct_reorg::prune_escaped_types (void) + if (function->args.is_empty () + && function->decls.is_empty () + && function->globals.is_empty () +- && current_mode != STRUCT_REORDER_FIELDS) ++ && current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + delete function; + functions.ordered_remove (i); +@@ -5272,7 +5246,7 @@ ipa_struct_reorg::prune_escaped_types (void) + /* The escape type is not deleted in STRUCT_REORDER_FIELDS, + Then the type that contains the escaped type fields + can find complete information. 
*/ +- if (current_mode != STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < types.length ();) + { +@@ -5320,7 +5294,7 @@ ipa_struct_reorg::create_new_types (void) + for (unsigned i = 0; i < types.length (); i++) + newtypes += types[i]->create_new_type (); + +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < types.length (); i++) + { +@@ -5444,8 +5418,7 @@ ipa_struct_reorg::create_new_args (cgraph_node *new_node) + char *name = NULL; + if (tname) + { +- name = concat (tname, current_mode == STRUCT_REORDER_FIELDS +- ? ".reorder.0" : ".reorg.0", NULL); ++ name = concat (tname, ".reorg.0", NULL); + new_name = get_identifier (name); + free (name); + } +@@ -5532,9 +5505,7 @@ ipa_struct_reorg::create_new_functions (void) + } + statistics_counter_event (NULL, "Create new function", 1); + new_node = node->create_version_clone_with_body ( +- vNULL, NULL, NULL, NULL, NULL, +- current_mode == STRUCT_REORDER_FIELDS +- ? "struct_reorder" : "struct_reorg"); ++ vNULL, NULL, NULL, NULL, NULL, "struct_reorg"); + new_node->can_change_signature = node->can_change_signature; + new_node->make_local (); + f->newnode = new_node; +@@ -5661,7 +5632,7 @@ ipa_struct_reorg::rewrite_expr (tree expr, + newbase1 = build_fold_addr_expr (newbase1); + if (indirect) + { +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + /* Supports the MEM_REF offset. 
+ _1 = MEM[(struct arc *)ap_1 + 72B].flow; +@@ -5719,8 +5690,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + { + bool remove = false; + +- if (current_mode == STRUCT_REORDER_FIELDS +- && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ if (current_layout_opt_level & DEAD_FIELD_ELIMINATION + && remove_dead_field_stmt (gimple_assign_lhs (stmt))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -5756,10 +5726,10 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + return remove; + } + +- if ((current_mode != STRUCT_REORDER_FIELDS ++ if ((current_layout_opt_level < STRUCT_REORDER_FIELDS + && (gimple_assign_rhs_code (stmt) == EQ_EXPR + || gimple_assign_rhs_code (stmt) == NE_EXPR)) +- || (current_mode == STRUCT_REORDER_FIELDS ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) + == tcc_comparison))) + { +@@ -5769,7 +5739,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + tree newrhs2[max_split]; + tree_code rhs_code = gimple_assign_rhs_code (stmt); + tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; +- if (current_mode == STRUCT_REORDER_FIELDS ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && rhs_code != EQ_EXPR && rhs_code != NE_EXPR) + code = rhs_code; + +@@ -5818,8 +5788,9 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + _6 = _4 + _5; + _5 = (long unsigned int) _3; + _3 = _1 - old_2. 
*/ +- if (current_mode != STRUCT_REORDER_FIELDS +- || (current_mode == STRUCT_REORDER_FIELDS && (num != NULL))) ++ if (current_layout_opt_level < STRUCT_REORDER_FIELDS ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS ++ && (num != NULL))) + num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); + for (unsigned i = 0; i < max_split && newlhs[i]; i++) + { +@@ -5843,7 +5814,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + } + + /* Support POINTER_DIFF_EXPR rewriting. */ +- if (current_mode == STRUCT_REORDER_FIELDS ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR) + { + tree rhs1 = gimple_assign_rhs1 (stmt); +@@ -6026,7 +5997,8 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) + srfunction *f = find_function (node); + + /* Add a safe func mechanism. */ +- if (current_mode == STRUCT_REORDER_FIELDS && f && f->is_safe_func) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS ++ && f && f->is_safe_func) + { + tree expr = gimple_call_arg (stmt, 0); + tree newexpr[max_split]; +@@ -6152,9 +6124,9 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) + tree_code rhs_code = gimple_cond_code (stmt); + + /* Handle only equals or not equals conditionals. */ +- if ((current_mode != STRUCT_REORDER_FIELDS ++ if ((current_layout_opt_level < STRUCT_REORDER_FIELDS + && (rhs_code != EQ_EXPR && rhs_code != NE_EXPR)) +- || (current_mode == STRUCT_REORDER_FIELDS ++ || (current_layout_opt_level >= STRUCT_REORDER_FIELDS + && TREE_CODE_CLASS (rhs_code) != tcc_comparison)) + return false; + tree lhs = gimple_cond_lhs (stmt); +@@ -6208,7 +6180,7 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) + bool + ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *) + { +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + /* Delete debug gimple now. 
*/ + return true; + bool remove = false; +@@ -6367,7 +6339,7 @@ ipa_struct_reorg::rewrite_functions (void) + then don't rewrite any accesses. */ + if (!create_new_types ()) + { +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < functions.length (); i++) + { +@@ -6386,7 +6358,7 @@ ipa_struct_reorg::rewrite_functions (void) + return 0; + } + +- if (current_mode == STRUCT_REORDER_FIELDS && dump_file) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS && dump_file) + { + fprintf (dump_file, "=========== all created newtypes: ===========\n\n"); + dump_newtypes (dump_file); +@@ -6396,13 +6368,13 @@ ipa_struct_reorg::rewrite_functions (void) + { + retval = TODO_remove_functions; + create_new_functions (); +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + prune_escaped_types (); + } + } + +- if (current_mode == STRUCT_REORDER_FIELDS) ++ if (current_layout_opt_level >= STRUCT_REORDER_FIELDS) + { + for (unsigned i = 0; i < functions.length (); i++) + { +@@ -6559,33 +6531,33 @@ ipa_struct_reorg::execute_struct_relayout (void) + } + + unsigned int +-ipa_struct_reorg::execute (enum srmode mode) ++ipa_struct_reorg::execute (unsigned int opt) + { + unsigned int ret = 0; + + if (dump_file) + fprintf (dump_file, "\n\n====== ipa_struct_reorg level %d ======\n\n", +- mode); ++ opt); + +- if (mode == NORMAL || mode == STRUCT_REORDER_FIELDS) ++ if (opt != COMPLETE_STRUCT_RELAYOUT) + { +- current_mode = mode; ++ current_layout_opt_level = opt; + /* If there is a top-level inline-asm, + the pass immediately returns. 
*/ + if (symtab->first_asm_symbol ()) + return 0; + record_accesses (); + prune_escaped_types (); +- if (current_mode == NORMAL) ++ if (current_layout_opt_level == STRUCT_SPLIT) + analyze_types (); + + ret = rewrite_functions (); + } +- else if (mode == COMPLETE_STRUCT_RELAYOUT) ++ else + { + if (dump_file) + fprintf (dump_file, "\n\nTry Complete Struct Relayout:\n"); +- current_mode = COMPLETE_STRUCT_RELAYOUT; ++ current_layout_opt_level = COMPLETE_STRUCT_RELAYOUT; + if (symtab->first_asm_symbol ()) + return 0; + record_accesses (); +@@ -6622,10 +6594,37 @@ public: + virtual unsigned int execute (function *) + { + unsigned int ret = 0; +- ret = ipa_struct_reorg ().execute (NORMAL); +- if (!ret) +- ret = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT); +- return ret; ++ unsigned int ret_reorg = 0; ++ unsigned int level = 0; ++ switch (struct_layout_optimize_level) ++ { ++ case 3: level |= DEAD_FIELD_ELIMINATION; ++ // FALLTHRU ++ case 2: level |= STRUCT_REORDER_FIELDS; ++ // FALLTHRU ++ case 1: ++ level |= COMPLETE_STRUCT_RELAYOUT; ++ level |= STRUCT_SPLIT; ++ break; ++ case 0: break; ++ default: gcc_unreachable (); ++ } ++ /* Preserved for backward compatibility, reorder fields needs run before ++ struct split and complete struct relayout. */ ++ if (flag_ipa_reorder_fields && level < STRUCT_REORDER_FIELDS) ++ ret = ipa_struct_reorg ().execute (STRUCT_REORDER_FIELDS); ++ ++ if (level >= STRUCT_REORDER_FIELDS) ++ ret = ipa_struct_reorg ().execute (level); ++ ++ if (level >= COMPLETE_STRUCT_RELAYOUT) ++ { ++ /* Preserved for backward compatibility. 
*/ ++ ret_reorg = ipa_struct_reorg ().execute (STRUCT_SPLIT); ++ if (!ret_reorg) ++ ret_reorg = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT); ++ } ++ return ret | ret_reorg; + } + + }; // class pass_ipa_struct_reorg +@@ -6645,52 +6644,6 @@ pass_ipa_struct_reorg::gate (function *) + && (in_lto_p || flag_whole_program)); + } + +-const pass_data pass_data_ipa_reorder_fields = +-{ +- SIMPLE_IPA_PASS, // type +- "reorder_fields", // name +- OPTGROUP_NONE, // optinfo_flags +- TV_IPA_REORDER_FIELDS, // tv_id +- 0, // properties_required +- 0, // properties_provided +- 0, // properties_destroyed +- 0, // todo_flags_start +- 0, // todo_flags_finish +-}; +- +-class pass_ipa_reorder_fields : public simple_ipa_opt_pass +-{ +-public: +- pass_ipa_reorder_fields (gcc::context *ctxt) +- : simple_ipa_opt_pass (pass_data_ipa_reorder_fields, ctxt) +- {} +- +- /* opt_pass methods: */ +- virtual bool gate (function *); +- virtual unsigned int execute (function *) +- { +- unsigned int ret = 0; +- ret = ipa_struct_reorg ().execute (STRUCT_REORDER_FIELDS); +- return ret; +- } +- +-}; // class pass_ipa_reorder_fields +- +-bool +-pass_ipa_reorder_fields::gate (function *) +-{ +- return (optimize >= 3 +- && flag_ipa_reorder_fields +- /* Don't bother doing anything if the program has errors. */ +- && !seen_error () +- && flag_lto_partition == LTO_PARTITION_ONE +- /* Only enable struct optimizations in C since other +- languages' grammar forbid. */ +- && lang_c_p () +- /* Only enable struct optimizations in lto or whole_program. 
*/ +- && (in_lto_p || flag_whole_program)); +-} +- + } // anon namespace + + +@@ -6699,9 +6652,3 @@ make_pass_ipa_struct_reorg (gcc::context *ctxt) + { + return new pass_ipa_struct_reorg (ctxt); + } +- +-simple_ipa_opt_pass * +-make_pass_ipa_reorder_fields (gcc::context *ctxt) +-{ +- return new pass_ipa_reorder_fields (ctxt); +-} +diff --git a/gcc/passes.def b/gcc/passes.def +index bdc835b87..9692066e4 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -178,7 +178,6 @@ along with GCC; see the file COPYING3. If not see + compiled unit. */ + INSERT_PASSES_AFTER (all_late_ipa_passes) + NEXT_PASS (pass_ipa_pta); +- NEXT_PASS (pass_ipa_reorder_fields); + /* FIXME: this should be a normal IP pass. */ + NEXT_PASS (pass_ipa_struct_reorg); + NEXT_PASS (pass_omp_simd_clone); +diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h +index 6fa529eee..3fe64047c 100644 +--- a/gcc/symbol-summary.h ++++ b/gcc/symbol-summary.h +@@ -105,7 +105,7 @@ protected: + { + /* In structure optimizatons, we call new to ensure that + the allocated memory is initialized to 0. */ +- if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) ++ if (flag_ipa_struct_reorg) + return is_ggc () ? 
new (ggc_internal_alloc (sizeof (T))) T () + : new T (); + +@@ -122,7 +122,7 @@ protected: + ggc_delete (item); + else + { +- if (flag_ipa_struct_reorg || flag_ipa_reorder_fields) ++ if (flag_ipa_struct_reorg) + delete item; + else + m_allocator.remove (item); +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +index 0c9e384c4..afa181e07 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +@@ -83,4 +83,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +index 717fcc386..c87db2aba 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +@@ -57,4 +57,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c +index 7723c240b..d217f7bd8 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_board_init.c +@@ -74,4 +74,4 @@ LBF_DFU_If_Needed (void) + } + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c +index a1feac966..f9e2cf471 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_claw.c +@@ -81,4 +81,4 @@ claw_snd_conn_req (struct 
net_device *dev, __u8 link) + return rc; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +index fd1e936ca..c86c4bb3c 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +@@ -53,4 +53,4 @@ dtrace_bcmp (const void *s1, const void *s2, size_t len) + return (0); + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +index b13d785a9..8484d29d2 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +@@ -159,4 +159,4 @@ gc_gray_mark (mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) + return children; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +index bc28a658a..300b2dac4 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +@@ -123,4 +123,4 @@ hpsa_cmd_dev_match (struct ctlr_info *h, struct CommandList *c, + return match; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c +index 0a585ac3d..9397b98ea 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c 
++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_mv_udc_core.c +@@ -79,4 +79,4 @@ ep0_reset (struct mv_udc *udc) + } + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +index bddd862fe..0ae75e13e 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +@@ -55,4 +55,4 @@ tcp_usr_listen (struct socket *so, struct proc *p) + COMMON_END (PRU_LISTEN); + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +index 1a06f5eec..512fb37a7 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +@@ -58,4 +58,4 @@ UI_LoadMods () + } + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +index 94eb88d5c..0dea5517c 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +@@ -55,4 +55,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +index bbf9420d0..00bd911c1 100644 +--- 
a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +@@ -27,4 +27,4 @@ main() { + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +index f706db968..0cfa6554e 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +@@ -68,4 +68,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +index 963295cb4..4a7069244 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +index aa10506a1..b91efe10f 100644 +--- a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "reorder_fields" } } */ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_reorg" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +index b95be2dab..1b6a462e2 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c ++++ 
b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +@@ -72,4 +72,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +index 3d243313b..346c71264 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +@@ -91,4 +91,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c +index faaf1e3a5..b876fef86 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c +@@ -21,4 +21,4 @@ main() + { + g(); + } +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +index 886706ae9..7d7641f01 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +@@ -79,4 +79,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of 
file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +index f3785f392..63fb3f828 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +@@ -53,4 +53,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +index 1415d759a..8c431e15f 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +@@ -57,4 +57,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c +index 003da0b57..efc95a4cd 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c +@@ -80,4 +80,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_external_func_types.c b/gcc/testsuite/gcc.dg/struct/rf_external_func_types.c +index 84a34f241..2a9bea783 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_external_func_types.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_external_func_types.c +@@ -66,4 +66,4 @@ test () + 
return 0; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +index 10dcf098c..75fc10575 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +@@ -69,4 +69,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +index 8d1a9a114..9fb06877b 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +@@ -55,4 +55,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c +index 23765fc56..e8eb0eaa0 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c +@@ -27,4 +27,4 @@ main() { + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c 
b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +index 54e737ee8..bd535afd0 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +@@ -106,4 +106,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c +index 2ae46fb31..11393a197 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c +@@ -84,4 +84,4 @@ main () + return cnt; + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c +index 3a3c10b70..d601fae64 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c +@@ -68,4 +68,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c +index 7b7d110df..4d5f25aa1 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { 
scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c +index 317aafa5f..b3891fde9 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c +@@ -31,4 +31,4 @@ main () + printf (" Tree.\n"); + } + +-/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c +index 01a33f669..4df79e4f0 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c +@@ -52,4 +52,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c +index a38556533..49d2106d1 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c +@@ -55,4 +55,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c +index 5c17ee528..f71c7894f 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c +@@ -54,4 +54,4 @@ main () + return 0; + } + 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c +index 710517ee9..721cee2c6 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c +@@ -62,4 +62,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c +index 6ed0a5d2d..3871d3d99 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c +@@ -69,4 +69,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c +index 5a2dd964f..5ad206433 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c +@@ -55,4 +55,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git 
a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c +index faa90b42d..a002f9889 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c +@@ -78,4 +78,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c +index 8f2da99cc..f77a062bd 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c +@@ -89,4 +89,4 @@ main () + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c +index 723142c59..cba6225a5 100644 +--- a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c ++++ b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c +@@ -51,4 +51,4 @@ main() + return 0; + } + +-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +index 6ccb753b5..278c4e4f5 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -45,7 +45,7 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf_*.c]] \ + + # 
-fipa-struct-reorg=3 + gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \ +- "" "-fipa-reorder-fields -fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program" ++ "" "-fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program" + + # All done. + torture-finish +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 2b27c858a..98a5a490f 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -80,7 +80,6 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") +-DEFTIMEVAR (TV_IPA_REORDER_FIELDS , "ipa struct reorder fields optimization") + DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") + DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index a9ec8ed21..56898e019 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -527,7 +527,6 @@ extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); +-extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); +-- +2.33.0 + diff --git a/gcc.spec b/gcc.spec index 6589a9c..fa47798 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. 
-%global gcc_release 8 +%global gcc_release 9 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -157,6 +157,8 @@ Patch19: 0019-fp-model-Enable-fp-model-on-kunpeng.patch Patch20: 0020-simdmath-Enable-simdmath-on-kunpeng.patch Patch21: 0021-StructReorderFields-Structure-reorder-fields.patch Patch22: 0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch +Patch23: 0023-PGO-kernel-Add-fkernel-pgo-option-to-support-PGO-ker.patch +Patch24: 0024-Struct-Reorg-Refactoring-and-merge-reorder-fields-in.patch # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. @@ -645,6 +647,8 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch20 -p1 %patch21 -p1 %patch22 -p1 +%patch23 -p1 +%patch24 -p1 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE @@ -2748,6 +2752,10 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Aug 29 2023 huangxiaoquan 12.3.1-9 +- Type: Sync +- DESC: Sync patch from openeuler/gcc part 2 + * Tue Aug 29 2023 huangxiaoquan 12.3.1-8 - Type: Sync - DESC: Sync patch from openeuler/gcc -- Gitee From 95427adf7d16bbb61ae3ced9c64aacbaf53dce72 Mon Sep 17 00:00:00 2001 From: dingguangya Date: Mon, 4 Sep 2023 20:06:54 +0800 Subject: [PATCH 6/8] [Sync] Sync patch from openeuler/gcc Sync patch from openeuler/gcc --- 0025-AArch64-Rewrite-the-tsv110-option.patch | 114 ++++++++++++++++++ ...utline-atomics-improves-libgomp-perf.patch | 37 ++++++ gcc.spec | 10 +- 3 files changed, 160 insertions(+), 1 deletion(-) create mode 100644 0025-AArch64-Rewrite-the-tsv110-option.patch create mode 100644 0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch diff --git a/0025-AArch64-Rewrite-the-tsv110-option.patch b/0025-AArch64-Rewrite-the-tsv110-option.patch new file mode 100644 index 0000000..d2e0dcd --- /dev/null +++ b/0025-AArch64-Rewrite-the-tsv110-option.patch @@ -0,0 +1,114 @@ +From 2f0d0b1298fb9c3266bb102796b027a5570ad833 Mon Sep 17 00:00:00 2001 +From: dingguangya +Date: Mon, 4 
Sep 2023 16:27:38 +0800 +Subject: [PATCH 1/2] [AArch64] Rewrite the tsv110 option + +Reset the more appropriate options for tsv110. +--- + gcc/common/config/aarch64/aarch64-common.cc | 76 +++++++++++++++++++++ + 1 file changed, 76 insertions(+) + +diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc +index dfda5b837..85ce8133b 100644 +--- a/gcc/common/config/aarch64/aarch64-common.cc ++++ b/gcc/common/config/aarch64/aarch64-common.cc +@@ -44,6 +44,8 @@ + #undef TARGET_OPTION_INIT_STRUCT + #define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct + ++#define INVALID_IMP ((unsigned) -1) ++ + /* Set default optimization options. */ + static const struct default_options aarch_option_optimization_table[] = + { +@@ -65,6 +67,77 @@ static const struct default_options aarch_option_optimization_table[] = + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; + ++/* CPU vendor id. */ ++static unsigned vendor_id = INVALID_IMP; ++ ++/* The part number of the CPU. */ ++static unsigned part_id = INVALID_IMP; ++ ++/* Return the hex integer that is after ':' for the FIELD. ++ Return -1 if there was problem parsing the integer. */ ++static unsigned ++parse_cpuinfo (char *field) ++{ ++ if (field == NULL) ++ return INVALID_IMP; ++ const char *rest = strchr (field, ':'); ++ ++ if (rest == NULL) ++ return INVALID_IMP; ++ ++ char *after; ++ unsigned fint = strtol (rest + 1, &after, 16); ++ if (after == rest + 1) ++ return INVALID_IMP; ++ return fint; ++} ++ ++/* Read CPU vendor_id and part_id. */ ++ ++static void ++read_cpuinfo () ++{ ++ FILE *fp = fopen ("/proc/cpuinfo", "r"); ++ if (fp == NULL) ++ return; ++ ++ /* Read 1024-byte data from /proc/cpuinfo. 
*/ ++ char cpuinfo[1024]; ++ fread(cpuinfo, sizeof(char), sizeof(cpuinfo) - 1, fp); ++ ++ char *vendor = strstr(cpuinfo, "CPU implementer"); ++ vendor_id = parse_cpuinfo(vendor); ++ ++ char *part = strstr(cpuinfo, "CPU part"); ++ part_id = parse_cpuinfo(part); ++ ++ fclose(fp); ++} ++ ++/* Reset the tsv110 option. After checking the platform information, ++ this function can reset the more appropriate options. ++ TODO: Currently, this function is not applicable to the cross ++ compilation scenario. */ ++ ++static void ++reset_tsv110_option () ++{ ++ /* Read CPU Information. */ ++ if (vendor_id == INVALID_IMP) ++ read_cpuinfo (); ++ ++ if (vendor_id == 0x48 && part_id == 0xd01) ++ { ++ /* Outline-atomics is enabled by default and ++ aarch64_flag_outline_atomics defaults to 2. Therefore, the current ++ modification affects only the default scenario. When the option ++ moutline-atomics is added, the value of aarch64_flag_outline_atomics is 1, ++ that is, aarch64_flag_outline_atomics is not reset to 0. */ ++ if (aarch64_flag_outline_atomics == 2) ++ aarch64_flag_outline_atomics = 0; ++ } ++} ++ + /* Implement TARGET_HANDLE_OPTION. + This function handles the target specific options for CPU/target selection. + +@@ -83,6 +156,9 @@ aarch64_handle_option (struct gcc_options *opts, + const char *arg = decoded->arg; + int val = decoded->value; + ++ /* Reset the tsv110 options. 
*/ ++ reset_tsv110_option (); ++ + switch (code) + { + case OPT_march_: +-- +2.33.0 + diff --git a/0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch b/0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch new file mode 100644 index 0000000..b57a5a6 --- /dev/null +++ b/0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch @@ -0,0 +1,37 @@ +From 7efae59159577657f22511aa3b2cebe85ca60d9d Mon Sep 17 00:00:00 2001 +From: dingguangya +Date: Mon, 4 Sep 2023 16:30:58 +0800 +Subject: [PATCH 2/2] [GOMP] Enabling moutline-atomics improves libgomp + performance in multi-thread scenarios + +Libgomp is used in multi-thread scenarios, +Enabling moutline-atomics improves performance. +--- + libgomp/configure.tgt | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt +index 2cd7272fc..f924e9f98 100644 +--- a/libgomp/configure.tgt ++++ b/libgomp/configure.tgt +@@ -32,6 +32,17 @@ if test $gcc_cv_have_tls = yes ; then + esac + fi + ++# Enabling moutline-atomics improves libgomp performance in multi-thread scenarios. ++case "${target_cpu}" in ++ aarch64*) ++ case "${target}" in ++ aarch64*-*-linux*) ++ XCFLAGS="${XCFLAGS} -moutline-atomics" ++ ;; ++ esac ++ ;; ++esac ++ + tmake_file= + # Since we require POSIX threads, assume a POSIX system by default. + config_path="posix" +-- +2.33.0 + diff --git a/gcc.spec b/gcc.spec index fa47798..50445a9 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. 
-%global gcc_release 9 +%global gcc_release 10 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -159,6 +159,8 @@ Patch21: 0021-StructReorderFields-Structure-reorder-fields.patch Patch22: 0022-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch Patch23: 0023-PGO-kernel-Add-fkernel-pgo-option-to-support-PGO-ker.patch Patch24: 0024-Struct-Reorg-Refactoring-and-merge-reorder-fields-in.patch +Patch25: 0025-AArch64-Rewrite-the-tsv110-option.patch +Patch26: 0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. @@ -649,6 +651,8 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch22 -p1 %patch23 -p1 %patch24 -p1 +%patch25 -p1 +%patch26 -p1 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE @@ -2752,6 +2756,10 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Mon Sep 04 2023 dingguangya 12.3.1-10 +- Type: Sync +- DESC: Sync patch from openeuler/gcc + * Tue Aug 29 2023 huangxiaoquan 12.3.1-9 - Type: Sync - DESC: Sync patch from openeuler/gcc part 2 -- Gitee From 6aadb648e6a0b7aef781fd4ea47c92c34e75d3c1 Mon Sep 17 00:00:00 2001 From: h00564365 Date: Tue, 5 Sep 2023 10:57:37 +0800 Subject: [PATCH 7/8] [Sync] Sync patch from openeuler/gcc Sync patch from openeuler/gcc - 20230905 --- ...undant-loop-elimination-optimization.patch | 503 ++++++++++++++++++ gcc.spec | 8 +- 2 files changed, 510 insertions(+), 1 deletion(-) create mode 100644 0027-LoopElim-Redundant-loop-elimination-optimization.patch diff --git a/0027-LoopElim-Redundant-loop-elimination-optimization.patch b/0027-LoopElim-Redundant-loop-elimination-optimization.patch new file mode 100644 index 0000000..91b45cd --- /dev/null +++ b/0027-LoopElim-Redundant-loop-elimination-optimization.patch @@ -0,0 +1,503 @@ +From 14d9ee793571c6b6f16fa098cde137ebac7aa58f Mon Sep 17 00:00:00 2001 +From: eastb233 +Date: Mon, 4 Sep 2023 14:58:42 +0800 +Subject: [PATCH] [LoopElim] 
Redundant loop elimination optimization + +Introduce redundant loop elimination optimization controlled +by -floop-elim. And it's often used with -ffinite-loops. +--- + gcc/common.opt | 4 + + gcc/tree-ssa-phiopt.cc | 448 +++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 452 insertions(+) + +diff --git a/gcc/common.opt b/gcc/common.opt +index cae7b380f..b01df919e 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1230,6 +1230,10 @@ fcompare-elim + Common Var(flag_compare_elim_after_reload) Optimization + Perform comparison elimination after register allocation has finished. + ++floop-elim ++Common Var(flag_loop_elim) Init(0) Optimization ++Perform redundant loop elimination. ++ + fconserve-stack + Common Var(flag_conserve_stack) Optimization + Do not perform optimizations increasing noticeably stack usage. +diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc +index c56d0b9ff..cf300d141 100644 +--- a/gcc/tree-ssa-phiopt.cc ++++ b/gcc/tree-ssa-phiopt.cc +@@ -77,6 +77,7 @@ static hash_set * get_non_trapping (); + static void replace_phi_edge_with_variable (basic_block, edge, gphi *, tree); + static void hoist_adjacent_loads (basic_block, basic_block, + basic_block, basic_block); ++static bool do_phiopt_pattern (basic_block, basic_block, basic_block); + static bool gate_hoist_loads (void); + + /* This pass tries to transform conditional stores into unconditional +@@ -266,6 +267,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + hoist_adjacent_loads (bb, bb1, bb2, bb3); + continue; + } ++ else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2)) ++ { ++ continue; ++ } + else + continue; + +@@ -3767,6 +3772,449 @@ hoist_adjacent_loads (basic_block bb0, basic_block bb1, + } + } + ++static bool check_uses (tree, hash_set *); ++ ++/* Check SSA_NAME is used in ++ if (SSA_NAME == 0) ++ ... ++ or ++ if (SSA_NAME != 0) ++ ... 
++*/ ++static bool ++check_uses_cond (const_tree ssa_name, gimple *stmt, ++ hash_set *hset ATTRIBUTE_UNUSED) ++{ ++ tree_code code = gimple_cond_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR) ++ { ++ return false; ++ } ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ if ((lhs == ssa_name && integer_zerop (rhs)) ++ || (rhs == ssa_name && integer_zerop (lhs))) ++ { ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Check SSA_NAME is used in ++ _tmp = SSA_NAME == 0; ++ or ++ _tmp = SSA_NAME != 0; ++ or ++ _tmp = SSA_NAME | _tmp2; ++*/ ++static bool ++check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set *hset) ++{ ++ tree_code code = gimple_assign_rhs_code (stmt); ++ tree lhs, rhs1, rhs2; ++ ++ switch (code) ++ { ++ case EQ_EXPR: ++ case NE_EXPR: ++ rhs1 = gimple_assign_rhs1 (stmt); ++ rhs2 = gimple_assign_rhs2 (stmt); ++ if ((rhs1 == ssa_name && integer_zerop (rhs2)) ++ || (rhs2 == ssa_name && integer_zerop (rhs1))) ++ { ++ return true; ++ } ++ break; ++ ++ case BIT_IOR_EXPR: ++ lhs = gimple_assign_lhs (stmt); ++ if (hset->contains (lhs)) ++ { ++ return false; ++ } ++ /* We should check the use of _tmp further. */ ++ return check_uses (lhs, hset); ++ ++ default: ++ break; ++ } ++ return false; ++} ++ ++/* Check SSA_NAME is used in ++ # result = PHI ++*/ ++static bool ++check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set *hset) ++{ ++ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) ++ { ++ tree arg = gimple_phi_arg_def (stmt, i); ++ if (!integer_zerop (arg) && arg != ssa_name) ++ { ++ return false; ++ } ++ } ++ ++ tree result = gimple_phi_result (stmt); ++ ++ /* It is used to avoid infinite recursion, ++ ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)> ++ {BODY} ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)> ++ {BODY} ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ ... 
++ */ ++ if (hset->contains (result)) ++ { ++ return false; ++ } ++ ++ return check_uses (result, hset); ++} ++ ++/* Check the use of SSA_NAME, it should only be used in comparison ++ operation and PHI node. HSET is used to record the ssa_names ++ that have been already checked. */ ++static bool ++check_uses (tree ssa_name, hash_set *hset) ++{ ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ ++ if (TREE_CODE (ssa_name) != SSA_NAME) ++ { ++ return false; ++ } ++ ++ if (SSA_NAME_VAR (ssa_name) ++ && is_global_var (SSA_NAME_VAR (ssa_name))) ++ { ++ return false; ++ } ++ ++ hset->add (ssa_name); ++ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name) ++ { ++ gimple *stmt = USE_STMT (use_p); ++ ++ /* Ignore debug gimple statements. */ ++ if (is_gimple_debug (stmt)) ++ { ++ continue; ++ } ++ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_COND: ++ if (!check_uses_cond (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ case GIMPLE_ASSIGN: ++ if (!check_uses_assign (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ case GIMPLE_PHI: ++ if (!check_uses_phi (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ default: ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool ++check_def_gimple (gimple *def1, gimple *def2, const_tree result) ++{ ++ /* def1 and def2 should be POINTER_PLUS_EXPR. */ ++ if (!is_gimple_assign (def1) || !is_gimple_assign (def2) ++ || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR ++ || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR) ++ { ++ return false; ++ } ++ ++ tree rhs12 = gimple_assign_rhs2 (def1); ++ ++ tree rhs21 = gimple_assign_rhs1 (def2); ++ tree rhs22 = gimple_assign_rhs2 (def2); ++ ++ if (rhs21 != result) ++ { ++ return false; ++ } ++ ++ /* We should have a positive pointer-plus constant to ensure ++ that the pointer value is continuously increasing. 
*/ ++ if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST ++ || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0) ++ { ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool ++check_loop_body (basic_block bb0, basic_block bb2, const_tree result) ++{ ++ gimple *g01 = first_stmt (bb0); ++ if (!g01 || !is_gimple_assign (g01) ++ || gimple_assign_rhs_code (g01) != MEM_REF ++ || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result) ++ { ++ return false; ++ } ++ ++ gimple *g02 = g01->next; ++ /* GIMPLE_COND would be the last gimple in a basic block, ++ and have no other side effects on RESULT. */ ++ if (!g02 || gimple_code (g02) != GIMPLE_COND) ++ { ++ return false; ++ } ++ ++ if (first_stmt (bb2) != last_stmt (bb2)) ++ { ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Pattern is like ++
++   arg1 = base (rhs11) + cst (rhs12); [def1]
++   goto <bb 0>
++
++   <bb 2>
++   arg2 = result (rhs21) + cst (rhs22); [def2]
++
++   <bb 0>
++   # result = PHI <arg1 (prebb), arg2 (bb 2)>
++   _v = *result;  [g01]
++   if (_v == 0)   [g02]
++     goto <bb 1>
++   else
++     goto <bb 2>
++
++   <bb 1>
++   _1 = result - base;     [g1]
++   _2 = _1 /[ex] cst;      [g2]
++   _3 = (unsigned int) _2; [g3]
++   if (_3 == 0)
++   ...
++*/
++/* Match PHI_STMT, a PHI node of BB0, against the block layout pictured
++   above.  RESULT below is the PHI result; DEF1 and DEF2 are the statements
++   defining its first and second argument.
++
++   BB1 and BB2 are taken by reference and exchanged in place when DEF2 is
++   found in BB1, so that afterwards BB2 is the block holding DEF2.  The
++   exchange is not undone if a later check fails.
++
++   Return true and store DEF1 in OUTPUT on a match; otherwise return false
++   and leave OUTPUT alone.  */
++static bool
++check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
++		gphi *phi_stmt, gimple *&output)
++{
++  /* Only a two-argument, non-virtual PHI can start the pattern.  */
++  tree result = gimple_phi_result (phi_stmt);
++  if (gimple_phi_num_args (phi_stmt) != 2 || virtual_operand_p (result))
++    return false;
++
++  /* Both incoming values have to be SSA names that are not default
++     definitions.  */
++  tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
++  tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
++  if (TREE_CODE (arg1) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (arg1)
++      || TREE_CODE (arg2) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (arg2))
++    return false;
++
++  gimple *def1 = SSA_NAME_DEF_STMT (arg1);
++  gimple *def2 = SSA_NAME_DEF_STMT (arg2);
++  basic_block def1_bb = gimple_bb (def1);
++  basic_block def2_bb = gimple_bb (def2);
++
++  /* Exchange bb1 and bb2 when the branch in bb0 is written the other way
++     round, i.e.
++     if (_v != 0)
++       goto <bb 2>
++     else
++       goto <bb 1>
++     which shows up as DEF2 living in bb1 with bb1 leading back to bb0.  */
++  if (def2_bb == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
++    std::swap (bb1, bb2);
++
++  /* Required layout: prebb[def1] --> bb0 <-- bb2[def2].  */
++  if (!def1_bb || EDGE_SUCC (def1_bb, 0)->dest != bb0)
++    return false;
++  if (def2_bb != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
++    return false;
++
++  /* The two defining statements must have the form the pattern needs.  */
++  if (!check_def_gimple (def1, def2, result))
++    return false;
++
++  /* So must the statements of bb0 and bb2.  */
++  if (!check_loop_body (bb0, bb2, result))
++    return false;
++
++  output = def1;
++  return true;
++}
++
++/* Check that BB1 starts with the statements
++   <bb 1>
++   _1 = result - base;     [g1]
++   _2 = _1 /[ex] cst2;     [g2]
++   _3 = (unsigned int) _2; [g3]
++   if (_3 == 0)
++   ...
++
++   BASE, CST and RESULT are supplied by the caller; CST2 is whatever
++   INTEGER_CST g2 divides by.  The match further requires CST2 != 0 and
++   CST / CST2 >= 1 (evaluated as HOST_WIDE_INT), and that _3 passes
++   check_uses.  Return true and set OUTPUT to g3 on a match; otherwise
++   return false and leave OUTPUT untouched.  */
++static bool
++check_gimple_order (basic_block bb1, const_tree base, const_tree cst,
++		    const_tree result, gimple *&output)
++{
++  /* g1: _1 = result - base.  */
++  gimple *g1 = first_stmt (bb1);
++  if (!g1 || !is_gimple_assign (g1)
++      || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
++      || gimple_assign_rhs1 (g1) != result
++      || gimple_assign_rhs2 (g1) != base)
++    return false;
++
++  /* g2: _2 = _1 /[ex] INTEGER_CST.
++     NOTE(review): ->next does not look past debug statements; confirm that
++     building with -g cannot make this match fail.  */
++  gimple *g2 = g1->next;
++  if (!g2 || !is_gimple_assign (g2)
++      || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
++      || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
++      || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
++    return false;
++
++  /* NUM1 is the constant added in def1, NUM2 the divisor of g2.  _2 must be
++     at least a positive number, so refuse a zero divisor and any pair whose
++     quotient is not positive.  */
++  HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
++  HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
++  if (num2 == 0 || num1 / num2 <= 0)
++    return false;
++
++  /* g3: _3 = (type) _2, with _3 an SSA name.  */
++  gimple *g3 = g2->next;
++  if (!g3 || !is_gimple_assign (g3)
++      || gimple_assign_rhs_code (g3) != NOP_EXPR
++      || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
++      || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
++    return false;
++
++  /* _3 should only be used in comparison operation or PHI node.  VISITED
++     records the names check_uses has already looked at; as an automatic
++     object it is released on every return path.  */
++  hash_set<tree> visited;
++  if (!check_uses (gimple_assign_lhs (g3), &visited))
++    return false;
++
++  output = g3;
++  return true;
++}
++
++/* Scan the PHI nodes of BB0 for one that check_bb_order and
++   check_gimple_order both accept together with BB1 and BB2.  For the first
++   such PHI, replace the operand of the statement reported by
++   check_gimple_order with the constant 1 and return true.  Return false,
++   changing nothing, when no PHI of BB0 matches.  */
++static bool
++do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
++{
++  for (gphi_iterator it = gsi_start_phis (bb0); !gsi_end_p (it);
++       gsi_next (&it))
++    {
++      gphi *phi = it.phi ();
++
++      /* DEF1 receives the statement defining the first PHI argument.  */
++      gimple *def1 = NULL;
++      if (!check_bb_order (bb0, bb1, bb2, phi, def1))
++	continue;
++
++      /* CONV receives g3 of the check_gimple_order pattern.  */
++      gimple *conv = NULL;
++      if (!check_gimple_order (bb1, gimple_assign_rhs1 (def1),
++			       gimple_assign_rhs2 (def1),
++			       gimple_phi_result (phi), conv))
++	continue;
++
++      gcc_assert (conv);
++
++      const bool details_p = dump_file && (dump_flags & TDF_DETAILS);
++      if (details_p)
++	{
++	  fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
++	  print_gimple_stmt (dump_file, conv, 0);
++	  fprintf (dump_file, "to\n");
++	}
++
++      /* Rewrite statement
++	   _3 = (unsigned int) _2;
++	 to
++	   _3 = (unsigned int) 1;
++      */
++      tree one = build_int_cst (TREE_TYPE (gimple_assign_rhs1 (conv)), 1);
++      gimple_assign_set_rhs1 (conv, one);
++      update_stmt (conv);
++
++      if (details_p)
++	{
++	  print_gimple_stmt (dump_file, conv, 0);
++	  fprintf (dump_file, "\n");
++	}
++
++      return true;
++    }
++  return false;
++}
++
+ /* Determine whether we should attempt to hoist adjacent loads out of
+    diamond patterns in pass_phiopt.  Always hoist loads if
+    -fhoist-adjacent-loads is specified and the target machine has
+-- 
+2.28.0.windows.1
+
diff --git a/gcc.spec b/gcc.spec
index 50445a9..a744214 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 10
+%global gcc_release 11
 
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
@@ -161,6 +161,7 @@ Patch23: 0023-PGO-kernel-Add-fkernel-pgo-option-to-support-PGO-ker.patch
 Patch24: 0024-Struct-Reorg-Refactoring-and-merge-reorder-fields-in.patch
 Patch25: 0025-AArch64-Rewrite-the-tsv110-option.patch
 Patch26: 0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch
+Patch27: 0027-LoopElim-Redundant-loop-elimination-optimization.patch
 
 # On ARM EABI systems, we do want -gnueabi to be part of the
 # target triple.
@@ -653,6 +654,7 @@ not stable, so plugins must be rebuilt any time GCC is updated.
 %patch24 -p1
 %patch25 -p1
 %patch26 -p1
+%patch27 -p1
 
 echo '%{_vendor} %{version}-%{release}' > gcc/DEV-PHASE
 
@@ -2756,6 +2758,10 @@ end
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Tue Sep 05 2023 huangxiaoquan  12.3.1-11
+- Type: Sync
+- DESC: Sync patch from openeuler/gcc
+
 * Mon Sep 04 2023 dingguangya  12.3.1-10
 - Type: Sync
 - DESC: Sync patch from openeuler/gcc
-- 
Gitee


From da577ebd493da6751eb3f870cafc9425c52181c9 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao 
Date: Tue, 5 Sep 2023 20:40:55 +0800
Subject: [PATCH 8/8] [SPEC] Enable strip on gcc and %{_target_platform}-gcc

Enable strip on gcc and %{_target_platform}-gcc
---
 gcc.spec | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc.spec b/gcc.spec
index a744214..62c874f 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 11
+%global gcc_release 12
 
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
@@ -1663,6 +1663,9 @@ echo gcc-%{version}-%{release}.%{_arch} > $FULLPATH/rpmver
 ln -s ../../libexec/gcc/%{gcc_target_platform}/%{gcc_major}/liblto_plugin.so \
   %{buildroot}%{_libdir}/bfd-plugins/
 
+strip -s %{buildroot}%{_prefix}/bin/gcc
+strip -s %{buildroot}%{_prefix}/bin/%{_target_platform}-gcc
+
 %check
 cd obj-%{gcc_target_platform}
 
@@ -2758,6 +2761,10 @@ end
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Tue Sep 05 2023 zhaozhenyu  12.3.1-12
+- Type: SPEC
+- DESC: Enable Strip for gcc
+
 * Tue Sep 05 2023 huangxiaoquan  12.3.1-11
 - Type: Sync
 - DESC: Sync patch from openeuler/gcc
-- 
Gitee